Skip to content

Commit 5160447

Browse files
committed
[AMDGPU] Add gfx10 assembler directive to specify shared VGPR count
Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D105507
1 parent 0405920 commit 5160447

File tree

5 files changed

+75
-2
lines changed

5 files changed

+75
-2
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4451,8 +4451,10 @@ The fields used by CP for code objects before V3 also match those specified in
44514451
======= ======= =============================== ===========================================================================
44524452
Bits Size Field Name Description
44534453
======= ======= =============================== ===========================================================================
4454-
3:0 4 bits SHARED_VGPR_COUNT Number of shared VGPRs for wavefront size 64. Granularity 8. Value 0-120.
4455-
compute_pgm_rsrc1.vgprs + shared_vgpr_cnt cannot exceed 64.
4454+
3:0 4 bits SHARED_VGPR_COUNT Number of shared VGPR blocks when executing in subvector mode. For
4455+
wavefront size 64 the value is 0-15, representing 0-120 VGPRs (granularity
4456+
of 8), such that (compute_pgm_rsrc1.vgprs + 1)*4 + shared_vgpr_count*8 does
4457+
not exceed 256. For wavefront size 32 shared_vgpr_count must be 0.
44564458
31:4 28 Reserved, must be 0.
44574459
bits
44584460
32 **Total size 4 bytes.**
@@ -12372,6 +12374,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
1237212374
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
1237312375
``.amdhsa_forward_progress`` 0 GFX10 Controls FWD_PROGRESS in
1237412376
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
12377+
``.amdhsa_shared_vgpr_count`` 0 GFX10 Controls SHARED_VGPR_COUNT in
12378+
:ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-table`.
1237512379
``.amdhsa_exception_fp_ieee_invalid_op`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in
1237612380
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
1237712381
``.amdhsa_exception_fp_denorm_src`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4645,6 +4645,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
46454645
SMRange VGPRRange;
46464646
uint64_t NextFreeVGPR = 0;
46474647
uint64_t AccumOffset = 0;
4648+
uint64_t SharedVGPRCount = 0;
46484649
SMRange SGPRRange;
46494650
uint64_t NextFreeSGPR = 0;
46504651

@@ -4872,6 +4873,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
48724873
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
48734874
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
48744875
ValRange);
4876+
} else if (ID == ".amdhsa_shared_vgpr_count") {
4877+
if (IVersion.Major < 10)
4878+
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4879+
SharedVGPRCount = Val;
4880+
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
4881+
COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
4882+
ValRange);
48754883
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
48764884
PARSE_BITS_ENTRY(
48774885
KD.compute_pgm_rsrc2,
@@ -4961,6 +4969,19 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
49614969
(AccumOffset / 4 - 1));
49624970
}
49634971

4972+
if (IVersion.Major == 10) {
4973+
// SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
4974+
if (SharedVGPRCount && EnableWavefrontSize32) {
4975+
return TokError("shared_vgpr_count directive not valid on "
4976+
"wavefront size 32");
4977+
}
4978+
if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
4979+
return TokError("shared_vgpr_count*2 + "
4980+
"compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
4981+
"exceed 63\n");
4982+
}
4983+
}
4984+
49644985
getTargetStreamer().EmitAmdhsaKernelDescriptor(
49654986
getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
49664987
ReserveFlatScr);

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,8 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
447447
PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
448448
compute_pgm_rsrc1,
449449
amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
450+
PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
451+
amdhsa::COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT);
450452
}
451453
PRINT_FIELD(
452454
OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,

llvm/test/MC/AMDGPU/hsa-diag-v3.s

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,49 @@
225225
.amdhsa_forward_progress 5
226226
.end_amdhsa_kernel
227227

228+
// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid1
229+
// NONGFX10: error: directive requires gfx10+
230+
// GFX10: error: .amdhsa_next_free_vgpr directive is required
231+
// NONAMDHSA: error: unknown directive
232+
.warning "test_amdhsa_shared_vgpr_count_invalid1"
233+
.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid1
234+
.amdhsa_shared_vgpr_count 8
235+
.end_amdhsa_kernel
236+
237+
// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid2
238+
// NONGFX10: error: directive requires gfx10+
239+
// GFX10: error: shared_vgpr_count directive not valid on wavefront size 32
240+
// NONAMDHSA: error: unknown directive
241+
.warning "test_amdhsa_shared_vgpr_count_invalid2"
242+
.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid2
243+
.amdhsa_next_free_vgpr 16
244+
.amdhsa_next_free_sgpr 0
245+
.amdhsa_shared_vgpr_count 8
246+
.amdhsa_wavefront_size32 1
247+
.end_amdhsa_kernel
248+
249+
// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid3
250+
// NONGFX10: error: directive requires gfx10+
251+
// GFX10: error: value out of range
252+
// NONAMDHSA: error: unknown directive
253+
.warning "test_amdhsa_shared_vgpr_count_invalid3"
254+
.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid3
255+
.amdhsa_next_free_vgpr 32
256+
.amdhsa_next_free_sgpr 0
257+
.amdhsa_shared_vgpr_count 16
258+
.end_amdhsa_kernel
259+
260+
// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid4
261+
// NONGFX10: error: directive requires gfx10+
262+
// GFX10: error: shared_vgpr_count*2 + compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot exceed 63
263+
// NONAMDHSA: error: unknown directive
264+
.warning "test_amdhsa_shared_vgpr_count_invalid4"
265+
.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid4
266+
.amdhsa_next_free_vgpr 273
267+
.amdhsa_next_free_sgpr 0
268+
.amdhsa_shared_vgpr_count 15
269+
.end_amdhsa_kernel
270+
228271
// GCN-LABEL: warning: test_next_free_vgpr_invalid
229272
// AMDHSA: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions
230273
// NONAMDHSA-NOT: error:

llvm/test/MC/AMDGPU/hsa-gfx10-v3.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,13 @@ special_sgpr:
6868
.amdhsa_kernel minimal
6969
.amdhsa_next_free_vgpr 0
7070
.amdhsa_next_free_sgpr 0
71+
.amdhsa_shared_vgpr_count 0
7172
.end_amdhsa_kernel
7273

7374
// ASM: .amdhsa_kernel minimal
7475
// ASM: .amdhsa_next_free_vgpr 0
7576
// ASM-NEXT: .amdhsa_next_free_sgpr 0
77+
// ASM: .amdhsa_shared_vgpr_count 0
7678
// ASM: .end_amdhsa_kernel
7779

7880
// Test that we can specify all available directives with non-default values.
@@ -153,6 +155,7 @@ special_sgpr:
153155
// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
154156
// ASM-NEXT: .amdhsa_memory_ordered 1
155157
// ASM-NEXT: .amdhsa_forward_progress 1
158+
// ASM-NEXT: .amdhsa_shared_vgpr_count 0
156159
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
157160
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
158161
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1

0 commit comments

Comments
 (0)