Skip to content

Commit 2959e08

Browse files
committed
AMDGPU: Assume all amdhsa kernarg passed implicit arguments by default
Previously we would require adding an attribute to kernels to enable the inputs passed in the kernarg segment, accessed by llvm.amdgcn.implicitarg.ptr. This violates the principle of being correct by default. Some OpenMP testcases were broken recently since it wasn't correctly setting this attribute, and no known frontends are setting this to anything other than the maximum. Most of the test changes are from load widening of argument loads since there now more implied dereferenceable bytes.
1 parent ae0ba7d commit 2959e08

12 files changed

+1798
-1745
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -648,14 +648,18 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
648648
}
649649

650650
unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
651+
assert(AMDGPU::isKernel(F.getCallingConv()));
652+
651653
// We don't allocate the segment if we know the implicit arguments weren't
652654
// used, even if the ABI implies we need them.
653655
if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
654656
return 0;
655657

656658
if (isMesaKernel(F))
657659
return 16;
658-
return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
660+
661+
// Assume all implicit inputs are used by default
662+
return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 56);
659663
}
660664

661665
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll

Lines changed: 60 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -9,34 +9,35 @@ declare hidden void @extern()
99
define amdgpu_kernel void @kernel_call_no_workitem_ids() {
1010
; CHECK-LABEL: name: kernel_call_no_workitem_ids
1111
; CHECK: bb.1 (%ir-block.0):
12-
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
12+
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
1313
; CHECK-NEXT: {{ $}}
14-
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
15-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
16-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
17-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr16
15+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
16+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
17+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
1818
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
1919
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
20+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
2021
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
2122
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
22-
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
23-
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
24-
; CHECK-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
25-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
26-
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
27-
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
28-
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]]
29-
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]
30-
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]]
31-
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
32-
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
33-
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
34-
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
23+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
24+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
25+
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
26+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
27+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
28+
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
29+
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
30+
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
31+
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]]
32+
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
33+
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
34+
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
35+
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4)
3536
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
36-
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
37-
; CHECK-NEXT: $sgpr12 = COPY [[COPY9]](s32)
38-
; CHECK-NEXT: $sgpr13 = COPY [[COPY10]](s32)
39-
; CHECK-NEXT: $sgpr14 = COPY [[COPY11]](s32)
37+
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
38+
; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32)
39+
; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32)
40+
; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32)
4041
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
4142
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
4243
; CHECK-NEXT: S_ENDPGM 0
@@ -47,37 +48,38 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
4748
define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
4849
; CHECK-LABEL: name: kernel_call_no_workgroup_ids
4950
; CHECK: bb.1 (%ir-block.0):
50-
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
51+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
5152
; CHECK-NEXT: {{ $}}
5253
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
5354
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
5455
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
55-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
56+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
5657
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
5758
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
59+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
5860
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
5961
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
60-
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
61-
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
62-
; CHECK-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
63-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
64-
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
65-
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
66-
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
67-
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
68-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
69-
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C2]](s32)
70-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL]]
71-
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
72-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
73-
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C3]](s32)
62+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY5]]
63+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY4]]
64+
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]](p4)
65+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
66+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
67+
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
68+
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
69+
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
70+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
71+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY12]], [[C1]](s32)
72+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[SHL]]
73+
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
74+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
75+
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY13]], [[C2]](s32)
7476
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
75-
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
76-
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
77-
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
78-
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
77+
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
78+
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
79+
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
80+
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4)
7981
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
80-
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
82+
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
8183
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
8284
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
8385
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -89,27 +91,28 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
8991
define amdgpu_kernel void @kernel_call_no_other_sgprs() {
9092
; CHECK-LABEL: name: kernel_call_no_other_sgprs
9193
; CHECK: bb.1 (%ir-block.0):
92-
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
94+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr8_sgpr9
9395
; CHECK-NEXT: {{ $}}
9496
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
9597
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
9698
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
99+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
97100
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
98101
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
99-
; CHECK-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
100-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
101-
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
102-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
103-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
104-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
105-
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C2]](s32)
106-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]]
107-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
108-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
109-
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C3]](s32)
102+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4)
103+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
104+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64)
105+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
106+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
107+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
108+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C1]](s32)
109+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[SHL]]
110+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
111+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
112+
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C2]](s32)
110113
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
111-
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
112-
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
114+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
115+
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY8]](<4 x s32>)
113116
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
114117
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
115118
; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31

0 commit comments

Comments
 (0)