Skip to content

Commit ae0ba7d

Browse files
committed
AMDGPU: Optimize out implicit kernarg argument allocation if unused
We already annotate functions with the "amdgpu-no-implicitarg-ptr" attribute when llvm.amdgcn.implicitarg.ptr is known to be unused. Start using that annotation to avoid allocating the implicit kernel arguments when they are not needed.
1 parent ee69197 commit ae0ba7d

16 files changed

+70
-66
lines changed

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,12 @@ void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
280280
}
281281
}
282282

283-
void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
283+
void MetadataStreamerV2::emitKernelArgs(const Function &Func,
284+
const GCNSubtarget &ST) {
284285
for (auto &Arg : Func.args())
285286
emitKernelArg(Arg);
286287

287-
emitHiddenKernelArgs(Func);
288+
emitHiddenKernelArgs(Func, ST);
288289
}
289290

290291
void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
@@ -381,10 +382,9 @@ void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
381382
}
382383
}
383384

384-
void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
385-
int HiddenArgNumBytes =
386-
getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
387-
385+
void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
386+
const GCNSubtarget &ST) {
387+
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
388388
if (!HiddenArgNumBytes)
389389
return;
390390

@@ -465,11 +465,12 @@ void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
465465
HSAMetadata.mKernels.push_back(Kernel::Metadata());
466466
auto &Kernel = HSAMetadata.mKernels.back();
467467

468+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
468469
Kernel.mName = std::string(Func.getName());
469470
Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
470471
emitKernelLanguage(Func);
471472
emitKernelAttrs(Func);
472-
emitKernelArgs(Func);
473+
emitKernelArgs(Func, ST);
473474
HSAMetadata.mKernels.back().mCodeProps = CodeProps;
474475
HSAMetadata.mKernels.back().mDebugProps = DebugProps;
475476
}
@@ -673,13 +674,14 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
673674
}
674675

675676
void MetadataStreamerV3::emitKernelArgs(const Function &Func,
677+
const GCNSubtarget &ST,
676678
msgpack::MapDocNode Kern) {
677679
unsigned Offset = 0;
678680
auto Args = HSAMetadataDoc->getArrayNode();
679681
for (auto &Arg : Func.args())
680682
emitKernelArg(Arg, Offset, Args);
681683

682-
emitHiddenKernelArgs(Func, Offset, Args);
684+
emitHiddenKernelArgs(Func, ST, Offset, Args);
683685

684686
Kern[".args"] = Args;
685687
}
@@ -791,11 +793,10 @@ void MetadataStreamerV3::emitKernelArg(
791793
}
792794

793795
void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
796+
const GCNSubtarget &ST,
794797
unsigned &Offset,
795798
msgpack::ArrayDocNode Args) {
796-
int HiddenArgNumBytes =
797-
getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
798-
799+
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
799800
if (!HiddenArgNumBytes)
800801
return;
801802

@@ -912,6 +913,7 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
912913
const SIProgramInfo &ProgramInfo) {
913914
auto &Func = MF.getFunction();
914915
auto Kern = getHSAKernelProps(MF, ProgramInfo);
916+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
915917

916918
assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
917919
Func.getCallingConv() == CallingConv::SPIR_KERNEL);
@@ -925,7 +927,7 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
925927
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
926928
emitKernelLanguage(Func, Kern);
927929
emitKernelAttrs(Func, Kern);
928-
emitKernelArgs(Func, Kern);
930+
emitKernelArgs(Func, ST, Kern);
929931
}
930932

931933
Kernels.push_back(Kern);

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class MDNode;
3030
class Module;
3131
struct SIProgramInfo;
3232
class Type;
33+
class GCNSubtarget;
3334

3435
namespace AMDGPU {
3536

@@ -86,7 +87,8 @@ class MetadataStreamerV3 : public MetadataStreamer {
8687

8788
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern);
8889

89-
void emitKernelArgs(const Function &Func, msgpack::MapDocNode Kern);
90+
void emitKernelArgs(const Function &Func, const GCNSubtarget &ST,
91+
msgpack::MapDocNode Kern);
9092

9193
void emitKernelArg(const Argument &Arg, unsigned &Offset,
9294
msgpack::ArrayDocNode Args);
@@ -98,8 +100,8 @@ class MetadataStreamerV3 : public MetadataStreamer {
98100
StringRef BaseTypeName = "", StringRef AccQual = "",
99101
StringRef TypeQual = "");
100102

101-
void emitHiddenKernelArgs(const Function &Func, unsigned &Offset,
102-
msgpack::ArrayDocNode Args);
103+
void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST,
104+
unsigned &Offset, msgpack::ArrayDocNode Args);
103105

104106
msgpack::DocNode &getRootMetadata(StringRef Key) {
105107
return HSAMetadataDoc->getRoot().getMap(/*Convert=*/true)[Key];
@@ -173,7 +175,7 @@ class MetadataStreamerV2 final : public MetadataStreamer {
173175

174176
void emitKernelAttrs(const Function &Func);
175177

176-
void emitKernelArgs(const Function &Func);
178+
void emitKernelArgs(const Function &Func, const GCNSubtarget &ST);
177179

178180
void emitKernelArg(const Argument &Arg);
179181

@@ -183,7 +185,7 @@ class MetadataStreamerV2 final : public MetadataStreamer {
183185
StringRef BaseTypeName = "", StringRef AccQual = "",
184186
StringRef TypeQual = "");
185187

186-
void emitHiddenKernelArgs(const Function &Func);
188+
void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST);
187189

188190
const Metadata &getHSAMetadata() const {
189191
return HSAMetadata;

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,11 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
648648
}
649649

650650
unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
651+
// We don't allocate the segment if we know the implicit arguments weren't
652+
// used, even if the ABI implies we need them.
653+
if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
654+
return 0;
655+
651656
if (isMesaKernel(F))
652657
return 16;
653658
return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);

llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2730,7 +2730,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32
27302730
; GPRIDX-NEXT: workitem_private_segment_byte_size = 0
27312731
; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0
27322732
; GPRIDX-NEXT: gds_segment_byte_size = 0
2733-
; GPRIDX-NEXT: kernarg_segment_byte_size = 28
2733+
; GPRIDX-NEXT: kernarg_segment_byte_size = 12
27342734
; GPRIDX-NEXT: workgroup_fbarrier_count = 0
27352735
; GPRIDX-NEXT: wavefront_sgpr_count = 9
27362736
; GPRIDX-NEXT: workitem_vgpr_count = 3
@@ -2821,7 +2821,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32
28212821
; MOVREL-NEXT: workitem_private_segment_byte_size = 0
28222822
; MOVREL-NEXT: workgroup_group_segment_byte_size = 0
28232823
; MOVREL-NEXT: gds_segment_byte_size = 0
2824-
; MOVREL-NEXT: kernarg_segment_byte_size = 28
2824+
; MOVREL-NEXT: kernarg_segment_byte_size = 12
28252825
; MOVREL-NEXT: workgroup_fbarrier_count = 0
28262826
; MOVREL-NEXT: wavefront_sgpr_count = 9
28272827
; MOVREL-NEXT: workitem_vgpr_count = 4
@@ -2913,7 +2913,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32
29132913
; GFX10-NEXT: workitem_private_segment_byte_size = 0
29142914
; GFX10-NEXT: workgroup_group_segment_byte_size = 0
29152915
; GFX10-NEXT: gds_segment_byte_size = 0
2916-
; GFX10-NEXT: kernarg_segment_byte_size = 28
2916+
; GFX10-NEXT: kernarg_segment_byte_size = 12
29172917
; GFX10-NEXT: workgroup_fbarrier_count = 0
29182918
; GFX10-NEXT: wavefront_sgpr_count = 9
29192919
; GFX10-NEXT: workitem_vgpr_count = 3
@@ -3559,7 +3559,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32
35593559
; GPRIDX-NEXT: workitem_private_segment_byte_size = 0
35603560
; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0
35613561
; GPRIDX-NEXT: gds_segment_byte_size = 0
3562-
; GPRIDX-NEXT: kernarg_segment_byte_size = 28
3562+
; GPRIDX-NEXT: kernarg_segment_byte_size = 12
35633563
; GPRIDX-NEXT: workgroup_fbarrier_count = 0
35643564
; GPRIDX-NEXT: wavefront_sgpr_count = 6
35653565
; GPRIDX-NEXT: workitem_vgpr_count = 2
@@ -3643,7 +3643,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32
36433643
; MOVREL-NEXT: workitem_private_segment_byte_size = 0
36443644
; MOVREL-NEXT: workgroup_group_segment_byte_size = 0
36453645
; MOVREL-NEXT: gds_segment_byte_size = 0
3646-
; MOVREL-NEXT: kernarg_segment_byte_size = 28
3646+
; MOVREL-NEXT: kernarg_segment_byte_size = 12
36473647
; MOVREL-NEXT: workgroup_fbarrier_count = 0
36483648
; MOVREL-NEXT: wavefront_sgpr_count = 6
36493649
; MOVREL-NEXT: workitem_vgpr_count = 3
@@ -3728,7 +3728,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32
37283728
; GFX10-NEXT: workitem_private_segment_byte_size = 0
37293729
; GFX10-NEXT: workgroup_group_segment_byte_size = 0
37303730
; GFX10-NEXT: gds_segment_byte_size = 0
3731-
; GFX10-NEXT: kernarg_segment_byte_size = 28
3731+
; GFX10-NEXT: kernarg_segment_byte_size = 12
37323732
; GFX10-NEXT: workgroup_fbarrier_count = 0
37333733
; GFX10-NEXT: wavefront_sgpr_count = 6
37343734
; GFX10-NEXT: workitem_vgpr_count = 2
@@ -3819,7 +3819,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i3
38193819
; GPRIDX-NEXT: workitem_private_segment_byte_size = 0
38203820
; GPRIDX-NEXT: workgroup_group_segment_byte_size = 0
38213821
; GPRIDX-NEXT: gds_segment_byte_size = 0
3822-
; GPRIDX-NEXT: kernarg_segment_byte_size = 28
3822+
; GPRIDX-NEXT: kernarg_segment_byte_size = 12
38233823
; GPRIDX-NEXT: workgroup_fbarrier_count = 0
38243824
; GPRIDX-NEXT: wavefront_sgpr_count = 7
38253825
; GPRIDX-NEXT: workitem_vgpr_count = 3
@@ -3906,7 +3906,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i3
39063906
; MOVREL-NEXT: workitem_private_segment_byte_size = 0
39073907
; MOVREL-NEXT: workgroup_group_segment_byte_size = 0
39083908
; MOVREL-NEXT: gds_segment_byte_size = 0
3909-
; MOVREL-NEXT: kernarg_segment_byte_size = 28
3909+
; MOVREL-NEXT: kernarg_segment_byte_size = 12
39103910
; MOVREL-NEXT: workgroup_fbarrier_count = 0
39113911
; MOVREL-NEXT: wavefront_sgpr_count = 7
39123912
; MOVREL-NEXT: workitem_vgpr_count = 4
@@ -3994,7 +3994,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i3
39943994
; GFX10-NEXT: workitem_private_segment_byte_size = 0
39953995
; GFX10-NEXT: workgroup_group_segment_byte_size = 0
39963996
; GFX10-NEXT: gds_segment_byte_size = 0
3997-
; GFX10-NEXT: kernarg_segment_byte_size = 28
3997+
; GFX10-NEXT: kernarg_segment_byte_size = 12
39983998
; GFX10-NEXT: workgroup_fbarrier_count = 0
39993999
; GFX10-NEXT: wavefront_sgpr_count = 7
40004000
; GFX10-NEXT: workitem_vgpr_count = 3

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,9 @@ define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out
7474
ret void
7575
}
7676

77-
; Mesa implies 16-bytes are always allocated, hsa requires the
78-
; attribute for the additional space.
7977
; ALL-LABEL: {{^}}test_no_kernargs:
80-
; HSA: enable_sgpr_kernarg_segment_ptr = 0
81-
; HSA: kernarg_segment_byte_size = 0
82-
83-
; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 1
84-
; OS-MESA3D: kernarg_segment_byte_size = 16
78+
; CO-V2: enable_sgpr_kernarg_segment_ptr = 0
79+
; CO-V2: kernarg_segment_byte_size = 0
8580
; CO-V2: kernarg_segment_alignment = 4
8681

8782
; HSA: s_mov_b64 [[OFFSET_NULL:s\[[0-9]+:[0-9]+\]]], 40{{$}}
@@ -97,7 +92,7 @@ define amdgpu_kernel void @test_no_kernargs() #1 {
9792

9893
; ALL-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs:
9994
; HSA: kernarg_segment_byte_size = 48
100-
; OS-MESA3d: kernarg_segment_byte_size = 16
95+
; OS-MESA3D: kernarg_segment_byte_size = 16
10196
; CO-V2: kernarg_segment_alignment = 4
10297
define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
10398
%implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()

llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
7676
; CHECK-NEXT: - 0
7777
; CHECK-NOT: amdhsa.printf:
7878

79-
attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
80-
attributes #1 = { "calls-enqueue-kernel" "amdgpu-implicitarg-num-bytes"="48" }
79+
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
80+
attributes #1 = { optnone noinline "calls-enqueue-kernel" "amdgpu-implicitarg-num-bytes"="48" }
8181

8282
!1 = !{i32 0}
8383
!2 = !{!"none"}

llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
7272
ret void
7373
}
7474

75-
attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
76-
attributes #1 = { "calls-enqueue-kernel" "amdgpu-implicitarg-num-bytes"="48" }
75+
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
76+
attributes #1 = { optnone noinline "calls-enqueue-kernel" "amdgpu-implicitarg-num-bytes"="48" }
7777

7878
!1 = !{i32 0}
7979
!2 = !{!"none"}

llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1894,9 +1894,9 @@ define amdgpu_kernel void @unknown_addrspace_kernarg(i32 addrspace(12345)* %ptr)
18941894
; CHECK-NEXT: - 1
18951895
; CHECK-NEXT: - 0
18961896

1897-
attributes #0 = { "amdgpu-implicitarg-num-bytes"="56" }
1898-
attributes #1 = { "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
1899-
attributes #2 = { "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" }
1897+
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }
1898+
attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
1899+
attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" }
19001900

19011901
!llvm.printf.fmts = !{!100, !101}
19021902

llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1866,9 +1866,9 @@ define amdgpu_kernel void @unknown_addrspace_kernarg(i32 addrspace(12345)* %ptr)
18661866
ret void
18671867
}
18681868

1869-
attributes #0 = { "amdgpu-implicitarg-num-bytes"="56" }
1870-
attributes #1 = { "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
1871-
attributes #2 = { "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" }
1869+
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }
1870+
attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
1871+
attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" }
18721872

18731873
!llvm.printf.fmts = !{!100, !101}
18741874

llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -296,9 +296,11 @@ entry:
296296
; CHECK-NEXT: - 1
297297
; CHECK-NEXT: - 0
298298

299-
attributes #0 = { "amdgpu-implicitarg-num-bytes"="8" }
300-
attributes #1 = { "amdgpu-implicitarg-num-bytes"="16" }
301-
attributes #2 = { "amdgpu-implicitarg-num-bytes"="24" }
302-
attributes #3 = { "amdgpu-implicitarg-num-bytes"="32" }
303-
attributes #4 = { "amdgpu-implicitarg-num-bytes"="48" }
304-
attributes #5 = { "amdgpu-implicitarg-num-bytes"="56" }
299+
; We don't have a use of llvm.amdgcn.implicitarg.ptr, so use optnone to
300+
; avoid optimizing out the implicit argument allocation.
301+
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="8" }
302+
attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="16" }
303+
attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="24" }
304+
attributes #3 = { optnone noinline "amdgpu-implicitarg-num-bytes"="32" }
305+
attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
306+
attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }

llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -300,9 +300,11 @@ entry:
300300
ret void
301301
}
302302

303-
attributes #0 = { "amdgpu-implicitarg-num-bytes"="8" }
304-
attributes #1 = { "amdgpu-implicitarg-num-bytes"="16" }
305-
attributes #2 = { "amdgpu-implicitarg-num-bytes"="24" }
306-
attributes #3 = { "amdgpu-implicitarg-num-bytes"="32" }
307-
attributes #4 = { "amdgpu-implicitarg-num-bytes"="48" }
308-
attributes #5 = { "amdgpu-implicitarg-num-bytes"="56" }
303+
; We don't have a use of llvm.amdgcn.implicitarg.ptr, so use optnone to
304+
; avoid optimizing out the implicit argument allocation.
305+
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="8" }
306+
attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="16" }
307+
attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="24" }
308+
attributes #3 = { optnone noinline "amdgpu-implicitarg-num-bytes"="32" }
309+
attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
310+
attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }

llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ define amdgpu_kernel void @test_kernel(i8 %a) #0
3838
; CHECK-NEXT: - 1
3939
; CHECK-NEXT: - 0
4040

41-
attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
41+
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
4242

4343
!1 = !{i32 0}
4444
!2 = !{!"none"}

llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ define amdgpu_kernel void @test_kernel(i8 %a) #0
3535
ret void
3636
}
3737

38-
attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
38+
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
3939

4040
!1 = !{i32 0}
4141
!2 = !{!"none"}

llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ define amdgpu_kernel void @test_kernel(i8 %a) #0
3939
; CHECK-NEXT: - 1
4040
; CHECK-NEXT: - 0
4141

42-
attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
42+
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
4343

4444
!1 = !{i32 0}
4545
!2 = !{!"none"}

llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ define amdgpu_kernel void @test_kernel(i8 %a) #0
4040
ret void
4141
}
4242

43-
attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
43+
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
4444

4545
!1 = !{i32 0}
4646
!2 = !{!"none"}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,14 +75,10 @@ define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out
7575
ret void
7676
}
7777

78-
; Mesa implies 16-bytes are always allocated, hsa requires the
79-
; attribute for the additional space.
8078
; ALL-LABEL: {{^}}test_no_kernargs:
81-
; HSA: enable_sgpr_kernarg_segment_ptr = 0
82-
; HSA: kernarg_segment_byte_size = 0
79+
; CO-V2: enable_sgpr_kernarg_segment_ptr = 0
80+
; CO-V2: kernarg_segment_byte_size = 0
8381

84-
; OS-MESA3D: enable_sgpr_kernarg_segment_ptr = 1
85-
; OS-MESA3D: kernarg_segment_byte_size = 16
8682
; CO-V2: kernarg_segment_alignment = 4
8783

8884
; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}}

0 commit comments

Comments
 (0)