Skip to content

Commit 87c21bf

Browse files
authored
[AMDGPU] Preserve noundef and range during kernel argument loads (#118395)
This commit ensures that noundef (which is frequently a prerequisite for other annotations) and range() annotations on kernel arguments are copied onto the corresponding loads from the kernel argument structure.
1 parent e2c3d16 commit 87c21bf

File tree

2 files changed

+157
-22
lines changed

2 files changed

+157
-22
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "GCNSubtarget.h"
1616
#include "llvm/Analysis/ValueTracking.h"
1717
#include "llvm/CodeGen/TargetPassConfig.h"
18+
#include "llvm/IR/Attributes.h"
1819
#include "llvm/IR/IRBuilder.h"
1920
#include "llvm/IR/IntrinsicsAMDGPU.h"
2021
#include "llvm/IR/MDBuilder.h"
@@ -416,6 +417,16 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
416417

417418
MDBuilder MDB(Ctx);
418419

420+
if (Arg.hasAttribute(Attribute::NoUndef))
421+
Load->setMetadata(LLVMContext::MD_noundef, MDNode::get(Ctx, {}));
422+
423+
if (Arg.hasAttribute(Attribute::Range)) {
424+
const ConstantRange &Range =
425+
Arg.getAttribute(Attribute::Range).getValueAsConstantRange();
426+
Load->setMetadata(LLVMContext::MD_range,
427+
MDB.createRange(Range.getLower(), Range.getUpper()));
428+
}
429+
419430
if (isa<PointerType>(ArgTy)) {
420431
if (Arg.hasNonNullAttr())
421432
Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

llvm/test/CodeGen/AMDGPU/lower-kernargs.ll

Lines changed: 146 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
target datalayout = "A5"
66

7+
declare void @llvm.fake.use(...)
8+
79
define amdgpu_kernel void @kern_noargs() {
810
; GCN-LABEL: @kern_noargs(
911
; GCN-NEXT: ret void
@@ -255,6 +257,25 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) {
255257
ret void
256258
}
257259

260+
define amdgpu_kernel void @kern_range_noundef_i32(i32 noundef range(i32 0, 8) %arg0) {
261+
; HSA-LABEL: @kern_range_noundef_i32(
262+
; HSA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
263+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 0
264+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
265+
; HSA-NEXT: call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]])
266+
; HSA-NEXT: ret void
267+
;
268+
; MESA-LABEL: @kern_range_noundef_i32(
269+
; MESA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
270+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 36
271+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
272+
; MESA-NEXT: call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]])
273+
; MESA-NEXT: ret void
274+
;
275+
call void (...) @llvm.fake.use(i32 %arg0)
276+
ret void
277+
}
278+
258279
define amdgpu_kernel void @kern_f32(float %arg0) {
259280
; HSA-LABEL: @kern_f32(
260281
; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -1022,14 +1043,14 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(ptr addrspace(1) deref
10221043
; HSA-LABEL: @kern_global_ptr_dereferencable(
10231044
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10241045
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
1025-
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META2:![0-9]+]]
1046+
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
10261047
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10271048
; HSA-NEXT: ret void
10281049
;
10291050
; MESA-LABEL: @kern_global_ptr_dereferencable(
10301051
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10311052
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
1032-
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META2:![0-9]+]]
1053+
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
10331054
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10341055
; MESA-NEXT: ret void
10351056
;
@@ -1041,14 +1062,14 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(ptr addrspace(
10411062
; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
10421063
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10431064
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
1044-
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META3:![0-9]+]]
1065+
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
10451066
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10461067
; HSA-NEXT: ret void
10471068
;
10481069
; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
10491070
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10501071
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
1051-
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META3:![0-9]+]]
1072+
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
10521073
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10531074
; MESA-NEXT: ret void
10541075
;
@@ -1079,14 +1100,14 @@ define amdgpu_kernel void @kern_align32_global_ptr(ptr addrspace(1) align 1024 %
10791100
; HSA-LABEL: @kern_align32_global_ptr(
10801101
; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10811102
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1082-
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META4:![0-9]+]]
1103+
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META5:![0-9]+]]
10831104
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10841105
; HSA-NEXT: ret void
10851106
;
10861107
; MESA-LABEL: @kern_align32_global_ptr(
10871108
; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
10881109
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1089-
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META4:![0-9]+]]
1110+
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META5:![0-9]+]]
10901111
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
10911112
; MESA-NEXT: ret void
10921113
;
@@ -1120,14 +1141,14 @@ define amdgpu_kernel void @kern_noundef_global_ptr(ptr addrspace(1) noundef %ptr
11201141
; HSA-LABEL: @kern_noundef_global_ptr(
11211142
; HSA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
11221143
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
1123-
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
1144+
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
11241145
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
11251146
; HSA-NEXT: ret void
11261147
;
11271148
; MESA-LABEL: @kern_noundef_global_ptr(
11281149
; MESA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
11291150
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
1130-
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
1151+
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
11311152
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
11321153
; MESA-NEXT: ret void
11331154
;
@@ -1709,6 +1730,105 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref
17091730
ret void
17101731
}
17111732

1733+
define amdgpu_kernel void @noundef_f32(float noundef %arg0) {
1734+
; HSA-LABEL: @noundef_f32(
1735+
; HSA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1736+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 0
1737+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1738+
; HSA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]])
1739+
; HSA-NEXT: ret void
1740+
;
1741+
; MESA-LABEL: @noundef_f32(
1742+
; MESA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1743+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 36
1744+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1745+
; MESA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]])
1746+
; MESA-NEXT: ret void
1747+
;
1748+
call void (...) @llvm.fake.use(float %arg0)
1749+
ret void
1750+
}
1751+
1752+
define amdgpu_kernel void @noundef_f16(half noundef %arg0) {
1753+
; HSA-LABEL: @noundef_f16(
1754+
; HSA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1755+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 0
1756+
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1757+
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
1758+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
1759+
; HSA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]])
1760+
; HSA-NEXT: ret void
1761+
;
1762+
; MESA-LABEL: @noundef_f16(
1763+
; MESA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1764+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 36
1765+
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1766+
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
1767+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
1768+
; MESA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]])
1769+
; MESA-NEXT: ret void
1770+
;
1771+
call void (...) @llvm.fake.use(half %arg0)
1772+
ret void
1773+
}
1774+
1775+
define amdgpu_kernel void @noundef_v2i32(<2 x i32> noundef %arg0) {
1776+
; HSA-LABEL: @noundef_v2i32(
1777+
; HSA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1778+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 0
1779+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1780+
; HSA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]])
1781+
; HSA-NEXT: ret void
1782+
;
1783+
; MESA-LABEL: @noundef_v2i32(
1784+
; MESA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1785+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 36
1786+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1787+
; MESA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]])
1788+
; MESA-NEXT: ret void
1789+
;
1790+
call void (...) @llvm.fake.use(<2 x i32> %arg0)
1791+
ret void
1792+
}
1793+
1794+
define amdgpu_kernel void @noundef_p0(ptr noundef %arg0) {
1795+
; HSA-LABEL: @noundef_p0(
1796+
; HSA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1797+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 0
1798+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1799+
; HSA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]])
1800+
; HSA-NEXT: ret void
1801+
;
1802+
; MESA-LABEL: @noundef_p0(
1803+
; MESA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1804+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 36
1805+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1806+
; MESA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]])
1807+
; MESA-NEXT: ret void
1808+
;
1809+
call void (...) @llvm.fake.use(ptr %arg0)
1810+
ret void
1811+
}
1812+
1813+
define amdgpu_kernel void @noundef_v2p0(<2 x ptr> noundef %arg0) {
1814+
; HSA-LABEL: @noundef_v2p0(
1815+
; HSA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1816+
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 0
1817+
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
1818+
; HSA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]])
1819+
; HSA-NEXT: ret void
1820+
;
1821+
; MESA-LABEL: @noundef_v2p0(
1822+
; MESA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
1823+
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 36
1824+
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
1825+
; MESA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]])
1826+
; MESA-NEXT: ret void
1827+
;
1828+
call void (...) @llvm.fake.use(<2 x ptr> %arg0)
1829+
ret void
1830+
}
1831+
17121832
attributes #0 = { nounwind "target-cpu"="kaveri" }
17131833
attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
17141834
attributes #2 = { nounwind "target-cpu"="tahiti" }
@@ -1717,25 +1837,29 @@ attributes #2 = { nounwind "target-cpu"="tahiti" }
17171837
!llvm.module.flags = !{!0}
17181838
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
17191839
;.
1720-
; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
1721-
; HSA: attributes #[[ATTR1:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
1722-
; HSA: attributes #[[ATTR2:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
1723-
; HSA: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
1840+
; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind }
1841+
; HSA: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
1842+
; HSA: attributes #[[ATTR2:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
1843+
; HSA: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
1844+
; HSA: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
17241845
;.
1725-
; MESA: attributes #[[ATTR0:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
1726-
; MESA: attributes #[[ATTR1:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
1727-
; MESA: attributes #[[ATTR2:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
1728-
; MESA: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
1846+
; MESA: attributes #[[ATTR0:[0-9]+]] = { nounwind }
1847+
; MESA: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
1848+
; MESA: attributes #[[ATTR2:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
1849+
; MESA: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
1850+
; MESA: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
17291851
;.
17301852
; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
17311853
; HSA: [[META1]] = !{}
1732-
; HSA: [[META2]] = !{i64 42}
1733-
; HSA: [[META3]] = !{i64 128}
1734-
; HSA: [[META4]] = !{i64 1024}
1854+
; HSA: [[RNG2]] = !{i32 0, i32 8}
1855+
; HSA: [[META3]] = !{i64 42}
1856+
; HSA: [[META4]] = !{i64 128}
1857+
; HSA: [[META5]] = !{i64 1024}
17351858
;.
17361859
; MESA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
17371860
; MESA: [[META1]] = !{}
1738-
; MESA: [[META2]] = !{i64 42}
1739-
; MESA: [[META3]] = !{i64 128}
1740-
; MESA: [[META4]] = !{i64 1024}
1861+
; MESA: [[RNG2]] = !{i32 0, i32 8}
1862+
; MESA: [[META3]] = !{i64 42}
1863+
; MESA: [[META4]] = !{i64 128}
1864+
; MESA: [[META5]] = !{i64 1024}
17411865
;.

0 commit comments

Comments
 (0)