-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Preserve noundef
and range
during kernel argument loads
#118395
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This commit ensures than noundef (which is frequently a prerequisite for other annotations) and range() annotations on kernel arguments are copied onto their corresponding load from the kernel argument structure.
@llvm/pr-subscribers-backend-amdgpu Author: Krzysztof Drewniak (krzysz00) ChangesThis commit ensures than noundef (which is frequently a prerequisite for other annotations) and range() annotations on kernel arguments are copied onto their corresponding load from the kernel argument structure. Full diff: https://github.com/llvm/llvm-project/pull/118395.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 380633f22a1781..9de4cf82d0faca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -15,6 +15,7 @@
#include "GCNSubtarget.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
@@ -416,6 +417,16 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
MDBuilder MDB(Ctx);
+ if (Arg.hasAttribute(Attribute::NoUndef))
+ Load->setMetadata(LLVMContext::MD_noundef, MDNode::get(Ctx, {}));
+
+ if (Arg.hasAttribute(Attribute::Range)) {
+ const ConstantRange &Range =
+ Arg.getAttribute(Attribute::Range).getValueAsConstantRange();
+ Load->setMetadata(LLVMContext::MD_range,
+ MDB.createRange(Range.getLower(), Range.getUpper()));
+ }
+
if (isa<PointerType>(ArgTy)) {
if (Arg.hasNonNullAttr())
Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
index 043294f5a11634..8b2bf911b0dbc3 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -255,6 +255,25 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) {
ret void
}
+define amdgpu_kernel void @kern_range_noundef_i32(i32 noundef range(i32 0, 8) %arg0) {
+; HSA-LABEL: @kern_range_noundef_i32(
+; HSA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 0
+; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
+; HSA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4
+; HSA-NEXT: ret void
+;
+; MESA-LABEL: @kern_range_noundef_i32(
+; MESA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 36
+; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
+; MESA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4
+; MESA-NEXT: ret void
+;
+ store volatile i32 %arg0, ptr addrspace(1) poison
+ ret void
+}
+
define amdgpu_kernel void @kern_f32(float %arg0) {
; HSA-LABEL: @kern_f32(
; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -1022,14 +1041,14 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(ptr addrspace(1) deref
; HSA-LABEL: @kern_global_ptr_dereferencable(
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META2:![0-9]+]]
+; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_global_ptr_dereferencable(
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
-; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META2:![0-9]+]]
+; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; MESA-NEXT: ret void
;
@@ -1041,14 +1060,14 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(ptr addrspace(
; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META3:![0-9]+]]
+; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
-; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META3:![0-9]+]]
+; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; MESA-NEXT: ret void
;
@@ -1079,14 +1098,14 @@ define amdgpu_kernel void @kern_align32_global_ptr(ptr addrspace(1) align 1024 %
; HSA-LABEL: @kern_align32_global_ptr(
; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META4:![0-9]+]]
+; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META5:![0-9]+]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_align32_global_ptr(
; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
-; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META4:![0-9]+]]
+; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META5:![0-9]+]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; MESA-NEXT: ret void
;
@@ -1120,14 +1139,14 @@ define amdgpu_kernel void @kern_noundef_global_ptr(ptr addrspace(1) noundef %ptr
; HSA-LABEL: @kern_noundef_global_ptr(
; HSA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
+; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_noundef_global_ptr(
; MESA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
-; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
+; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
; MESA-NEXT: ret void
;
@@ -1729,13 +1748,15 @@ attributes #2 = { nounwind "target-cpu"="tahiti" }
;.
; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
; HSA: [[META1]] = !{}
-; HSA: [[META2]] = !{i64 42}
-; HSA: [[META3]] = !{i64 128}
-; HSA: [[META4]] = !{i64 1024}
+; HSA: [[RNG2]] = !{i32 0, i32 8}
+; HSA: [[META3]] = !{i64 42}
+; HSA: [[META4]] = !{i64 128}
+; HSA: [[META5]] = !{i64 1024}
;.
; MESA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
; MESA: [[META1]] = !{}
-; MESA: [[META2]] = !{i64 42}
-; MESA: [[META3]] = !{i64 128}
-; MESA: [[META4]] = !{i64 1024}
+; MESA: [[RNG2]] = !{i32 0, i32 8}
+; MESA: [[META3]] = !{i64 42}
+; MESA: [[META4]] = !{i64 128}
+; MESA: [[META5]] = !{i64 1024}
;.
|
store volatile i32 %arg0, ptr addrspace(1) poison | ||
ret void | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a noundef test that covers FP, vector, pointer
; MESA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4 | ||
; MESA-NEXT: ret void | ||
; | ||
store volatile i32 %arg0, ptr addrspace(1) poison |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Avoid store to poison
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a good idiom here for a side effect that keeps the argument alive? I remember someone recently complaining about stores to undef
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
store to null in non-0 address space is technically well defined, but probably best to just use an ordinary function argument or global variable declaration
This commit ensures than noundef (which is frequently a prerequisite for other annotations) and range() annotations on kernel arguments are copied onto their corresponding load from the kernel argument structure.