-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[AMDGPU] Set Convergent property for image.(getlod/sample*) intrinsics which uses WQM #122908
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Set Convergent property for image.(getlod/sample*) intrinsics which uses WQM #122908
Conversation
…s which uses WQM This change adds IntrConvergent property to image.getlod intrinsic and to several image.sample intrinsics. All image.sample intrinsics apart from LOD(_L), Level 0(_LZ), Derivative(_D) will be marked as Convergent.
@llvm/pr-subscribers-backend-amdgpu Author: Mariusz Sikora (mariusz-sikora-at-amd) ChangesThis change adds IntrConvergent property to image.getlod intrinsic and to several image.sample intrinsics. All image.sample intrinsics apart from LOD(_L), Level 0(_LZ), Derivative(_D) will be marked as Convergent. Full diff: https://github.com/llvm/llvm-project/pull/122908.diff 2 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index b930d6983e2251..b529642a558710 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -876,6 +876,8 @@ class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr
// Name of the {lod} or {clamp} argument that is appended to the coordinates,
// if any.
string LodOrClamp = "";
+
+ bit UsesWQM = false;
}
// AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE
@@ -905,8 +907,9 @@ defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
}
defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
+ let UsesWQM = true in
defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
- let Bias = true in
+ let Bias = true, UsesWQM = true in
defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
"_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
let LodOrClamp = "lod" in
@@ -1172,7 +1175,8 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
foreach dim = AMDGPUDims.NoMsaa in {
def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
AMDGPUDimSampleProfile<opmod, dim, sample>,
- !if(NoMem, [IntrNoMem], [IntrReadMem]),
+ !listconcat(!if(NoMem, [IntrNoMem], [IntrReadMem]),
+ !if(sample.UsesWQM, [IntrConvergent], [])),
!if(NoMem, [], [SDNPMemOperand])>;
}
}
@@ -1188,7 +1192,8 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
foreach dim = AMDGPUDims.NoMsaa in {
def !strconcat(NAME, "_", dim.Name, "_nortn") : AMDGPUImageDimIntrinsic<
AMDGPUDimSampleNoReturnProfile<opmod, dim, sample>,
- [IntrWillReturn], [SDNPMemOperand]>;
+ !listconcat([IntrWillReturn], !if(sample.UsesWQM, [IntrConvergent], [])),
+ [SDNPMemOperand]>;
}
}
foreach sample = AMDGPUSampleVariants in {
diff --git a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
index e1273e1a4bcd08..8a3b544ac2b80f 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
@@ -11,7 +11,7 @@
define amdgpu_ps float @sinking_img_sample() {
main_body:
- %i = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
+ %i = call <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 7, float undef, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
br i1 undef, label %endif1, label %if1
if1: ; preds = %main_body
@@ -28,7 +28,7 @@ exit: ; preds = %endif1, %if1
ret float %i24
}
; Function Attrs: nounwind readonly willreturn
-declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
+declare <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare float @llvm.fma.f32(float, float, float) #2
|
@llvm/pr-subscribers-llvm-ir Author: Mariusz Sikora (mariusz-sikora-at-amd) ChangesThis change adds IntrConvergent property to image.getlod intrinsic and to several image.sample intrinsics. All image.sample intrinsics apart from LOD(_L), Level 0(_LZ), Derivative(_D) will be marked as Convergent. Full diff: https://github.com/llvm/llvm-project/pull/122908.diff 2 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index b930d6983e2251..b529642a558710 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -876,6 +876,8 @@ class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr
// Name of the {lod} or {clamp} argument that is appended to the coordinates,
// if any.
string LodOrClamp = "";
+
+ bit UsesWQM = false;
}
// AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE
@@ -905,8 +907,9 @@ defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
}
defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
+ let UsesWQM = true in
defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
- let Bias = true in
+ let Bias = true, UsesWQM = true in
defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
"_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
let LodOrClamp = "lod" in
@@ -1172,7 +1175,8 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
foreach dim = AMDGPUDims.NoMsaa in {
def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
AMDGPUDimSampleProfile<opmod, dim, sample>,
- !if(NoMem, [IntrNoMem], [IntrReadMem]),
+ !listconcat(!if(NoMem, [IntrNoMem], [IntrReadMem]),
+ !if(sample.UsesWQM, [IntrConvergent], [])),
!if(NoMem, [], [SDNPMemOperand])>;
}
}
@@ -1188,7 +1192,8 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
foreach dim = AMDGPUDims.NoMsaa in {
def !strconcat(NAME, "_", dim.Name, "_nortn") : AMDGPUImageDimIntrinsic<
AMDGPUDimSampleNoReturnProfile<opmod, dim, sample>,
- [IntrWillReturn], [SDNPMemOperand]>;
+ !listconcat([IntrWillReturn], !if(sample.UsesWQM, [IntrConvergent], [])),
+ [SDNPMemOperand]>;
}
}
foreach sample = AMDGPUSampleVariants in {
diff --git a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
index e1273e1a4bcd08..8a3b544ac2b80f 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
@@ -11,7 +11,7 @@
define amdgpu_ps float @sinking_img_sample() {
main_body:
- %i = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
+ %i = call <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 7, float undef, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
br i1 undef, label %endif1, label %if1
if1: ; preds = %main_body
@@ -28,7 +28,7 @@ exit: ; preds = %endif1, %if1
ret float %i24
}
; Function Attrs: nounwind readonly willreturn
-declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
+declare <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare float @llvm.fma.f32(float, float, float) #2
|
✅ With the latest revision this PR passed the undef deprecator. |
@@ -11,7 +11,7 @@ | |||
|
|||
define amdgpu_ps float @sinking_img_sample() { | |||
main_body: | |||
%i = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) | |||
%i = call <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 7, float undef, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a new test instead of changing the operation in an old one? Presumably we should have tests for all the cases
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added more tests to cover other cases of image.sample
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code change looks fine to me.
However I think for completeness this also needs to be applied to some variants of image_gather4
as well.
I will prepare new PR for image_gather4 |
This change adds IntrConvergent property to image.getlod intrinsic and to several image.sample intrinsics. All image.sample intrinsics apart from LOD(_L), Level 0(_LZ), Derivative(_D) will be marked as Convergent.