[AMDGPU] Set Convergent property for image.(getlod/sample*) intrinsics which uses WQM #122908

mariusz-sikora-at-amd · 2025-01-14T14:32:48Z

This change adds IntrConvergent property to image.getlod intrinsic and to several image.sample intrinsics. All image.sample intrinsics apart from LOD(_L), Level 0(_LZ), Derivative(_D) will be marked as Convergent.

…s which uses WQM This change adds IntrConvergent property to image.getlod intrinsic and to several image.sample intrinsics. All image.sample intrinsics apart from LOD(_L), Level 0(_LZ), Derivative(_D) will be marked as Convergent.

llvmbot · 2025-01-14T14:33:22Z

@llvm/pr-subscribers-backend-amdgpu

Author: Mariusz Sikora (mariusz-sikora-at-amd)

Changes

This change adds IntrConvergent property to image.getlod intrinsic and to several image.sample intrinsics. All image.sample intrinsics apart from LOD(_L), Level 0(_LZ), Derivative(_D) will be marked as Convergent.

Full diff: https://github.com/llvm/llvm-project/pull/122908.diff

2 Files Affected:

(modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+8-3)
(modified) llvm/test/CodeGen/AMDGPU/sink-image-sample.ll (+2-2)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index b930d6983e2251..b529642a558710 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -876,6 +876,8 @@ class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr
   // Name of the {lod} or {clamp} argument that is appended to the coordinates,
   // if any.
   string LodOrClamp = "";
+
+  bit UsesWQM = false;
 }
 
 // AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE
@@ -905,8 +907,9 @@ defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
   }
 
   defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
+    let UsesWQM = true in
     defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
-    let Bias = true in
+    let Bias = true, UsesWQM = true in
     defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
         "_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
     let LodOrClamp = "lod" in
@@ -1172,7 +1175,8 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
     foreach dim = AMDGPUDims.NoMsaa in {
       def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
           AMDGPUDimSampleProfile<opmod, dim, sample>,
-          !if(NoMem, [IntrNoMem], [IntrReadMem]),
+          !listconcat(!if(NoMem, [IntrNoMem], [IntrReadMem]),
+                      !if(sample.UsesWQM, [IntrConvergent], [])),
           !if(NoMem, [], [SDNPMemOperand])>;
     }
   }
@@ -1188,7 +1192,8 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
     foreach dim = AMDGPUDims.NoMsaa in {
       def !strconcat(NAME, "_", dim.Name, "_nortn") : AMDGPUImageDimIntrinsic<
           AMDGPUDimSampleNoReturnProfile<opmod, dim, sample>,
-          [IntrWillReturn], [SDNPMemOperand]>;
+          !listconcat([IntrWillReturn], !if(sample.UsesWQM, [IntrConvergent], [])),
+          [SDNPMemOperand]>;
     }
   }
   foreach sample = AMDGPUSampleVariants in {
diff --git a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
index e1273e1a4bcd08..8a3b544ac2b80f 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
@@ -11,7 +11,7 @@
 
 define amdgpu_ps float @sinking_img_sample() {
 main_body:
-  %i = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
+  %i = call <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 7, float undef, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
   br i1 undef, label %endif1, label %if1
 
 if1:                                              ; preds = %main_body
@@ -28,7 +28,7 @@ exit:                                             ; preds = %endif1, %if1
   ret float %i24
 }
 ; Function Attrs: nounwind readonly willreturn
-declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
+declare <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
 
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
 declare float @llvm.fma.f32(float, float, float) #2

llvmbot · 2025-01-14T14:33:23Z

@llvm/pr-subscribers-llvm-ir

Author: Mariusz Sikora (mariusz-sikora-at-amd)

Changes

This change adds IntrConvergent property to image.getlod intrinsic and to several image.sample intrinsics. All image.sample intrinsics apart from LOD(_L), Level 0(_LZ), Derivative(_D) will be marked as Convergent.

Full diff: https://github.com/llvm/llvm-project/pull/122908.diff

2 Files Affected:

(modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+8-3)
(modified) llvm/test/CodeGen/AMDGPU/sink-image-sample.ll (+2-2)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index b930d6983e2251..b529642a558710 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -876,6 +876,8 @@ class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr
   // Name of the {lod} or {clamp} argument that is appended to the coordinates,
   // if any.
   string LodOrClamp = "";
+
+  bit UsesWQM = false;
 }
 
 // AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE
@@ -905,8 +907,9 @@ defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
   }
 
   defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
+    let UsesWQM = true in
     defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
-    let Bias = true in
+    let Bias = true, UsesWQM = true in
     defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
         "_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
     let LodOrClamp = "lod" in
@@ -1172,7 +1175,8 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
     foreach dim = AMDGPUDims.NoMsaa in {
       def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
           AMDGPUDimSampleProfile<opmod, dim, sample>,
-          !if(NoMem, [IntrNoMem], [IntrReadMem]),
+          !listconcat(!if(NoMem, [IntrNoMem], [IntrReadMem]),
+                      !if(sample.UsesWQM, [IntrConvergent], [])),
           !if(NoMem, [], [SDNPMemOperand])>;
     }
   }
@@ -1188,7 +1192,8 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
     foreach dim = AMDGPUDims.NoMsaa in {
       def !strconcat(NAME, "_", dim.Name, "_nortn") : AMDGPUImageDimIntrinsic<
           AMDGPUDimSampleNoReturnProfile<opmod, dim, sample>,
-          [IntrWillReturn], [SDNPMemOperand]>;
+          !listconcat([IntrWillReturn], !if(sample.UsesWQM, [IntrConvergent], [])),
+          [SDNPMemOperand]>;
     }
   }
   foreach sample = AMDGPUSampleVariants in {
diff --git a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
index e1273e1a4bcd08..8a3b544ac2b80f 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
@@ -11,7 +11,7 @@
 
 define amdgpu_ps float @sinking_img_sample() {
 main_body:
-  %i = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
+  %i = call <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 7, float undef, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
   br i1 undef, label %endif1, label %if1
 
 if1:                                              ; preds = %main_body
@@ -28,7 +28,7 @@ exit:                                             ; preds = %endif1, %if1
   ret float %i24
 }
 ; Function Attrs: nounwind readonly willreturn
-declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
+declare <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
 
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
 declare float @llvm.fma.f32(float, float, float) #2

github-actions · 2025-01-14T14:36:09Z

✅ With the latest revision this PR passed the undef deprecator.

arsenm · 2025-01-14T14:40:16Z

llvm/test/CodeGen/AMDGPU/sink-image-sample.ll

@@ -11,7 +11,7 @@

 define amdgpu_ps float @sinking_img_sample() {
 main_body:
-  %i = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
+  %i = call <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 7, float undef, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)


Add a new test instead of changing the operation in an old one? Presumably we should have tests for all the cases

I added more tests to cover other cases of image.sample

perlfu

The code change looks fine to me.
However I think for completeness this also needs to be applied to some variants of image_gather4 as well.

mariusz-sikora-at-amd · 2025-01-15T08:43:03Z

The code change looks fine to me. However I think for completeness this also needs to be applied to some variants of image_gather4 as well.

I will prepare new PR for image_gather4

mariusz-sikora-at-amd requested review from perlfu, nhaehnle and piotrAMD January 14, 2025 14:32

llvmbot added backend:AMDGPU llvm:ir labels Jan 14, 2025

arsenm reviewed Jan 14, 2025

View reviewed changes

Add more tests for image sample

4699392

arsenm approved these changes Jan 15, 2025

View reviewed changes

perlfu reviewed Jan 15, 2025

View reviewed changes

mariusz-sikora-at-amd merged commit b3924cb into llvm:main Jan 15, 2025
8 checks passed

mariusz-sikora-at-amd deleted the pub-image-sample-convergent branch January 15, 2025 09:23

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Set Convergent property for image.(getlod/sample*) intrinsics which uses WQM #122908

[AMDGPU] Set Convergent property for image.(getlod/sample*) intrinsics which uses WQM #122908

Uh oh!

mariusz-sikora-at-amd commented Jan 14, 2025

Uh oh!

llvmbot commented Jan 14, 2025

Uh oh!

llvmbot commented Jan 14, 2025

Uh oh!

github-actions bot commented Jan 14, 2025 •

edited

Loading

Uh oh!

arsenm Jan 14, 2025

Uh oh!

mariusz-sikora-at-amd Jan 14, 2025

Uh oh!

perlfu left a comment

Uh oh!

mariusz-sikora-at-amd commented Jan 15, 2025

Uh oh!

Uh oh!

Uh oh!

[AMDGPU] Set Convergent property for image.(getlod/sample*) intrinsics which uses WQM #122908

[AMDGPU] Set Convergent property for image.(getlod/sample*) intrinsics which uses WQM #122908

Uh oh!

Conversation

mariusz-sikora-at-amd commented Jan 14, 2025

Uh oh!

llvmbot commented Jan 14, 2025

Uh oh!

llvmbot commented Jan 14, 2025

Uh oh!

github-actions bot commented Jan 14, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

arsenm Jan 14, 2025

Choose a reason for hiding this comment

Uh oh!

mariusz-sikora-at-amd Jan 14, 2025

Choose a reason for hiding this comment

Uh oh!

perlfu left a comment

Choose a reason for hiding this comment

Uh oh!

mariusz-sikora-at-amd commented Jan 15, 2025

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Jan 14, 2025 •

edited

Loading