Skip to content

Commit fbdec0a

Browse files
committed
Don't simplify image intrinsic with dmask == 0
1 parent 092b865 commit fbdec0a

File tree

3 files changed: +10 -2 lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1241,6 +1241,10 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
1241 1241   ConstantInt *DMask = cast<ConstantInt>(Args[DMaskIdx]);
1242 1242   unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1243 1243
     1244 + // dmask 0 has special semantics, do not simplify.
     1245 + if (DMaskVal == 0)
     1246 + return nullptr;
     1247 +
1244 1248   // Mask off values that are undefined because the dmask doesn't cover them
1245 1249   DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
1246 1250

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4792,7 +4792,9 @@ define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %
4792 4792
4793 4793   define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4794 4794   ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
4795      - ; CHECK-NEXT: ret float poison
     4795 + ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
     4796 + ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
     4797 + ; CHECK-NEXT: ret float [[ELT0]]
4796 4798   ;
4797 4799   %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4798 4800   %elt0 = extractelement <4 x float> %data, i32 0

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4791,7 +4791,9 @@ define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %
4791 4791
4792 4792   define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
4793 4793   ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
4794      - ; CHECK-NEXT: ret float poison
     4794 + ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
     4795 + ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0
     4796 + ; CHECK-NEXT: ret float [[ELT0]]
4795 4797   ;
4796 4798   %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
4797 4799   %elt0 = extractelement <4 x float> %data, i32 0

0 commit comments

Comments
 (0)