Skip to content

Commit 7047cb5

Browse files
author
Mateja Marjanovic
committed
[AMDGPU] Trim trailing undefs from image and buffer stores
Remove undef values from the end of the vector operand in image and buffer store instructions. Also, instead of calling computeKnownFPClass, use only findScalarElement.
Continuation of: 88421ea ("Trim zero components from buffer and image stores").
Differential Revision: https://reviews.llvm.org/D152440
1 parent 974b1a6 commit 7047cb5

File tree

2 files changed

+24
-8
lines changed

2 files changed

+24
-8
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -385,17 +385,20 @@ static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
385385
APInt DemandedElts = APInt::getAllOnes(VWidth);
386386

387387
for (int i = VWidth - 1; i > 0; --i) {
388-
APInt DemandOneElt = APInt::getOneBitSet(VWidth, i);
389-
KnownFPClass KnownFPClass =
390-
computeKnownFPClass(UseV, DemandOneElt, IC.getDataLayout(),
391-
/*InterestedClasses=*/fcAllFlags,
392-
/*Depth=*/0, &IC.getTargetLibraryInfo(),
393-
&IC.getAssumptionCache(), I,
394-
&IC.getDominatorTree());
395-
if (KnownFPClass.KnownFPClasses != fcPosZero)
388+
auto *Elt = findScalarElement(UseV, i);
389+
if (!Elt)
396390
break;
391+
392+
if (auto *ConstElt = dyn_cast<Constant>(Elt)) {
393+
if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt))
394+
break;
395+
} else {
396+
break;
397+
}
398+
397399
DemandedElts.clearBit(i);
398400
}
401+
399402
return DemandedElts;
400403
}
401404

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,19 @@ define amdgpu_ps void @struct_tbuffer_store_insert_zeros_at_beginning(<4 x i32>
8484
ret void
8585
}
8686

87+
define amdgpu_ps void @struct_tbuffer_store_insert_undefs(<4 x i32> inreg %a, float %vdata1, i32 %b) {
88+
; GCN-LABEL: @struct_tbuffer_store_insert_undefs(
89+
; GCN-NEXT: [[TMP1:%.*]] = insertelement <2 x float> <float poison, float 1.000000e+00>, float [[VDATA1:%.*]], i64 0
90+
; GCN-NEXT: call void @llvm.amdgcn.struct.tbuffer.store.v2f32(<2 x float> [[TMP1]], <4 x i32> [[A:%.*]], i32 [[B:%.*]], i32 0, i32 42, i32 0, i32 15)
91+
; GCN-NEXT: ret void
92+
;
93+
%newvdata1 = insertelement <4 x float> poison, float %vdata1, i32 0
94+
%newvdata2 = insertelement <4 x float> %newvdata1, float 1.0, i32 1
95+
call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %newvdata2, <4 x i32> %a, i32 %b, i32 0, i32 42, i32 0, i32 15)
96+
ret void
97+
}
98+
99+
87100
declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2
88101
declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
89102
declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2

0 commit comments

Comments
 (0)