Skip to content

Commit cce0982

Browse files
committed
[InstCombine] Handle more scalable geps in EmitGEPOffset
Following up on llvm#71565, this makes scalable splats in EmitGEPOffset use the ElementCount instead of assuming the vector is fixed-width, and attempts to handle scalable offsets with vector GEPs by splatting the vscale-based scale to each vector lane. It appears that the `& PtrSizeMask` can be removed without altering any of the existing tests or any of the tests I tried across AArch64/Arm.
1 parent 9bb69c1 commit cce0982

File tree

2 files changed

+61
-13
lines changed

2 files changed

+61
-13
lines changed

llvm/lib/Analysis/Local.cpp

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -36,25 +36,18 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
3636
Result = Offset;
3737
};
3838

39-
// Build a mask for high order bits.
40-
unsigned IntPtrWidth = IntIdxTy->getScalarType()->getIntegerBitWidth();
41-
uint64_t PtrSizeMask =
42-
std::numeric_limits<uint64_t>::max() >> (64 - IntPtrWidth);
43-
4439
gep_type_iterator GTI = gep_type_begin(GEP);
4540
for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
4641
++i, ++GTI) {
4742
Value *Op = *i;
48-
TypeSize TSize = DL.getTypeAllocSize(GTI.getIndexedType());
49-
uint64_t Size = TSize.getKnownMinValue() & PtrSizeMask;
5043
if (Constant *OpC = dyn_cast<Constant>(Op)) {
5144
if (OpC->isZeroValue())
5245
continue;
5346

5447
// Handle a struct index, which adds its field offset to the pointer.
5548
if (StructType *STy = GTI.getStructTypeOrNull()) {
5649
uint64_t OpValue = OpC->getUniqueInteger().getZExtValue();
57-
Size = DL.getStructLayout(STy)->getElementOffset(OpValue);
50+
unsigned Size = DL.getStructLayout(STy)->getElementOffset(OpValue);
5851
if (!Size)
5952
continue;
6053

@@ -66,16 +59,18 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
6659
// Splat the index if needed.
6760
if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy())
6861
Op = Builder->CreateVectorSplat(
69-
cast<FixedVectorType>(IntIdxTy)->getNumElements(), Op);
62+
cast<VectorType>(IntIdxTy)->getElementCount(), Op);
7063

7164
// Convert to correct type.
7265
if (Op->getType() != IntIdxTy)
7366
Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c");
74-
if (Size != 1 || TSize.isScalable()) {
67+
TypeSize TSize = DL.getTypeAllocSize(GTI.getIndexedType());
68+
if (TSize != TypeSize::Fixed(1)) {
69+
Value *Scale = Builder->CreateTypeSize(IntIdxTy->getScalarType(), TSize);
70+
if (IntIdxTy->isVectorTy())
71+
Scale = Builder->CreateVectorSplat(
72+
cast<VectorType>(IntIdxTy)->getElementCount(), Scale);
7573
// We'll let instcombine(mul) convert this to a shl if possible.
76-
auto *ScaleC = ConstantInt::get(IntIdxTy, Size);
77-
Value *Scale =
78-
!TSize.isScalable() ? ScaleC : Builder->CreateVScale(ScaleC);
7974
Op = Builder->CreateMul(Op, Scale, GEP->getName() + ".idx", false /*NUW*/,
8075
isInBounds /*NSW*/);
8176
}

llvm/test/Transforms/InstCombine/getelementptr.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,59 @@ define <2 x i1> @test13_vector2(i64 %X, <2 x ptr> %P) nounwind {
233233
ret <2 x i1> %C
234234
}
235235

236+
define <2 x i1> @test13_fixed_fixed(i64 %X, ptr %P, <2 x i64> %y) nounwind {
237+
; CHECK-LABEL: @test13_fixed_fixed(
238+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
239+
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], <i64 3, i64 0>
240+
; CHECK-NEXT: [[A_IDX:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
241+
; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], <i64 4, i64 4>
242+
; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[A_IDX]], [[B_IDX]]
243+
; CHECK-NEXT: ret <2 x i1> [[C]]
244+
;
245+
%A = getelementptr inbounds <2 x i64>, ptr %P, <2 x i64> zeroinitializer, i64 %X
246+
%B = getelementptr inbounds <2 x i64>, ptr %P, <2 x i64> %y
247+
%C = icmp eq <2 x ptr> %A, %B
248+
ret <2 x i1> %C
249+
}
250+
251+
define <2 x i1> @test13_fixed_scalable(i64 %X, ptr %P, <2 x i64> %y) nounwind {
252+
; CHECK-LABEL: @test13_fixed_scalable(
253+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
254+
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], <i64 3, i64 0>
255+
; CHECK-NEXT: [[A_IDX:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
256+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
257+
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 4
258+
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
259+
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
260+
; CHECK-NEXT: [[B_IDX:%.*]] = mul nsw <2 x i64> [[DOTSPLAT2]], [[Y:%.*]]
261+
; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[A_IDX]], [[B_IDX]]
262+
; CHECK-NEXT: ret <2 x i1> [[C]]
263+
;
264+
%A = getelementptr inbounds <vscale x 2 x i64>, ptr %P, <2 x i64> zeroinitializer, i64 %X
265+
%B = getelementptr inbounds <vscale x 2 x i64>, ptr %P, <2 x i64> %y
266+
%C = icmp eq <2 x ptr> %A, %B
267+
ret <2 x i1> %C
268+
}
269+
270+
define <vscale x 2 x i1> @test13_scalable_scalable(i64 %X, ptr %P, <vscale x 2 x i64> %y) nounwind {
271+
; CHECK-LABEL: @test13_scalable_scalable(
272+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[X:%.*]], i64 0
273+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
274+
; CHECK-NEXT: [[A_IDX:%.*]] = shl nsw <vscale x 2 x i64> [[DOTSPLAT]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
275+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
276+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
277+
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP2]], i64 0
278+
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
279+
; CHECK-NEXT: [[B_IDX:%.*]] = mul nsw <vscale x 2 x i64> [[DOTSPLAT2]], [[Y:%.*]]
280+
; CHECK-NEXT: [[C:%.*]] = icmp eq <vscale x 2 x i64> [[A_IDX]], [[B_IDX]]
281+
; CHECK-NEXT: ret <vscale x 2 x i1> [[C]]
282+
;
283+
%A = getelementptr inbounds <vscale x 2 x i64>, ptr %P, <vscale x 2 x i64> zeroinitializer, i64 %X
284+
%B = getelementptr inbounds <vscale x 2 x i64>, ptr %P, <vscale x 2 x i64> %y
285+
%C = icmp eq <vscale x 2 x ptr> %A, %B
286+
ret <vscale x 2 x i1> %C
287+
}
288+
236289
; This is a test of icmp + shl nuw in disguise - 4611... is 0x3fff...
237290
define <2 x i1> @test13_vector3(i64 %X, <2 x ptr> %P) nounwind {
238291
; CHECK-LABEL: @test13_vector3(

0 commit comments

Comments
 (0)