Skip to content

Commit 3ccbd68

Browse files
authored
[InstCombine] Handle scalable geps in EmitGEPOffset (#71565)
This adds handling for scalable vector types to emitGEPOffset. The issue was noticed in some tests that Biplob was creating, so it may be unlikely to come up much in practice. I've attempted to add test coverage for the various places emitGEPOffset is called. Multiple copies of the vscale intrinsic call are currently emitted, relying on later CSE to combine them.
1 parent 98d8b68 commit 3ccbd68

File tree

3 files changed

+138
-4
lines changed

3 files changed

+138
-4
lines changed

llvm/lib/Analysis/Local.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
4545
for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
4646
++i, ++GTI) {
4747
Value *Op = *i;
48-
uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
48+
TypeSize TSize = DL.getTypeAllocSize(GTI.getIndexedType());
49+
uint64_t Size = TSize.getKnownMinValue() & PtrSizeMask;
4950
if (Constant *OpC = dyn_cast<Constant>(Op)) {
5051
if (OpC->isZeroValue())
5152
continue;
@@ -70,10 +71,12 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
7071
// Convert to correct type.
7172
if (Op->getType() != IntIdxTy)
7273
Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c");
73-
if (Size != 1) {
74+
if (Size != 1 || TSize.isScalable()) {
7475
// We'll let instcombine(mul) convert this to a shl if possible.
75-
Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size),
76-
GEP->getName() + ".idx", false /*NUW*/,
76+
auto *ScaleC = ConstantInt::get(IntIdxTy, Size);
77+
Value *Scale =
78+
!TSize.isScalable() ? ScaleC : Builder->CreateVScale(ScaleC);
79+
Op = Builder->CreateMul(Op, Scale, GEP->getName() + ".idx", false /*NUW*/,
7780
isInBounds /*NSW*/);
7881
}
7982
AddOffset(Op);

llvm/test/Transforms/InstCombine/icmp-gep.ll

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,3 +396,73 @@ define i1 @test61_as1(ptr addrspace(1) %foo, i16 %i, i16 %j) {
396396
ret i1 %cmp
397397
; Don't transform non-inbounds GEPs.
398398
}
399+
400+
define i1 @test_scalable_same(ptr %x) {
401+
; CHECK-LABEL: @test_scalable_same(
402+
; CHECK-NEXT: ret i1 false
403+
;
404+
%a = getelementptr <vscale x 4 x i8>, ptr %x, i64 8
405+
%b = getelementptr inbounds <vscale x 4 x i8>, ptr %x, i64 8
406+
%c = icmp ugt ptr %a, %b
407+
ret i1 %c
408+
}
409+
410+
define i1 @test_scalable_x(ptr %x) {
411+
; CHECK-LABEL: @test_scalable_x(
412+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
413+
; CHECK-NEXT: [[A_IDX_MASK:%.*]] = and i64 [[TMP1]], 576460752303423487
414+
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[A_IDX_MASK]], 0
415+
; CHECK-NEXT: ret i1 [[C]]
416+
;
417+
%a = getelementptr <vscale x 4 x i8>, ptr %x, i64 8
418+
%c = icmp eq ptr %a, %x
419+
ret i1 %c
420+
}
421+
422+
define i1 @test_scalable_xc(ptr %x) {
423+
; CHECK-LABEL: @test_scalable_xc(
424+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
425+
; CHECK-NEXT: [[A_IDX_MASK:%.*]] = and i64 [[TMP1]], 576460752303423487
426+
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[A_IDX_MASK]], 0
427+
; CHECK-NEXT: ret i1 [[C]]
428+
;
429+
%a = getelementptr <vscale x 4 x i8>, ptr %x, i64 8
430+
%c = icmp eq ptr %x, %a
431+
ret i1 %c
432+
}
433+
434+
define i1 @test_scalable_xy(ptr %foo, i64 %i, i64 %j) {
435+
; CHECK-LABEL: @test_scalable_xy(
436+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
437+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 2
438+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[TMP2]], [[J:%.*]]
439+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
440+
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 4
441+
; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[TMP4]], [[I:%.*]]
442+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[GEP2_IDX]], [[GEP1_IDX]]
443+
; CHECK-NEXT: ret i1 [[CMP]]
444+
;
445+
%bit = addrspacecast ptr %foo to ptr addrspace(3)
446+
%gep1 = getelementptr inbounds <vscale x 4 x i32>, ptr addrspace(3) %bit, i64 %i
447+
%gep2 = getelementptr inbounds <vscale x 4 x i8>, ptr %foo, i64 %j
448+
%cast1 = addrspacecast ptr addrspace(3) %gep1 to ptr
449+
%cmp = icmp ult ptr %cast1, %gep2
450+
ret i1 %cmp
451+
}
452+
453+
define i1 @test_scalable_ij(ptr %foo, i64 %i, i64 %j) {
454+
; CHECK-LABEL: @test_scalable_ij(
455+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
456+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
457+
; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[TMP2]], [[I:%.*]]
458+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
459+
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 2
460+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[TMP4]], [[J:%.*]]
461+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[GEP1_IDX]], [[GEP2_IDX]]
462+
; CHECK-NEXT: ret i1 [[CMP]]
463+
;
464+
%gep1 = getelementptr inbounds <vscale x 4 x i32>, ptr %foo, i64 %i
465+
%gep2 = getelementptr inbounds <vscale x 4 x i8>, ptr %foo, i64 %j
466+
%cmp = icmp ult ptr %gep1, %gep2
467+
ret i1 %cmp
468+
}

llvm/test/Transforms/InstCombine/sub-gep.ll

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,67 @@ define i64 @gep_diff_with_bitcast(ptr %p, i64 %idx) {
370370
ret i64 %i6
371371
}
372372

373+
define i64 @sub_scalable(ptr noundef %val1) {
374+
; CHECK-LABEL: @sub_scalable(
375+
; CHECK-NEXT: entry:
376+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
377+
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
378+
; CHECK-NEXT: ret i64 [[TMP1]]
379+
;
380+
entry:
381+
%gep1 = getelementptr <vscale x 4 x i32>, ptr %val1, i64 1
382+
%sub.ptr.lhs.cast.i = ptrtoint ptr %gep1 to i64
383+
%sub.ptr.rhs.cast.i = ptrtoint ptr %val1 to i64
384+
%sub.ptr.sub.i = sub i64 %sub.ptr.lhs.cast.i, %sub.ptr.rhs.cast.i
385+
ret i64 %sub.ptr.sub.i
386+
}
387+
388+
define i64 @sub_scalable2(ptr noundef %val1) {
389+
; CHECK-LABEL: @sub_scalable2(
390+
; CHECK-NEXT: entry:
391+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
392+
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
393+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
394+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[TMP2]], 5
395+
; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[TMP1]], [[GEP2_IDX]]
396+
; CHECK-NEXT: ret i64 [[GEPDIFF]]
397+
;
398+
entry:
399+
%gep1 = getelementptr <vscale x 4 x i32>, ptr %val1, i64 1
400+
%sub.ptr.lhs.cast.i = ptrtoint ptr %gep1 to i64
401+
%gep2 = getelementptr <vscale x 4 x i32>, ptr %val1, i64 2
402+
%sub.ptr.rhs.cast.i = ptrtoint ptr %gep2 to i64
403+
%sub.ptr.sub.i = sub i64 %sub.ptr.lhs.cast.i, %sub.ptr.rhs.cast.i
404+
ret i64 %sub.ptr.sub.i
405+
}
406+
407+
define i64 @nullptrtoint_scalable_c() {
408+
; CHECK-LABEL: @nullptrtoint_scalable_c(
409+
; CHECK-NEXT: entry:
410+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
411+
; CHECK-NEXT: [[PTR_IDX:%.*]] = shl i64 [[TMP0]], 7
412+
; CHECK-NEXT: ret i64 [[PTR_IDX]]
413+
;
414+
entry:
415+
%ptr = getelementptr inbounds <vscale x 4 x i32>, ptr null, i64 8
416+
%ret = ptrtoint ptr %ptr to i64
417+
ret i64 %ret
418+
}
419+
420+
define i64 @nullptrtoint_scalable_x(i64 %x) {
421+
; CHECK-LABEL: @nullptrtoint_scalable_x(
422+
; CHECK-NEXT: entry:
423+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
424+
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
425+
; CHECK-NEXT: [[PTR_IDX:%.*]] = mul nsw i64 [[TMP1]], [[X:%.*]]
426+
; CHECK-NEXT: ret i64 [[PTR_IDX]]
427+
;
428+
entry:
429+
%ptr = getelementptr inbounds <vscale x 4 x i32>, ptr null, i64 %x
430+
%ret = ptrtoint ptr %ptr to i64
431+
ret i64 %ret
432+
}
433+
373434
define i1 @_gep_phi1(ptr noundef %str1) {
374435
; CHECK-LABEL: @_gep_phi1(
375436
; CHECK-NEXT: entry:

0 commit comments

Comments
 (0)