Skip to content

Commit a848916

Browse files
authored
[instcombine] Scalarize operands of vector geps if possible (#145402)
If we have a gep with vector indices which were splats (either constants or shuffles), prefer the scalar form of the index. If all operands are scalarizable, then prefer a scalar gep with splat following. This does lose some information about undef/poison lanes, but I'm not sure that's significant versus the number of downstream transformations which get confused by having to manually scalarize operands.
1 parent cb4fb3a commit a848916

File tree

7 files changed

+54
-44
lines changed

7 files changed

+54
-44
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2993,10 +2993,6 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
29932993
return replaceInstUsesWith(GEP, V);
29942994
return &GEP;
29952995
}
2996-
2997-
// TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
2998-
// possible (decide on canonical form for pointer broadcast), 3) exploit
2999-
// undef elements to decrease demanded bits
30002996
}
30012997

30022998
// Eliminate unneeded casts for indices, and replace indices which displace
@@ -3058,6 +3054,32 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
30583054
return replaceInstUsesWith(GEP, NewGEP);
30593055
}
30603056

3057+
// Scalarize vector operands; prefer splat-of-gep as canonical form.
3058+
// Note that this loses information about undef lanes; we run it after
3059+
// demanded bits to partially mitigate that loss.
3060+
if (GEPType->isVectorTy() && llvm::any_of(GEP.operands(), [](Value *Op) {
3061+
return Op->getType()->isVectorTy() && getSplatValue(Op);
3062+
})) {
3063+
SmallVector<Value *> NewOps;
3064+
for (auto &Op : GEP.operands()) {
3065+
if (Op->getType()->isVectorTy())
3066+
if (Value *Scalar = getSplatValue(Op)) {
3067+
NewOps.push_back(Scalar);
3068+
continue;
3069+
}
3070+
NewOps.push_back(Op);
3071+
}
3072+
3073+
Value *Res = Builder.CreateGEP(GEP.getSourceElementType(), NewOps[0],
3074+
ArrayRef(NewOps).drop_front(), GEP.getName(),
3075+
GEP.getNoWrapFlags());
3076+
if (!Res->getType()->isVectorTy()) {
3077+
ElementCount EC = cast<VectorType>(GEPType)->getElementCount();
3078+
Res = Builder.CreateVectorSplat(EC, Res);
3079+
}
3080+
return replaceInstUsesWith(GEP, Res);
3081+
}
3082+
30613083
// Check to see if the inputs to the PHI node are getelementptr instructions.
30623084
if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
30633085
if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))

llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ define <16 x ptr> @test(i1 %tobool) {
1111
; CHECK-NEXT: [[LANE_15:%.*]] = insertelement <16 x ptr> poison, ptr [[LANE_0]], i64 0
1212
; CHECK-NEXT: br i1 [[TOBOOL]], label %[[F1:.*]], label %[[F0:.*]]
1313
; CHECK: [[F0]]:
14-
; CHECK-NEXT: [[MM_VECTORGEP:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 1)
14+
; CHECK-NEXT: [[MM_VECTORGEP1:%.*]] = getelementptr inbounds i8, <16 x ptr> [[LANE_15]], i64 2
1515
; CHECK-NEXT: br label %[[MERGE:.*]]
1616
; CHECK: [[F1]]:
17-
; CHECK-NEXT: [[MM_VECTORGEP2:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 2)
17+
; CHECK-NEXT: [[MM_VECTORGEP22:%.*]] = getelementptr inbounds i8, <16 x ptr> [[LANE_15]], i64 4
1818
; CHECK-NEXT: br label %[[MERGE]]
1919
; CHECK: [[MERGE]]:
20-
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP]], %[[F0]] ], [ [[MM_VECTORGEP2]], %[[F1]] ]
20+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP1]], %[[F0]] ], [ [[MM_VECTORGEP22]], %[[F1]] ]
2121
; CHECK-NEXT: ret <16 x ptr> [[VEC_PHI]]
2222
;
2323
entry:

llvm/test/Transforms/InstCombine/getelementptr.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -247,11 +247,11 @@ define <2 x i1> @test13_vector2(i64 %X, <2 x ptr> %P) nounwind {
247247

248248
define <2 x i1> @test13_fixed_fixed(i64 %X, ptr %P, <2 x i64> %y) nounwind {
249249
; CHECK-LABEL: @test13_fixed_fixed(
250-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
251-
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], <i64 3, i64 0>
252-
; CHECK-NEXT: [[A_IDX:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
253-
; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], splat (i64 4)
254-
; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[A_IDX]], [[B_IDX]]
250+
; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds <2 x i64>, ptr [[P:%.*]], i64 0, i64 [[X:%.*]]
251+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[A1]], i64 0
252+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x ptr> [[DOTSPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
253+
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds <2 x i64>, ptr [[P]], <2 x i64> [[Y:%.*]]
254+
; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x ptr> [[DOTSPLAT]], [[B]]
255255
; CHECK-NEXT: ret <2 x i1> [[C]]
256256
;
257257
%A = getelementptr inbounds <2 x i64>, ptr %P, <2 x i64> zeroinitializer, i64 %X

llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -525,9 +525,7 @@ define ptr @gep_splat_base_w_s_idx(ptr %base) {
525525

526526
define ptr @gep_splat_base_w_cv_idx(ptr %base) {
527527
; CHECK-LABEL: @gep_splat_base_w_cv_idx(
528-
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
529-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> <i64 poison, i64 1>
530-
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
528+
; CHECK-NEXT: [[EE:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 4
531529
; CHECK-NEXT: ret ptr [[EE]]
532530
;
533531
%basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
@@ -539,9 +537,8 @@ define ptr @gep_splat_base_w_cv_idx(ptr %base) {
539537

540538
define ptr @gep_splat_base_w_vidx(ptr %base, <2 x i64> %idxvec) {
541539
; CHECK-LABEL: @gep_splat_base_w_vidx(
542-
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
543-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]]
544-
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
540+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[IDXVEC:%.*]], i64 1
541+
; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[TMP1]]
545542
; CHECK-NEXT: ret ptr [[EE]]
546543
;
547544
%basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
@@ -597,10 +594,7 @@ define ptr @gep_sbase_w_splat_idx(ptr %base, i64 %idx) {
597594
}
598595
define ptr @gep_splat_both(ptr %base, i64 %idx) {
599596
; CHECK-LABEL: @gep_splat_both(
600-
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
601-
; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i64 1
602-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC2]]
603-
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
597+
; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX:%.*]]
604598
; CHECK-NEXT: ret ptr [[EE]]
605599
;
606600
%basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
@@ -641,9 +635,9 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) {
641635
;; indices.
642636
define ptr @PR41624(<2 x ptr> %a) {
643637
; CHECK-LABEL: @PR41624(
644-
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer
645-
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0
646-
; CHECK-NEXT: ret ptr [[R]]
638+
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W:%.*]], i64 0
639+
; CHECK-NEXT: [[R1:%.*]] = getelementptr i8, ptr [[R]], i64 40
640+
; CHECK-NEXT: ret ptr [[R1]]
647641
;
648642
%w = getelementptr { i32, i32 }, <2 x ptr> %a, <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
649643
%r = extractelement <2 x ptr> %w, i32 0
@@ -657,8 +651,8 @@ define ptr @PR41624(<2 x ptr> %a) {
657651
define ptr @zero_sized_type_extract(<4 x i64> %arg, i64 %arg1) {
658652
; CHECK-LABEL: @zero_sized_type_extract(
659653
; CHECK-NEXT: bb:
660-
; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds [0 x i32], <4 x ptr> <ptr @global, ptr poison, ptr poison, ptr poison>, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i64> [[ARG:%.*]]
661-
; CHECK-NEXT: [[T2:%.*]] = extractelement <4 x ptr> [[T]], i64 0
654+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[ARG:%.*]], i64 0
655+
; CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [0 x i32], ptr @global, i64 0, i64 [[TMP0]]
662656
; CHECK-NEXT: ret ptr [[T2]]
663657
;
664658
bb:

llvm/test/Transforms/InstCombine/vec_demanded_elts.ll

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -528,9 +528,7 @@ define ptr @gep_splat_base_w_s_idx(ptr %base) {
528528

529529
define ptr @gep_splat_base_w_cv_idx(ptr %base) {
530530
; CHECK-LABEL: @gep_splat_base_w_cv_idx(
531-
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
532-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> <i64 poison, i64 1>
533-
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
531+
; CHECK-NEXT: [[EE:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 4
534532
; CHECK-NEXT: ret ptr [[EE]]
535533
;
536534
%basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
@@ -542,9 +540,8 @@ define ptr @gep_splat_base_w_cv_idx(ptr %base) {
542540

543541
define ptr @gep_splat_base_w_vidx(ptr %base, <2 x i64> %idxvec) {
544542
; CHECK-LABEL: @gep_splat_base_w_vidx(
545-
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
546-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]]
547-
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
543+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[IDXVEC:%.*]], i64 1
544+
; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[TMP1]]
548545
; CHECK-NEXT: ret ptr [[EE]]
549546
;
550547
%basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
@@ -600,10 +597,7 @@ define ptr @gep_sbase_w_splat_idx(ptr %base, i64 %idx) {
600597
}
601598
define ptr @gep_splat_both(ptr %base, i64 %idx) {
602599
; CHECK-LABEL: @gep_splat_both(
603-
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
604-
; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i64 1
605-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC2]]
606-
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
600+
; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX:%.*]]
607601
; CHECK-NEXT: ret ptr [[EE]]
608602
;
609603
%basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
@@ -644,9 +638,9 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) {
644638
;; indices.
645639
define ptr @PR41624(<2 x ptr> %a) {
646640
; CHECK-LABEL: @PR41624(
647-
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer
648-
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0
649-
; CHECK-NEXT: ret ptr [[R]]
641+
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W:%.*]], i64 0
642+
; CHECK-NEXT: [[R1:%.*]] = getelementptr i8, ptr [[R]], i64 40
643+
; CHECK-NEXT: ret ptr [[R1]]
650644
;
651645
%w = getelementptr { i32, i32 }, <2 x ptr> %a, <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
652646
%r = extractelement <2 x ptr> %w, i32 0
@@ -660,8 +654,8 @@ define ptr @PR41624(<2 x ptr> %a) {
660654
define ptr @zero_sized_type_extract(<4 x i64> %arg, i64 %arg1) {
661655
; CHECK-LABEL: @zero_sized_type_extract(
662656
; CHECK-NEXT: bb:
663-
; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds [0 x i32], <4 x ptr> <ptr @global, ptr poison, ptr poison, ptr poison>, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i64> [[ARG:%.*]]
664-
; CHECK-NEXT: [[T2:%.*]] = extractelement <4 x ptr> [[T]], i64 0
657+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[ARG:%.*]], i64 0
658+
; CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [0 x i32], ptr @global, i64 0, i64 [[TMP0]]
665659
; CHECK-NEXT: ret ptr [[T2]]
666660
;
667661
bb:

llvm/test/Transforms/InstCombine/vector_gep1-inseltpoison.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ define <2 x i1> @test5(<2 x ptr> %a) {
5555

5656
define <2 x ptr> @test7(<2 x ptr> %a) {
5757
; CHECK-LABEL: @test7(
58-
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, <2 x i32> zeroinitializer
58+
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, i32 0
5959
; CHECK-NEXT: ret <2 x ptr> [[W]]
6060
;
6161
%w = getelementptr {i32, i32}, <2 x ptr> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer

llvm/test/Transforms/InstCombine/vector_gep1.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ define <2 x i1> @test5(<2 x ptr> %a) {
5555

5656
define <2 x ptr> @test7(<2 x ptr> %a) {
5757
; CHECK-LABEL: @test7(
58-
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, <2 x i32> zeroinitializer
58+
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, i32 0
5959
; CHECK-NEXT: ret <2 x ptr> [[W]]
6060
;
6161
%w = getelementptr {i32, i32}, <2 x ptr> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer

0 commit comments

Comments
 (0)