Skip to content

[instcombine] Scalarize operands of vector geps if possible #145402

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2993,10 +2993,6 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return replaceInstUsesWith(GEP, V);
return &GEP;
}

// TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
// possible (decide on canonical form for pointer broadcast), 3) exploit
// undef elements to decrease demanded bits
}

// Eliminate unneeded casts for indices, and replace indices which displace
Expand Down Expand Up @@ -3058,6 +3054,32 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return replaceInstUsesWith(GEP, NewGEP);
}

// Scalarize vector operands; prefer splat-of-gep as canonical form.
// Note that this loses information about undef lanes; we run it after
// demanded bits to partially mitigate that loss.
if (GEPType->isVectorTy() && llvm::any_of(GEP.operands(), [](Value *Op) {
return Op->getType()->isVectorTy() && getSplatValue(Op);
})) {
SmallVector<Value *> NewOps;
for (auto &Op : GEP.operands()) {
if (Op->getType()->isVectorTy())
if (Value *Scalar = getSplatValue(Op)) {
NewOps.push_back(Scalar);
continue;
}
NewOps.push_back(Op);
}

Value *Res = Builder.CreateGEP(GEP.getSourceElementType(), NewOps[0],
ArrayRef(NewOps).drop_front(), GEP.getName(),
GEP.getNoWrapFlags());
if (!Res->getType()->isVectorTy()) {
ElementCount EC = cast<VectorType>(GEPType)->getElementCount();
Res = Builder.CreateVectorSplat(EC, Res);
}
return replaceInstUsesWith(GEP, Res);
}

// Check to see if the inputs to the PHI node are getelementptr instructions.
if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ define <16 x ptr> @test(i1 %tobool) {
; CHECK-NEXT: [[LANE_15:%.*]] = insertelement <16 x ptr> poison, ptr [[LANE_0]], i64 0
; CHECK-NEXT: br i1 [[TOBOOL]], label %[[F1:.*]], label %[[F0:.*]]
; CHECK: [[F0]]:
; CHECK-NEXT: [[MM_VECTORGEP:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 1)
; CHECK-NEXT: [[MM_VECTORGEP1:%.*]] = getelementptr inbounds i8, <16 x ptr> [[LANE_15]], i64 2
; CHECK-NEXT: br label %[[MERGE:.*]]
; CHECK: [[F1]]:
; CHECK-NEXT: [[MM_VECTORGEP2:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 2)
; CHECK-NEXT: [[MM_VECTORGEP22:%.*]] = getelementptr inbounds i8, <16 x ptr> [[LANE_15]], i64 4
; CHECK-NEXT: br label %[[MERGE]]
; CHECK: [[MERGE]]:
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP]], %[[F0]] ], [ [[MM_VECTORGEP2]], %[[F1]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP1]], %[[F0]] ], [ [[MM_VECTORGEP22]], %[[F1]] ]
; CHECK-NEXT: ret <16 x ptr> [[VEC_PHI]]
;
entry:
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/Transforms/InstCombine/getelementptr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,11 @@ define <2 x i1> @test13_vector2(i64 %X, <2 x ptr> %P) nounwind {

define <2 x i1> @test13_fixed_fixed(i64 %X, ptr %P, <2 x i64> %y) nounwind {
; CHECK-LABEL: @test13_fixed_fixed(
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], <i64 3, i64 0>
; CHECK-NEXT: [[A_IDX:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], splat (i64 4)
; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[A_IDX]], [[B_IDX]]
; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds <2 x i64>, ptr [[P:%.*]], i64 0, i64 [[X:%.*]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[A1]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x ptr> [[DOTSPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds <2 x i64>, ptr [[P]], <2 x i64> [[Y:%.*]]
; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x ptr> [[DOTSPLAT]], [[B]]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regression, as we don't look through splats in this transform. But I doubt this matters in practice.

; CHECK-NEXT: ret <2 x i1> [[C]]
;
%A = getelementptr inbounds <2 x i64>, ptr %P, <2 x i64> zeroinitializer, i64 %X
Expand Down
24 changes: 9 additions & 15 deletions llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -525,9 +525,7 @@ define ptr @gep_splat_base_w_s_idx(ptr %base) {

define ptr @gep_splat_base_w_cv_idx(ptr %base) {
; CHECK-LABEL: @gep_splat_base_w_cv_idx(
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> <i64 poison, i64 1>
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
; CHECK-NEXT: [[EE:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 4
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
Expand All @@ -539,9 +537,8 @@ define ptr @gep_splat_base_w_cv_idx(ptr %base) {

define ptr @gep_splat_base_w_vidx(ptr %base, <2 x i64> %idxvec) {
; CHECK-LABEL: @gep_splat_base_w_vidx(
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]]
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[IDXVEC:%.*]], i64 1
; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[TMP1]]
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
Expand Down Expand Up @@ -597,10 +594,7 @@ define ptr @gep_sbase_w_splat_idx(ptr %base, i64 %idx) {
}
define ptr @gep_splat_both(ptr %base, i64 %idx) {
; CHECK-LABEL: @gep_splat_both(
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i64 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC2]]
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX:%.*]]
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
Expand Down Expand Up @@ -641,9 +635,9 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) {
;; indices.
define ptr @PR41624(<2 x ptr> %a) {
; CHECK-LABEL: @PR41624(
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0
; CHECK-NEXT: ret ptr [[R]]
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W:%.*]], i64 0
; CHECK-NEXT: [[R1:%.*]] = getelementptr i8, ptr [[R]], i64 40
; CHECK-NEXT: ret ptr [[R1]]
;
%w = getelementptr { i32, i32 }, <2 x ptr> %a, <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
%r = extractelement <2 x ptr> %w, i32 0
Expand All @@ -657,8 +651,8 @@ define ptr @PR41624(<2 x ptr> %a) {
define ptr @zero_sized_type_extract(<4 x i64> %arg, i64 %arg1) {
; CHECK-LABEL: @zero_sized_type_extract(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds [0 x i32], <4 x ptr> <ptr @global, ptr poison, ptr poison, ptr poison>, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i64> [[ARG:%.*]]
; CHECK-NEXT: [[T2:%.*]] = extractelement <4 x ptr> [[T]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[ARG:%.*]], i64 0
; CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [0 x i32], ptr @global, i64 0, i64 [[TMP0]]
; CHECK-NEXT: ret ptr [[T2]]
;
bb:
Expand Down
24 changes: 9 additions & 15 deletions llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -528,9 +528,7 @@ define ptr @gep_splat_base_w_s_idx(ptr %base) {

define ptr @gep_splat_base_w_cv_idx(ptr %base) {
; CHECK-LABEL: @gep_splat_base_w_cv_idx(
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> <i64 poison, i64 1>
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
; CHECK-NEXT: [[EE:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 4
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
Expand All @@ -542,9 +540,8 @@ define ptr @gep_splat_base_w_cv_idx(ptr %base) {

define ptr @gep_splat_base_w_vidx(ptr %base, <2 x i64> %idxvec) {
; CHECK-LABEL: @gep_splat_base_w_vidx(
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]]
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[IDXVEC:%.*]], i64 1
; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[TMP1]]
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
Expand Down Expand Up @@ -600,10 +597,7 @@ define ptr @gep_sbase_w_splat_idx(ptr %base, i64 %idx) {
}
define ptr @gep_splat_both(ptr %base, i64 %idx) {
; CHECK-LABEL: @gep_splat_both(
; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i64 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC2]]
; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX:%.*]]
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
Expand Down Expand Up @@ -644,9 +638,9 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) {
;; indices.
define ptr @PR41624(<2 x ptr> %a) {
; CHECK-LABEL: @PR41624(
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0
; CHECK-NEXT: ret ptr [[R]]
; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W:%.*]], i64 0
; CHECK-NEXT: [[R1:%.*]] = getelementptr i8, ptr [[R]], i64 40
; CHECK-NEXT: ret ptr [[R1]]
;
%w = getelementptr { i32, i32 }, <2 x ptr> %a, <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
%r = extractelement <2 x ptr> %w, i32 0
Expand All @@ -660,8 +654,8 @@ define ptr @PR41624(<2 x ptr> %a) {
define ptr @zero_sized_type_extract(<4 x i64> %arg, i64 %arg1) {
; CHECK-LABEL: @zero_sized_type_extract(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds [0 x i32], <4 x ptr> <ptr @global, ptr poison, ptr poison, ptr poison>, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i64> [[ARG:%.*]]
; CHECK-NEXT: [[T2:%.*]] = extractelement <4 x ptr> [[T]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[ARG:%.*]], i64 0
; CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [0 x i32], ptr @global, i64 0, i64 [[TMP0]]
; CHECK-NEXT: ret ptr [[T2]]
;
bb:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ define <2 x i1> @test5(<2 x ptr> %a) {

define <2 x ptr> @test7(<2 x ptr> %a) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, <2 x i32> zeroinitializer
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, i32 0
; CHECK-NEXT: ret <2 x ptr> [[W]]
;
%w = getelementptr {i32, i32}, <2 x ptr> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/vector_gep1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ define <2 x i1> @test5(<2 x ptr> %a) {

define <2 x ptr> @test7(<2 x ptr> %a) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, <2 x i32> zeroinitializer
; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, i32 0
; CHECK-NEXT: ret <2 x ptr> [[W]]
;
%w = getelementptr {i32, i32}, <2 x ptr> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer
Expand Down
Loading