-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[InstCombine] Handle "add like" in ADD+GEP->GEP+GEP rewrites #135156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-systemz @llvm/pr-subscribers-llvm-transforms Author: Björn Pettersson (bjope) ChangesConsidering that "or disjoint" is that canonical for certain add operations, then I think we want to support such "add like" operations when doing ADD+GEP->GEP+GEP rewrites to make things more consistent. Patch is 135.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135156.diff 21 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 19a818f4baa30..da92ef1e3af3b 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3105,10 +3105,10 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return GEPNoWrapFlags::none();
};
- // Try to replace ADD + GEP with GEP + GEP.
+ // Try to replace ADD/OR + GEP with GEP + GEP.
Value *Idx1, *Idx2;
if (match(GEP.getOperand(1),
- m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
+ m_OneUse(m_AddLike(m_Value(Idx1), m_Value(Idx2))))) {
// %idx = add i64 %idx1, %idx2
// %gep = getelementptr i32, ptr %ptr, i64 %idx
// as:
diff --git a/llvm/test/Transforms/InstCombine/array.ll b/llvm/test/Transforms/InstCombine/array.ll
index 5d389958173a5..ead5cb53d67d1 100644
--- a/llvm/test/Transforms/InstCombine/array.ll
+++ b/llvm/test/Transforms/InstCombine/array.ll
@@ -109,12 +109,11 @@ entry:
ret void
}
-; FIXME: Should be transformed as OR+GEP -> GEP+GEP (similar to gep_inbounds_add_nuw below).
define ptr @gep_inbounds_nuwaddlike(ptr %ptr, i64 %a, i64 %b) {
; CHECK-LABEL: define ptr @gep_inbounds_nuwaddlike(
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
-; CHECK-NEXT: [[ADD:%.*]] = or disjoint i64 [[A]], [[B]]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[PTR]], i64 [[ADD]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[PTR]], i64 [[A]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 [[B]]
; CHECK-NEXT: ret ptr [[GEP]]
;
%add = or disjoint i64 %a, %b
diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
index 46f42e16c83ce..acea9f8f555c9 100644
--- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
@@ -157,8 +157,8 @@ define ptr @partialConstant3(ptr %p) {
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nsw i64 [[TMP2]], 5
-; CHECK-NEXT: [[DOTOFFS:%.*]] = or disjoint i64 [[DOTIDX]], 16
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[DOTOFFS]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[DOTIDX]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP4]], i64 16
; CHECK-NEXT: ret ptr [[TMP3]]
;
%1 = getelementptr inbounds i32, ptr %p, i64 1
diff --git a/llvm/test/Transforms/InstCombine/gep-vector.ll b/llvm/test/Transforms/InstCombine/gep-vector.ll
index d8a65b69aceff..27624f790c4c5 100644
--- a/llvm/test/Transforms/InstCombine/gep-vector.ll
+++ b/llvm/test/Transforms/InstCombine/gep-vector.ll
@@ -129,8 +129,8 @@ define ptr @test_accumulate_constant_offset_vscale_nonzero(<vscale x 16 x i1> %p
; CHECK-LABEL: @test_accumulate_constant_offset_vscale_nonzero(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
-; CHECK-NEXT: [[GEP_OFFS:%.*]] = or disjoint i64 [[TMP2]], 4
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP_OFFS]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP2]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP3]], i64 4
; CHECK-NEXT: ret ptr [[GEP]]
;
%gep = getelementptr <vscale x 16 x i8>, ptr %base, i64 1, i64 4
diff --git a/llvm/test/Transforms/InstCombine/vscale_gep.ll b/llvm/test/Transforms/InstCombine/vscale_gep.ll
index f85b8f9c5c546..371ee71e45f23 100644
--- a/llvm/test/Transforms/InstCombine/vscale_gep.ll
+++ b/llvm/test/Transforms/InstCombine/vscale_gep.ll
@@ -60,8 +60,8 @@ define i32 @gep_alloca_inbounds_vscale_nonzero() {
; CHECK-NEXT: [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
-; CHECK-NEXT: [[TMP_OFFS:%.*]] = or disjoint i64 [[TMP2]], 8
-; CHECK-NEXT: [[TMP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP_OFFS]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[TMP]], align 4
; CHECK-NEXT: ret i32 [[LOAD]]
;
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
index 022bd6febe327..d3e5e0b0b6980 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
@@ -35,60 +35,60 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
; CHECK: for.exiting_block.1:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
; CHECK: latch.1:
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i8, ptr [[TMP13]], i64 4
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
; CHECK: for.exiting_block.2:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
; CHECK: latch.2:
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i8, ptr [[TMP15]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
; CHECK: for.exiting_block.3:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
; CHECK: latch.3:
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr i8, ptr [[TMP17]], i64 12
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = or disjoint i64 [[INDVARS_IV]], 4
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
; CHECK: for.exiting_block.4:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
; CHECK: latch.4:
-; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr i8, ptr [[TMP18]], i64 16
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = or disjoint i64 [[INDVARS_IV]], 5
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
; CHECK: for.exiting_block.5:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
; CHECK: latch.5:
-; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr i8, ptr [[TMP12]], i64 20
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = or disjoint i64 [[INDVARS_IV]], 6
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
; CHECK: for.exiting_block.6:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
; CHECK: latch.6:
-; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr i8, ptr [[TMP14]], i64 24
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = or disjoint i64 [[INDVARS_IV]], 7
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
; CHECK: for.exiting_block.7:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
; CHECK: latch.7:
-; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr i8, ptr [[TMP16]], i64 28
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
@@ -763,60 +763,60 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
; CHECK: for.exiting_block.1:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
; CHECK: latch.1:
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i8, ptr [[TMP13]], i64 4
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
; CHECK: for.exiting_block.2:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
; CHECK: latch.2:
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i8, ptr [[TMP15]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
; CHECK: for.exiting_block.3:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
; CHECK: latch.3:
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr i8, ptr [[TMP17]], i64 12
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = or disjoint i64 [[INDVARS_IV]], 4
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
; CHECK: for.exiting_block.4:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
; CHECK: latch.4:
-; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr i8, ptr [[TMP18]], i64 16
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = or disjoint i64 [[INDVARS_IV]], 5
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
; CHECK: for.exiting_block.5:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
; CHECK: latch.5:
-; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr i8, ptr [[TMP12]], i64 20
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = or disjoint i64 [[INDVARS_IV]], 6
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
; CHECK: for.exiting_block.6:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
; CHECK: latch.6:
-; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr i8, ptr [[TMP14]], i64 24
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = or disjoint i64 [[INDVARS_IV]], 7
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
; CHECK: for.exiting_block.7:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
; CHECK: latch.7:
-; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr i8, ptr [[TMP16]], i64 28
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
index c459dd28fdaee..a11cc15a8a85b 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
@@ -1335,10 +1335,8 @@ define i32 @reduction_interleave_group(i32 %n, ptr %arr) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i32 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i32 [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -4
+; CHECK-NEXT: [[DOTIDX:%.*]] = shl i32 [[INDEX]], 3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[ARR:%.*]], i32 [[DOTIDX]]
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -1359,8 +1357,8 @@ define i32 @reduction_interleave_group(i32 %n, ptr %arr) #0 {
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RED_PHI:%.*]] = phi i32 [ [[RED_2:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[ADD:%.*]] = or disjoint i32 [[IV]], 1
-; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i32 [[ADD]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IV]]
+; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr i8, ptr [[TMP11]], i32 4
; CHECK-NEXT: [[L_0:%.*]] = load i32, ptr [[GEP_0]], align 4
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i32 [[IV]]
; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_1]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll
index fad167e916c5d..c5e4eccd1df09 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll
@@ -14,11 +14,11 @@ define i32 @foo(ptr nocapture %A) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i64 [[TMP0]], 4
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nsw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: [[DOTIDX1:%.*]] = shl i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX1]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16
; CHECK-NEXT: store i32 4, ptr [[TMP2]], align 4
; CHECK-NEXT: store i32 4, ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -61,9 +61,9 @@ define i32 @foo1(ptr nocapture noalias %A, ptr nocapture %PtrPtr) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[PTRPTR:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[PTRPTR]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr ptr, ptr [[PTRPTR]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP11]], i64 8
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index 537fda42d3a1e..e89f41bb94665 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -142,40 +142,40 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) {
; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
; AUTO_VEC: for.body.preheader.new:
; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 2147483640
+; AUTO_VEC-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 4
+; AUTO_VEC-NEXT: [[INVARIANT_GEP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
+; AUTO_VEC-NEXT: [[INVARIANT_GEP3:%.*]] = getelementptr inbounds i8, ptr [[A...
[truncated]
|
See also #76981. |
4895b27
to
51ec9b0
Compare
fe38cac
to
85b50b0
Compare
eb5fbb8
to
dd8cdaf
Compare
Considering that "or disjoint" is the canonical for certain add operations, then I think we want to support such "add like" operations when doing ADD+GEP->GEP+GEP rewrites to make things more consistent. Problem was found when improving ValueTracking, which turned an ADD into OR, and then suddenly optimizations got worse due to these rewrites no longer triggering.
85b50b0
to
8148d0c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…5156) Considering that "or disjoint" is the canonical for certain add operations, then I think we want to support such "add like" operations when doing ADD+GEP->GEP+GEP rewrites to make things more consistent. Problem was found when improving ValueTracking, which turned an ADD into OR, and then suddenly optimizations got worse due to these rewrites no longer triggering.
Considering that "or disjoint" is the canonical for certain add operations, then I think we want to support such "add like" operations when doing ADD+GEP->GEP+GEP rewrites to make things more consistent.
Problem was found when improving ValueTracking, which turned an ADD into OR, and then suddenly optimizations got worse due to these rewrites no longer triggering.