-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[InstCombine] Extend ADD+GEP->GEP+GEP combine to disjoint or. #76981
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Craig Topper (topperc) ChangesPatch is 282.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76981.diff 20 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index f3181dc14792c8..6345a62ad99bf2 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2527,7 +2527,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Try to replace ADD + GEP with GEP + GEP.
Value *Idx1, *Idx2;
if (match(GEP.getOperand(1),
- m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
+ m_OneUse(m_AddLike(m_Value(Idx1), m_Value(Idx2))))) {
// %idx = add i64 %idx1, %idx2
// %gep = getelementptr i32, ptr %ptr, i64 %idx
// as:
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
index 75222e5ee13716..74195c24578d2c 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
@@ -35,62 +35,62 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
; CHECK: for.exiting_block.1:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
; CHECK: latch.1:
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[TMP4]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP5]], [[ADD]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
; CHECK: for.exiting_block.2:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
; CHECK: latch.2:
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[TMP6]], i64 2
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP7]], [[ADD_1]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
; CHECK: for.exiting_block.3:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
; CHECK: latch.3:
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = or disjoint i64 [[INDVARS_IV]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr i32, ptr [[TMP8]], i64 3
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP9]], [[ADD_2]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
; CHECK: for.exiting_block.4:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
; CHECK: latch.4:
-; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = or disjoint i64 [[INDVARS_IV]], 5
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr i32, ptr [[TMP10]], i64 4
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP11]], [[ADD_3]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
; CHECK: for.exiting_block.5:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
; CHECK: latch.5:
-; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = or disjoint i64 [[INDVARS_IV]], 6
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr i32, ptr [[TMP12]], i64 5
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP13]], [[ADD_4]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
; CHECK: for.exiting_block.6:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
; CHECK: latch.6:
-; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = or disjoint i64 [[INDVARS_IV]], 7
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr i32, ptr [[TMP14]], i64 6
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP15]], [[ADD_5]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
; CHECK: for.exiting_block.7:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
; CHECK: latch.7:
-; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr i32, ptr [[TMP16]], i64 7
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP17]], [[ADD_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
@@ -115,8 +115,8 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
; CHECK: latch.epil:
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
-; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP18]], [[SUM_02_EPIL]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
@@ -763,62 +763,62 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
; CHECK: for.exiting_block.1:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
; CHECK: latch.1:
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[TMP4]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP5]], [[ADD]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
; CHECK: for.exiting_block.2:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
; CHECK: latch.2:
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[TMP6]], i64 2
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP7]], [[ADD_1]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
; CHECK: for.exiting_block.3:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
; CHECK: latch.3:
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = or disjoint i64 [[INDVARS_IV]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr i32, ptr [[TMP8]], i64 3
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP9]], [[ADD_2]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
; CHECK: for.exiting_block.4:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
; CHECK: latch.4:
-; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = or disjoint i64 [[INDVARS_IV]], 5
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr i32, ptr [[TMP10]], i64 4
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP11]], [[ADD_3]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
; CHECK: for.exiting_block.5:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
; CHECK: latch.5:
-; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = or disjoint i64 [[INDVARS_IV]], 6
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr i32, ptr [[TMP12]], i64 5
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP13]], [[ADD_4]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
; CHECK: for.exiting_block.6:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
; CHECK: latch.6:
-; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = or disjoint i64 [[INDVARS_IV]], 7
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr i32, ptr [[TMP14]], i64 6
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP15]], [[ADD_5]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
; CHECK: for.exiting_block.7:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
; CHECK: latch.7:
-; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr i32, ptr [[TMP16]], i64 7
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP17]], [[ADD_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
@@ -843,8 +843,8 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
; CHECK: latch.epil:
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
-; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP18]], [[SUM_02_EPIL]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index f600a0d5877d33..bed4b83edc57fa 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -26,8 +26,8 @@ define void @test_array_load2_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
@@ -36,19 +36,19 @@ define void @test_array_load2_store2(i32 %C, i32 %D) #1 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <vscale x 4 x i32> [[TMP2]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP3]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -1
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]])
-; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <vscale x 4 x i32> [[TMP3]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <vscale x 4 x i32> [[TMP4]], [[BROADCAST_SPLAT2]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -1
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]])
+; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP9]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
@@ -106,13 +106,13 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl <vscale x 4 x i64> [[TMP0]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP3:%....
[truncated]
|
This is basically fine, but I worry that this will introduce additional negative interactions with LICM, of the kind seen in llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll. Maybe we should disable LICM reassociation for constant GEPs. @dtcxzyw Does this appear to be a problem in IR diffs or not? |
Yeah, I see some similar changes in
@nikic Did you mean it will cause register allocator to introduce more spills? |
Yeah. And as constant offset in GEP + load/store is usually part of the addressing mode and as such "free", we don't really gain anything by hoisting them out of the loop. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
I just landed 092b6e7 |
No description provided.