Commit 9536a62

[VPlan] Preserve original induction order when creating scalar steps.
Update createScalarIVSteps to take an insert point as a parameter. This ensures that the inserted scalar steps appear in the same order as the recipes they replace (rather than in reverse order, as before). This helps reduce the diff for follow-up changes.
1 parent 817d0cb commit 9536a62

13 files changed: +98 -96 lines

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 4 additions & 3 deletions
@@ -491,9 +491,9 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
 
 static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
                                     ScalarEvolution &SE, Instruction *TruncI,
-                                    VPValue *StartV, VPValue *Step) {
+                                    VPValue *StartV, VPValue *Step,
+                                    VPBasicBlock::iterator IP) {
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
-  auto IP = HeaderVPBB->getFirstNonPhi();
   VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
   VPSingleDefRecipe *BaseIV = CanonicalIV;
   if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) {
@@ -535,6 +535,7 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
   SmallVector<VPRecipeBase *> ToRemove;
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
+  VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
     auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
     if (!WideIV)
@@ -547,7 +548,7 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
     const InductionDescriptor &ID = WideIV->getInductionDescriptor();
     VPValue *Steps =
         createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(),
-                            WideIV->getStartValue(), WideIV->getStepValue());
+                            WideIV->getStartValue(), WideIV->getStepValue(), InsertPt);
 
     // Update scalar users of IV to use Step instead.
     if (!HasOnlyVectorVFs)
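
Taken together, the change hoists the insert-point computation out of the helper and into its caller. Below is a minimal sketch of the resulting flow, pieced together from the hunks above; surrounding logic and unrelated code are elided, so it is illustrative rather than a verbatim excerpt of the file.

    // In VPlanTransforms::optimizeInductions: compute the insert point once,
    // before walking the header phis, so steps created for successive
    // inductions are all inserted before the same recipe and therefore keep
    // the original induction order.
    VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
    for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
      auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
      if (!WideIV)
        continue;
      const InductionDescriptor &ID = WideIV->getInductionDescriptor();
      // createScalarIVSteps no longer calls getFirstNonPhi() itself; before
      // this change each call recomputed the insert point, so steps for later
      // inductions landed ahead of earlier ones (the reversed order the
      // commit message refers to).
      VPValue *Steps =
          createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(),
                              WideIV->getStartValue(), WideIV->getStepValue(),
                              InsertPt);
      // ... replace scalar users of WideIV with Steps ...
    }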

llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll

Lines changed: 24 additions & 24 deletions
@@ -17,23 +17,23 @@ define void @pr63602_1(ptr %arr) {
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 4, [[TMP0]]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 3
-; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = add i64 1, [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX2]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[INDEX]], 3
+; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = add i64 4, [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX2]], 3
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX2]], 6
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX2]], 9
+; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP1]], 4
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]]
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP9]], align 4
 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP6]]
 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0
 ; CHECK-NEXT: store i32 [[TMP14]], ptr [[TMP10]], align 4
 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1
@@ -42,7 +42,7 @@ define void @pr63602_1(ptr %arr) {
 ; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP12]], align 4
 ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3
 ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP13]], align 4
-; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw i64 [[TMP6]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw i64 [[TMP1]], 2
 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP18]]
 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
 ; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <12 x i32>, ptr [[TMP20]], align 4
@@ -131,26 +131,26 @@ define void @pr63602_2(ptr %arr) {
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 4, [[TMP0]]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 3
-; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = add i64 1, [[TMP5]]
+; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = add i64 4, [[TMP5]]
 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX2]], 0
 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX2]], 3
 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX2]], 6
 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX2]], 9
-; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP1]], 4
 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP10]]
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP12]], align 4
 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP9]]
 ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0
 ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP13]], align 4
 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1
@@ -159,10 +159,10 @@ define void @pr63602_2(ptr %arr) {
 ; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP15]], align 4
 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3
 ; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP16]], align 4
-; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[TMP6]], 2
-; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[TMP7]], 2
-; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[TMP8]], 2
-; CHECK-NEXT: [[TMP24:%.*]] = add nuw nsw i64 [[TMP9]], 2
+; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP24:%.*]] = add nuw nsw i64 [[TMP4]], 2
 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP21]]
 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP22]]
 ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP23]]

llvm/test/Transforms/LoopVectorize/X86/pr36524.ll

Lines changed: 1 addition & 1 deletion
@@ -19,13 +19,13 @@ define void @foo(ptr %ptr, ptr %ptr.2) {
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 2, i64 3, i64 4, i64 5>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[OFFSET_IDX]] to i32
 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1
 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2
 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope !0, !noalias !3
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[TMP5]]
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0

llvm/test/Transforms/LoopVectorize/cast-induction.ll

Lines changed: 2 additions & 2 deletions
@@ -17,7 +17,7 @@ define void @example12() {
 ; IC2-LABEL: @example12(
 ; IC2-LABEL: vector.body:
 ; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
-; IC2-NEXT: [[TRUNC:%.+]] = trunc i64 [[INDEX]] to i32
+; IC2: [[TRUNC:%.+]] = trunc i64 [[INDEX]] to i32
 ; IC2-NEXT: [[TRUNC0:%.+]] = add i32 [[TRUNC]], 0
 ; IC2-NEXT: [[TRUNC1:%.+]] = add i32 [[TRUNC]], 1
 ; IC2: store i32 [[TRUNC0]],
@@ -88,7 +88,7 @@ define void @cast_variable_step(i64 %step) {
 
 ; IC2-LABEL: vector.body:
 ; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
-; IC2-NEXT: [[MUL:%.+]] = mul i64 %index, %step
+; IC2: [[MUL:%.+]] = mul i64 %index, %step
 ; IC2-NEXT: [[OFFSET_IDX:%.+]] = add i64 10, [[MUL]]
 ; IC2-NEXT: [[TRUNC_OFF:%.+]] = trunc i64 [[OFFSET_IDX]] to i32
 ; IC2-NEXT: [[STEP0:%.+]] = mul i32 0, [[TRUNC_STEP]]
