Skip to content

Commit 0f66ee1

Browse files
committed
[LV] Add test with FP induction and increment operands swapped.
1 parent 4a7c0b8 commit 0f66ee1

File tree

3 files changed

+136
-4
lines changed

3 files changed

+136
-4
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9311,6 +9311,20 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93119311
"VPBasicBlock");
93129312
RecipeBuilder.fixHeaderPhis();
93139313

9314+
// Update wide induction increments to use the same step as the corresponding
9315+
// wide induction. This enables detecting induction increments directly in
9316+
// VPlan and removes redundant splats.
9317+
for (const auto &[Phi, ID] : Legal->getInductionVars()) {
9318+
auto *IVInc = cast<Instruction>(
9319+
Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
9320+
if (IVInc->getOperand(0) != Phi || IVInc->getOpcode() != Instruction::Add)
9321+
continue;
9322+
VPWidenInductionRecipe *WideIV =
9323+
cast<VPWidenInductionRecipe>(RecipeBuilder.getRecipe(Phi));
9324+
VPRecipeBase *R = RecipeBuilder.getRecipe(IVInc);
9325+
R->setOperand(1, WideIV->getStepValue());
9326+
}
9327+
93149328
if (auto *UncountableExitingBlock =
93159329
Legal->getUncountableEarlyExitingBlock()) {
93169330
VPlanTransforms::handleUncountableEarlyExit(

llvm/test/Transforms/LoopVectorize/X86/induction-step.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,16 +21,14 @@ define i16 @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
2121
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
2222
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> <i16 0, i16 1, i16 2, i16 3>, [[DOTSPLAT]]
2323
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i16> zeroinitializer, [[TMP2]]
24-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[O_1]], i64 0
25-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer
2624
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2725
; CHECK: vector.body:
2826
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2927
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3028
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], [[TMP1]]
3129
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
32-
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT2]]
33-
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT2]]
30+
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
31+
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT]]
3432
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[TMP3]]
3533
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
3634
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 4

llvm/test/Transforms/LoopVectorize/iv_outside_user.ll

Lines changed: 120 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -860,6 +860,126 @@ exit:
860860
ret float %add
861861
}
862862

863+
define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture %A, i64 %N, float %fpinc) {
864+
; VEC-LABEL: define float @fp_postinc_use_fadd_ops_swapped(
865+
; VEC-SAME: float [[INIT:%.*]], ptr noalias nocapture [[A:%.*]], i64 [[N:%.*]], float [[FPINC:%.*]]) {
866+
; VEC-NEXT: [[ENTRY:.*]]:
867+
; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
868+
; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
869+
; VEC: [[VECTOR_PH]]:
870+
; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
871+
; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
872+
; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
873+
; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
874+
; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
875+
; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
876+
; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
877+
; VEC-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
878+
; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
879+
; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> <float 0.000000e+00, float 1.000000e+00>, [[DOTSPLAT2]]
880+
; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]]
881+
; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
882+
; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
883+
; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer
884+
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
885+
; VEC: [[VECTOR_BODY]]:
886+
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
887+
; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
888+
; VEC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
889+
; VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]]
890+
; VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0
891+
; VEC-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP6]], align 4
892+
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
893+
; VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], [[DOTSPLAT4]]
894+
; VEC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
895+
; VEC-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
896+
; VEC: [[MIDDLE_BLOCK]]:
897+
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
898+
; VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
899+
; VEC: [[SCALAR_PH]]:
900+
; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
901+
; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
902+
; VEC-NEXT: br label %[[LOOP:.*]]
903+
; VEC: [[LOOP]]:
904+
; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
905+
; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
906+
; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
907+
; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4
908+
; VEC-NEXT: [[ADD]] = fadd fast float [[FPINC]], [[FP_IV]]
909+
; VEC-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
910+
; VEC-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
911+
; VEC-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}}
912+
; VEC: [[EXIT]]:
913+
; VEC-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[LOOP]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ]
914+
; VEC-NEXT: ret float [[ADD_LCSSA]]
915+
;
916+
; INTERLEAVE-LABEL: define float @fp_postinc_use_fadd_ops_swapped(
917+
; INTERLEAVE-SAME: float [[INIT:%.*]], ptr noalias nocapture [[A:%.*]], i64 [[N:%.*]], float [[FPINC:%.*]]) {
918+
; INTERLEAVE-NEXT: [[ENTRY:.*]]:
919+
; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
920+
; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
921+
; INTERLEAVE: [[VECTOR_PH]]:
922+
; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
923+
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
924+
; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
925+
; INTERLEAVE-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
926+
; INTERLEAVE-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
927+
; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
928+
; INTERLEAVE: [[VECTOR_BODY]]:
929+
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
930+
; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
931+
; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1
932+
; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
933+
; INTERLEAVE-NEXT: [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]]
934+
; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP4]]
935+
; INTERLEAVE-NEXT: [[TMP5:%.*]] = fmul fast float 0.000000e+00, [[FPINC]]
936+
; INTERLEAVE-NEXT: [[TMP6:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP5]]
937+
; INTERLEAVE-NEXT: [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]]
938+
; INTERLEAVE-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP7]]
939+
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]]
940+
; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]]
941+
; INTERLEAVE-NEXT: store float [[TMP6]], ptr [[TMP9]], align 4
942+
; INTERLEAVE-NEXT: store float [[TMP8]], ptr [[TMP10]], align 4
943+
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
944+
; INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
945+
; INTERLEAVE-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
946+
; INTERLEAVE: [[MIDDLE_BLOCK]]:
947+
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
948+
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
949+
; INTERLEAVE: [[SCALAR_PH]]:
950+
; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
951+
; INTERLEAVE-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
952+
; INTERLEAVE-NEXT: br label %[[LOOP:.*]]
953+
; INTERLEAVE: [[LOOP]]:
954+
; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
955+
; INTERLEAVE-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
956+
; INTERLEAVE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
957+
; INTERLEAVE-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4
958+
; INTERLEAVE-NEXT: [[ADD]] = fadd fast float [[FPINC]], [[FP_IV]]
959+
; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
960+
; INTERLEAVE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
961+
; INTERLEAVE-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}}
962+
; INTERLEAVE: [[EXIT]]:
963+
; INTERLEAVE-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[LOOP]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ]
964+
; INTERLEAVE-NEXT: ret float [[ADD_LCSSA]]
965+
;
966+
entry:
967+
br label %loop
968+
969+
loop:
970+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
971+
%fp.iv = phi float [ %init, %entry ], [ %add, %loop ]
972+
%gep.A = getelementptr inbounds float, ptr %A, i64 %iv
973+
store float %fp.iv, ptr %gep.A, align 4
974+
%add = fadd fast float %fpinc, %fp.iv
975+
%iv.next = add nuw nsw i64 %iv, 1
976+
%ec = icmp eq i64 %iv.next, %N
977+
br i1 %ec, label %exit, label %loop
978+
979+
exit:
980+
ret float %add
981+
}
982+
863983
define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N, float %fpinc) {
864984
; VEC-LABEL: define float @fp_postinc_use_fsub(
865985
; VEC-SAME: float [[INIT:%.*]], ptr noalias nocapture [[A:%.*]], i64 [[N:%.*]], float [[FPINC:%.*]]) {

0 commit comments

Comments
 (0)