-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LV] Compute value of escaped induction based on the computed end value. #110576
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Update fixupIVUsers to compute the value for escaped inductions using the already computed end value of the induction (EndValue), but subtracting the step. This results in slightly simpler codegen, as we avoid computing the full transformed index at VectorTripCount - 1.
@llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesUpdate fixupIVUsers to compute the value for escaped inductions using the already computed end value of the induction (EndValue), but subtracting the step. This results in slightly simpler codegen, as we avoid computing the full transformed index at VectorTripCount - 1. Full diff: https://github.com/llvm/llvm-project/pull/110576.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 034765bee40e7b..c2629c5ee7087a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2755,17 +2755,22 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
- Value *CountMinusOne = B.CreateSub(
- VectorTripCount, ConstantInt::get(VectorTripCount->getType(), 1));
- CountMinusOne->setName("cmo");
-
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
assert(StepVPV && "step must have been expanded during VPlan execution");
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
: State.get(StepVPV, VPLane(0));
- Value *Escape =
- emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step,
- II.getKind(), II.getInductionBinOp());
+ Value *Escape = nullptr;
+ if (EndValue->getType()->isIntegerTy())
+ Escape = B.CreateSub(EndValue, Step);
+ else if (EndValue->getType()->isPointerTy())
+ Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
+ else if (EndValue->getType()->isFloatingPointTy()) {
+ Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
+ Instruction::FAdd
+ ? Instruction::FSub
+ : Instruction::FAdd,
+ EndValue, Step);
+ }
Escape->setName("ind.escape");
MissingVals[UI] = Escape;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
index c28776e82776b7..64b69be5f52598 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
@@ -42,9 +42,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
-; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1
-; CHECK-NEXT: [[TMP37:%.*]] = mul i64 [[CMO]], 8
-; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP37]]
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[IND_END]], i64 -8
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index cb4f5f6d9eabaf..54dd9c870a1709 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -208,25 +208,23 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x double> [ <double 0.000000e+00, double 3.000000e+00, double 6.000000e+00, double 9.000000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.200000e+01, double 1.200000e+01, double 1.200000e+01, double 1.200000e+01>
-; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
-; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
+; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
+; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]]
; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32
; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 64
; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96
; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], ptr [[TMP1]], align 8
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], ptr [[TMP2]], align 8
-; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2]], ptr [[TMP3]], align 8
-; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3]], ptr [[TMP4]], align 8
+; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], ptr [[TMP3]], align 8
+; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_3]], ptr [[TMP4]], align 8
; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], <double 4.800000e+01, double 4.800000e+01, double 4.800000e+01, double 4.800000e+01>
; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; AUTO_VEC: middle.block:
; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
-; AUTO_VEC-NEXT: [[CMO:%.*]] = add nsw i64 [[N_VEC]], -1
-; AUTO_VEC-NEXT: [[DOTCAST6:%.*]] = sitofp i64 [[CMO]] to double
-; AUTO_VEC-NEXT: [[TMP6:%.*]] = fmul fast double [[DOTCAST6]], 3.000000e+00
+; AUTO_VEC-NEXT: [[IND_ESCAPE:%.*]] = fadd fast double [[TMP0]], -3.000000e+00
; AUTO_VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; AUTO_VEC: for.body:
; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
@@ -238,7 +236,7 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
; AUTO_VEC-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[SMAX]]
; AUTO_VEC-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; AUTO_VEC: for.end:
-; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ]
+; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ]
; AUTO_VEC-NEXT: ret double [[J_LCSSA]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index bf27c146ec9ce1..02fdbc05ed5188 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -63,7 +63,7 @@ for.end:
; CHECK-LABEL: @geppre
; CHECK-LABEL: middle.block:
-; CHECK: %ind.escape = getelementptr i8, ptr %ptr, i64 496
+; CHECK: %ind.escape = getelementptr i8, ptr %ind.end, i64 -16
; CHECK-LABEL: for.end:
; CHECK: %[[RET:.*]] = phi ptr [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
; CHECK: ret ptr %[[RET]]
@@ -85,9 +85,7 @@ for.end:
; CHECK-LABEL: @both
; CHECK-LABEL: middle.block:
-; CHECK: %[[END:.*]] = sub i64 %n.vec, 1
-; CHECK: %[[END_OFFSET:.*]] = mul i64 %[[END]], 4
-; CHECK: %ind.escape = getelementptr i8, ptr %base, i64 %[[END_OFFSET]]
+; CHECK: %ind.escape = getelementptr i8, ptr %ind.end1, i64 -4
; CHECK-LABEL: for.end:
; CHECK: %[[RET:.*]] = phi ptr [ %inc.lag1, %for.body ], [ %ind.escape, %middle.block ]
; CHECK: ret ptr %[[RET]]
@@ -142,7 +140,7 @@ for.end:
; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]]
; CHECK: middle.block
; CHECK: %[[CMP:.+]] = icmp eq i32 %[[T5]], %[[N_VEC]]
-; CHECK: %ind.escape = add i32 %[[T15]],
+; CHECK: %ind.escape = sub i32 %ind.end8, -8
; CHECK: br i1 %[[CMP]], label %BB3, label %scalar.ph
define void @PR30742() {
BB0:
diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
index 80a6bb50ca91b6..d462d3aa650d28 100644
--- a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
+++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
@@ -51,8 +51,7 @@ define i32 @test(ptr %arr, i64 %n) {
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
-; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1
-; CHECK-NEXT: [[IND_ESCAPE:%.*]] = add i64 1, [[CMO]]
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[IND_END]], 1
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOAD_VAL:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll
index c0eb4ccdd6d7e5..af1c146c2c6c4c 100644
--- a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll
@@ -28,10 +28,10 @@ define void @test1_pr58811() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
-; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
+; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA]]
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: br i1 false, label [[LOOP_3_PREHEADER:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ]
@@ -123,28 +123,28 @@ define void @test2_pr58811() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
-; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
+; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA]]
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: br i1 false, label [[LOOP_4_PREHEADER:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ]
; CHECK-NEXT: br label [[LOOP_3:%.*]]
; CHECK: loop.3:
-; CHECK-NEXT: [[INT16_TINDARRAYSAFEVAR_186_0747_1:%.*]] = phi i16 [ [[INC_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[UINT32_TVAR_177_2745_1:%.*]] = phi i32 [ [[SUB93_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[SUB93_1]] = sub i32 [[UINT32_TVAR_177_2745_1]], [[IV_2_LCSSA]]
-; CHECK-NEXT: [[INC_1]] = add i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 1
-; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 198
+; CHECK-NEXT: [[IV_4:%.*]] = phi i16 [ [[INC_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[IV_5:%.*]] = phi i32 [ [[SUB93_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[SUB93_1]] = sub i32 [[IV_5]], [[IV_2_LCSSA]]
+; CHECK-NEXT: [[INC_1]] = add i16 [[IV_4]], 1
+; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[IV_4]], 198
; CHECK-NEXT: br i1 [[CMP88_1]], label [[LOOP_3]], label [[LOOP_4_PREHEADER]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: loop.4.preheader:
-; CHECK-NEXT: [[UINT32_TVAR_177_2745_1_LCSSA:%.*]] = phi i32 [ [[UINT32_TVAR_177_2745_1]], [[LOOP_3]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[IV_5_LCSSA:%.*]] = phi i32 [ [[IV_5]], [[LOOP_3]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[LOOP_4]]
; CHECK: loop.4:
-; CHECK-NEXT: [[UINT32_TVAR_177_2745_2:%.*]] = phi i32 [ [[SUB93_2]], [[LOOP_4]] ], [ 0, [[LOOP_4_PREHEADER]] ]
-; CHECK-NEXT: [[SUB93_2]] = sub i32 [[UINT32_TVAR_177_2745_2]], [[UINT32_TVAR_177_2745_1_LCSSA]]
+; CHECK-NEXT: [[IV_6:%.*]] = phi i32 [ [[SUB93_2]], [[LOOP_4]] ], [ 0, [[LOOP_4_PREHEADER]] ]
+; CHECK-NEXT: [[SUB93_2]] = sub i32 [[IV_6]], [[IV_5_LCSSA]]
; CHECK-NEXT: br i1 false, label [[LOOP_4]], label [[LOOP_1_HEADER_LOOPEXIT]]
;
entry:
@@ -201,10 +201,10 @@ define void @test3_pr58811() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
-; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[TMP3]]
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[TMP3]]
; CHECK-NEXT: br i1 false, label [[LOOP_4_PREHEADER:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The end result definitely looks like better code! I just had a couple of questions regarding equivalence.
Value *Escape = | ||
emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step, | ||
II.getKind(), II.getInductionBinOp()); | ||
Value *Escape = nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One of the things that emitTransformedIndex
does is add a cast between CountMinusOne
and the type of Step
. Is it worth adding an assert that the types match?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CountMinusOne
has the same type as VectorTripCount
which may be different to the type of Step
. EndValue
should always have the same type as the Step
, as it is computed using it. There should already be asserts when creating the expression that the types match which should catch any differences here.
II.getKind(), II.getInductionBinOp()); | ||
Value *Escape = nullptr; | ||
if (EndValue->getType()->isIntegerTy()) | ||
Escape = B.CreateSub(EndValue, Step); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this guaranteed to be the same value as before? For example, do we know for sure that the overflow behaviour in each scenario is the same?
Basically does this statement hold true:
start + (n - 1)
== end - 1
where I assume that end
is equivalent to start + n
. For example, I can imagine a scenario where start + (n - 1)
is UINT64_MAX and so end
might be 0. Perhaps it doesn't matter since end
will underflow back to UINT64_MAX?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes it should always produce the same result, except if the step is undef
as each use of the undef
value may see a different value: https://alive2.llvm.org/ce/z/DS8Cuc.
But I don't think the undef
case is relevant here; if step
would be undef
, then (tc - 1) * step
could also produce a different result to the scalar induction incremented by undef
in each loop iteration.
? Instruction::FSub | ||
: Instruction::FAdd, | ||
EndValue, Step); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think there is a problem here because you have initialised Escape
to nullptr
, but there is a missing else
case so we could seg fault. I think we should have a llvm_unreachable
in an else case to make it easier to catch any bugs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All possible induction types should be handled above, added an unreachable, thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
if (EndValue->getType()->isIntegerTy()) | ||
Escape = B.CreateSub(EndValue, Step); | ||
else if (EndValue->getType()->isPointerTy()) | ||
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step)); | ||
else if (EndValue->getType()->isFloatingPointTy()) { | ||
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() == | ||
Instruction::FAdd | ||
? Instruction::FSub | ||
: Instruction::FAdd, | ||
EndValue, Step); | ||
} else | ||
llvm_unreachable("all possible induction types must be handled"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (EndValue->getType()->isIntegerTy()) | |
Escape = B.CreateSub(EndValue, Step); | |
else if (EndValue->getType()->isPointerTy()) | |
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step)); | |
else if (EndValue->getType()->isFloatingPointTy()) { | |
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() == | |
Instruction::FAdd | |
? Instruction::FSub | |
: Instruction::FAdd, | |
EndValue, Step); | |
} else | |
llvm_unreachable("all possible induction types must be handled"); | |
if (EndValue->getType()->isIntegerTy()) { | |
Escape = B.CreateSub(EndValue, Step); | |
} else if (EndValue->getType()->isPointerTy()) { | |
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step)); | |
} else if (EndValue->getType()->isFloatingPointTy()) { | |
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() == | |
Instruction::FAdd | |
? Instruction::FSub | |
: Instruction::FAdd, | |
EndValue, Step); | |
} else { | |
llvm_unreachable("all possible induction types must be handled"); | |
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added {}
to the else
, thanks! Hope that was the only suggested change for the block
…ue. (llvm#110576) Update fixupIVUsers to compute the value for escaped inductions using the already computed end value of the induction (EndValue), but subtracting the step. This results in slightly simpler codegen, as we avoid computing the full transformed index at VectorTripCount - 1. PR: llvm#110576
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor couple of post-commit nits.
Escape = B.CreateSub(EndValue, Step); | ||
else if (EndValue->getType()->isPointerTy()) | ||
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step)); | ||
else if (EndValue->getType()->isFloatingPointTy()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit (post-commit):
else {
assert(EndValue->getType()->isFloatingPointTy() && "...");
...
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adjusted in 2437784, thanks!
else if (EndValue->getType()->isPointerTy()) | ||
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step)); | ||
else if (EndValue->getType()->isFloatingPointTy()) { | ||
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() == |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit (post-commit): is there a more consistent way to handle these three cases rather than creating sub for integer (assuming original opcode is add?), negating step for pointer, and reversing the opcode for floating point? Would negating the step work for all, with their original opcode?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately I am not sure there's a way to do this more concisely overall. Negating the step would work for both integer and pointers, but then we still would need to select between Add/PtrAdd. And add(neg()) requires an additional instructions for integers.
For floating points, perhaps it might be possible to normalize the representation of currently fsub step
to fadd -step
in general, but we would still need to handle FP values separately. I left things as is for now
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps CreateBinOp() could be used for both integer and FP, as in something like:
// FP induction opcodes are inversed FAdd<->FSub,
// integer induction opcode Add is inversed -> Sub,
// otherwise zero is returned.
auto getInverseOpcode = []([BinaryOperator *BinOp) {
if (!BinOp)
return 0;
switch (BinOp->getOpcode()) {
case Instruction::Add: return Instruction::Sub;
case Instruction::FAdd: return Instruction::FSub;
case Instruction::FSub: return Instruction::FAdd;
default : return 0;
};
}
// For pointer inductions negate Step, for integer and
// FP inductions better to inverse the opcode instead.
unsigned InverseOpcode = getInverseOpcode(II.getInductionBinOp());
assert((InverseOpcode || II.getKind() == IK_PtrInduction) && "Unsupported induction opcode or type");
Value *Escape = InverseOpcode
? B.CreateBinOp(InverseOpcode, EndValue, Step)
: B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
Simplify code as suggested post-commit in #110576.
else if (EndValue->getType()->isPointerTy()) | ||
Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step)); | ||
else if (EndValue->getType()->isFloatingPointTy()) { | ||
Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() == |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps CreateBinOp() could be used for both integer and FP, as in something like:
// FP induction opcodes are inversed FAdd<->FSub,
// integer induction opcode Add is inversed -> Sub,
// otherwise zero is returned.
auto getInverseOpcode = []([BinaryOperator *BinOp) {
if (!BinOp)
return 0;
switch (BinOp->getOpcode()) {
case Instruction::Add: return Instruction::Sub;
case Instruction::FAdd: return Instruction::FSub;
case Instruction::FSub: return Instruction::FAdd;
default : return 0;
};
}
// For pointer inductions negate Step, for integer and
// FP inductions better to inverse the opcode instead.
unsigned InverseOpcode = getInverseOpcode(II.getInductionBinOp());
assert((InverseOpcode || II.getKind() == IK_PtrInduction) && "Unsupported induction opcode or type");
Value *Escape = InverseOpcode
? B.CreateBinOp(InverseOpcode, EndValue, Step)
: B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
Value *CountMinusOne = B.CreateSub( | ||
VectorTripCount, ConstantInt::get(VectorTripCount->getType(), 1)); | ||
CountMinusOne->setName("cmo"); | ||
|
||
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep()); | ||
assert(StepVPV && "step must have been expanded during VPlan execution"); | ||
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue() | ||
: State.get(StepVPV, VPLane(0)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Above comment deserves update:
// The simplest way to get this is to recompute it from the constituent SCEVs,
// that is Start + (Step * (CRD - 1)).
Update fixupIVUsers to compute the value for escaped inductions using the already computed end value of the induction (EndValue), but subtracting the step.
This results in slightly simpler codegen, as we avoid computing the full transformed index at VectorTripCount - 1.