[VPlan] Add opcode to create step for wide inductions.

fhahn · lukel97 · commit f5a5d655b0b2 · 2024-12-17T12:38:05.000+08:00
This patch adds a WideIVStep opcode that can be used to create a vector with the steps to increment a wide induction.

The opcode has 3 operands:
* the vector step,
* the scale of the vector step,
* a constant indicating the target type of the VPInstruction (this avoids having to add explicit types to VPInstructions; we could also introduce a dedicated recipe, at the cost of a lot more scaffolding).

The opcode is later converted into a sequence of recipes that convert the scale and step to the target type, if needed, and then multiply the vector step by the scale.

This simplifies code that needs to materialize step vectors, e.g. replacing wide IVs with an increment of the wide IV step, as a follow-up to llvm#108378.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1228,6 +1228,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
     CalculateTripCountMinusVF,
     // Increment the canonical IV separately for each unrolled part.
     CanonicalIVIncrementForPart,
+    WideIVStep,
     BranchOnCount,
     BranchOnCond,
     ComputeReductionResult,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -678,7 +678,8 @@ bool VPInstruction::isFPMathOp() const {
   return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
          Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
          Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
-         Opcode == Instruction::FCmp || Opcode == Instruction::Select;
+         Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
+         Opcode == VPInstruction::WideIVStep;
 }
 #endif
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1962,7 +1962,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
 }
 
 void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
-  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
+  Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
+  VPTypeAnalysis TypeInfo(CanonicalIVType);
 
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
            vp_depth_first_deep(Plan.getEntry()))) {
@@ -1979,23 +1980,61 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
         PointerIVs.push_back(&R);
     for (VPRecipeBase *R : PointerIVs)
       R->moveBefore(*VPBB, VPBB->getFirstNonPhi());
-
-    for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
+    
+    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
       if (auto *WidenIVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
         expandVPWidenIntOrFpInduction(WidenIVR, TypeInfo);
         continue;
       }
-      if (!isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(&R))
+      if (isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(&R)) {
+        auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
+        StringRef Name =
+            isa<VPCanonicalIVPHIRecipe>(PhiR) ? "index" : "evl.based.iv";
+        auto *ScalarR = new VPScalarPHIRecipe(PhiR->getStartValue(),
+                                              PhiR->getBackedgeValue(),
+                                              PhiR->getDebugLoc(), Name);
+        ScalarR->insertBefore(PhiR);
+        PhiR->replaceAllUsesWith(ScalarR);
+        PhiR->eraseFromParent();
         continue;
-      auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
-      StringRef Name =
-          isa<VPCanonicalIVPHIRecipe>(PhiR) ? "index" : "evl.based.iv";
-      auto *ScalarR =
-          new VPScalarPHIRecipe(PhiR->getStartValue(), PhiR->getBackedgeValue(),
-                                PhiR->getDebugLoc(), Name);
-      ScalarR->insertBefore(PhiR);
-      PhiR->replaceAllUsesWith(ScalarR);
-      PhiR->eraseFromParent();
+      }
+
+      auto *VPI = dyn_cast<VPInstruction>(&R);
+      if (VPI && VPI->getOpcode() == VPInstruction::WideIVStep) {
+        VPBuilder Builder(VPI->getParent(), VPI->getIterator());
+        VPValue *VectorStep = VPI->getOperand(0);
+        Type *IVTy = TypeInfo.inferScalarType(VPI->getOperand(2));
+        if (TypeInfo.inferScalarType(VectorStep) != IVTy) {
+          Instruction::CastOps CastOp = IVTy->isFloatingPointTy()
+                                            ? Instruction::UIToFP
+                                            : Instruction::Trunc;
+          VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
+        }
+
+        VPValue *ScalarStep = VPI->getOperand(1);
+        auto *ConstStep =
+            ScalarStep->isLiveIn()
+                ? dyn_cast<ConstantInt>(ScalarStep->getLiveInIRValue())
+                : nullptr;
+        if (!ConstStep || ConstStep->getValue() != 1) {
+          if (TypeInfo.inferScalarType(ScalarStep) != IVTy) {
+            ScalarStep =
+                Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
+          }
+
+          std::optional<FastMathFlags> FMFs;
+          if (IVTy->isFloatingPointTy())
+            FMFs = VPI->getFastMathFlags();
+
+          unsigned MulOpc =
+              IVTy->isFloatingPointTy() ? Instruction::FMul : Instruction::Mul;
+          VPInstruction *Mul = Builder.createNaryOp(
+              MulOpc, {VectorStep, ScalarStep}, FMFs, R.getDebugLoc());
+          VectorStep = Mul;
+        }
+        VPI->replaceAllUsesWith(VectorStep);
+        VPI->eraseFromParent();
+      }
     }
   }
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -155,33 +155,15 @@ void UnrollState::unrollWidenInductionByUF(
   if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
     FMFs = ID.getInductionBinOp()->getFastMathFlags();
 
-  VPValue *VectorStep = &Plan.getVF();
-  VPBuilder Builder(PH);
-  if (TypeInfo.inferScalarType(VectorStep) != IVTy) {
-    Instruction::CastOps CastOp =
-        IVTy->isFloatingPointTy() ? Instruction::UIToFP : Instruction::Trunc;
-    VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
-    ToSkip.insert(VectorStep->getDefiningRecipe());
-  }
-
   VPValue *ScalarStep = IV->getStepValue();
-  auto *ConstStep = ScalarStep->isLiveIn()
-                        ? dyn_cast<ConstantInt>(ScalarStep->getLiveInIRValue())
-                        : nullptr;
-  if (!ConstStep || ConstStep->getValue() != 1) {
-    if (TypeInfo.inferScalarType(ScalarStep) != IVTy) {
-      ScalarStep =
-          Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
-      ToSkip.insert(ScalarStep->getDefiningRecipe());
-    }
+  VPBuilder Builder(PH);
+  VPInstruction *VectorStep =
+      Builder.createNaryOp(VPInstruction::WideIVStep,
+                           {&Plan.getVF(), ScalarStep,
+                            Plan.getOrAddLiveIn(Constant::getNullValue(IVTy))},
+                           FMFs, IV->getDebugLoc());
 
-    unsigned MulOpc =
-        IVTy->isFloatingPointTy() ? Instruction::FMul : Instruction::Mul;
-    VPInstruction *Mul = Builder.createNaryOp(MulOpc, {VectorStep, ScalarStep},
-                                              FMFs, IV->getDebugLoc());
-    VectorStep = Mul;
-    ToSkip.insert(Mul);
-  }
+  ToSkip.insert(VectorStep);
 
   // Now create recipes to compute the induction steps for part 1 .. UF. Part 0
   // remains the header phi. Parts > 0 are computed by adding Step to the

Original file line number	Diff line number	Diff line change
`@@ -678,7 +678,8 @@ bool VPInstruction::isFPMathOp() const {`
`678`	`678`	`return Opcode == Instruction::FAdd \|\| Opcode == Instruction::FMul \|\|`
`679`	`679`	`Opcode == Instruction::FNeg \|\| Opcode == Instruction::FSub \|\|`
`680`	`680`	`Opcode == Instruction::FDiv \|\| Opcode == Instruction::FRem \|\|`
`681`		`- Opcode == Instruction::FCmp \|\| Opcode == Instruction::Select;`
	`681`	`+ Opcode == Instruction::FCmp \|\| Opcode == Instruction::Select \|\|`
	`682`	`+ Opcode == VPInstruction::WideIVStep;`
`682`	`683`	`}`
`683`	`684`	`#endif`
`684`	`685`