llvm · preames · Aug 22, 2024 · Jul 24, 2024 · Aug 18, 2024 · Aug 21, 2024
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -61,6 +61,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
   }
 
 private:
+  bool tryToReduceVL(MachineInstr &MI) const;
   bool convertToVLMAX(MachineInstr &MI) const;
   bool convertToWholeRegister(MachineInstr &MI) const;
   bool convertToUnmasked(MachineInstr &MI) const;
@@ -81,6 +82,141 @@ char RISCVVectorPeephole::ID = 0;
 INITIALIZE_PASS(RISCVVectorPeephole, DEBUG_TYPE, "RISC-V Fold Masks", false,
                 false)
 
+/// Given two VL operands, do we know that LHS <= RHS?
+///
+/// Returns true only when the relation is provable: both operands are the same
+/// virtual register, RHS is the VLMAX sentinel, or both are plain immediates
+/// with LHS <= RHS.  A false result means "unknown", not "LHS > RHS".
+static bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
+  if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() &&
+      LHS.getReg() == RHS.getReg())
+    return true;
+  if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel)
+    return true;
+  // VLMAX on the left is not known to fit under any remaining RHS.
+  if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel)
+    return false;
+  if (!LHS.isImm() || !RHS.isImm())
+    return false;
+  return LHS.getImm() <= RHS.getImm();
+}
+
+/// Returns the SEW/LMUL ratio of \p MI, computed from the LMUL encoded in its
+/// TSFlags and its log2(SEW) operand.
+static unsigned getSEWLMULRatio(const MachineInstr &MI) {
+  RISCVII::VLMUL LMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
+  unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+  return RISCVVType::getSEWLMULRatio(1 << Log2SEW, LMUL);
+}
+
+/// Returns true if \p VL, the VL operand of \p User, can also be read by
+/// \p Src.  \p Src must precede \p User in the same basic block, and the
+/// function is assumed to be in SSA form.
+static bool isVLAvailableAt(const MachineOperand &VL, const MachineInstr &Src,
+                            const MachineInstr &User,
+                            const MachineRegisterInfo &MRI) {
+  // Immediates can be used anywhere.
+  if (!VL.isReg())
+    return true;
+  // Be conservative about anything that is not a virtual register.
+  if (!VL.getReg().isVirtual())
+    return false;
+  const MachineInstr *Def = MRI.getVRegDef(VL.getReg());
+  if (!Def || Def == &Src)
+    return false;
+  // A def in another block that reaches User also reaches Src.
+  if (Def->getParent() != Src.getParent())
+    return true;
+  // Otherwise Def sits above User in this block; Src can read it unless it
+  // lies strictly between Src and User.
+  for (const MachineInstr *I = Src.getNextNode(); I && I != &User;
+       I = I->getNextNode())
+    if (I == Def)
+      return false;
+  return true;
+}
+
+// Attempt to reduce the VL of an instruction whose sole use is feeding an
+// instruction with a narrower VL.  This currently works backwards from the
+// user instruction (which might have a smaller VL).
+//
+// Returns true if the VL operand of MI's source instruction was rewritten.
+bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
+  // Note that the goal here is a bit multifaceted.
+  // 1) For stores, reducing the VL of the value being stored may help to
+  //    reduce VL toggles.  This is somewhat of an artifact of the fact we
+  //    promote arithmetic instructions but VL predicate stores.
+  // 2) For vmv.v.v, reducing VL eagerly on the source instruction allows us
+  //    to share code with the foldVMV_V_V transform below.
+  //
+  // Note that to the best of our knowledge, reducing VL is generally not
+  // a significant win on real hardware unless we can also reduce LMUL which
+  // this code doesn't try to do.
+  //
+  // TODO: We can handle a bunch more instructions here, and probably
+  // recurse backwards through operands too.
+  // Index of the operand holding the vector value consumed by MI.
+  unsigned SrcIdx = 0;
+  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+  default:
+    return false;
+  case RISCV::VSE8_V:
+  case RISCV::VSE16_V:
+  case RISCV::VSE32_V:
+  case RISCV::VSE64_V:
+    break;
+  case RISCV::VMV_V_V:
+    SrcIdx = 2;
+    break;
+  }
+
+  // MI already runs at VLMAX, so its source cannot be given a smaller VL.
+  MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
+  if (VL.isImm() && VL.getImm() == RISCV::VLMaxSentinel)
+    return false;
+
+  Register SrcReg = MI.getOperand(SrcIdx).getReg();
+  // Note: one *use*, not one *user*.
+  if (!MRI->hasOneUse(SrcReg))
+    return false;
+
+  MachineInstr *Src = MRI->getVRegDef(SrcReg);
+  if (!Src || Src->hasUnmodeledSideEffects() ||
+      Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
+      !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
+      !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
+    return false;
+
+  // Src needs to have the same VLMAX as MI
+  // NOTE(review): an equal SEW/LMUL ratio only establishes equal VLMAX, not
+  // equal element width; confirm Src's result EEW matches what MI reads.
+  if (getSEWLMULRatio(MI) != getSEWLMULRatio(*Src))
+    return false;
+
+  bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult(
+      TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
+  if (ActiveElementsAffectResult || Src->mayRaiseFPException())
+    return false;
+
+  MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
+  if (VL.isIdenticalTo(SrcVL) || !isVLKnownLE(VL, SrcVL))
+    return false;
+
+  // Src executes before MI, so it can only adopt a register VL whose def is
+  // already above it; otherwise we would create a use before its def.
+  if (!isVLAvailableAt(VL, *Src, MI, *MRI))
+    return false;
+
+  if (VL.isImm())
+    SrcVL.ChangeToImmediate(VL.getImm());
+  else if (VL.isReg())
+    SrcVL.ChangeToRegister(VL.getReg(), false);
+
+  // TODO: For instructions with a passthru, we could clear the passthru
+  // and tail policy since we've just proven the tail is not demanded.
+  return true;
+}
+
 /// Check if an operand is an immediate or a materialized ADDI $x0, imm.
 std::optional<unsigned>
 RISCVVectorPeephole::getConstant(const MachineOperand &VL) const {
@@ -325,22 +416,6 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
   return true;
 }
 
-/// Given two VL operands, returns the one known to be the smallest or nullptr
-/// if unknown.
-static const MachineOperand *getKnownMinVL(const MachineOperand *LHS,
-                                           const MachineOperand *RHS) {
-  if (LHS->isReg() && RHS->isReg() && LHS->getReg().isVirtual() &&
-      LHS->getReg() == RHS->getReg())
-    return LHS;
-  if (LHS->isImm() && LHS->getImm() == RISCV::VLMaxSentinel)
-    return RHS;
-  if (RHS->isImm() && RHS->getImm() == RISCV::VLMaxSentinel)
-    return LHS;
-  if (!LHS->isImm() || !RHS->isImm())
-    return nullptr;
-  return LHS->getImm() <= RHS->getImm() ? LHS : RHS;
-}
-
 /// Check if it's safe to move From down to To, checking that no physical
 /// registers are clobbered.
 static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To) {
@@ -362,21 +437,16 @@ static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To) {
   return From.isSafeToMove(SawStore);
 }
 
-static unsigned getSEWLMULRatio(const MachineInstr &MI) {
-  RISCVII::VLMUL LMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
-  unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
-  return RISCVVType::getSEWLMULRatio(1 << Log2SEW, LMUL);
-}
-
 /// If a PseudoVMV_V_V is the only user of its input, fold its passthru and VL
 /// into it.
 ///
 /// %x = PseudoVADD_V_V_M1 %passthru, %a, %b, %vl1, sew, policy
 /// %y = PseudoVMV_V_V_M1 %passthru, %x, %vl2, sew, policy
+///    (where %vl1 <= %vl2, see related tryToReduceVL)
 ///
 /// ->
 ///
-/// %y = PseudoVADD_V_V_M1 %passthru, %a, %b, min(vl1, vl2), sew, policy
+/// %y = PseudoVADD_V_V_M1 %passthru, %a, %b, vl1, sew, policy
 bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
   if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
     return false;
@@ -404,33 +474,16 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
       SrcPassthru.getReg() != Passthru.getReg())
     return false;
 
-  // Because Src and MI have the same passthru, we can use either AVL as long as
-  // it's the smaller of the two.
-  //
-  // (src pt, ..., vl=5)       x x x x x|. . .
-  // (vmv.v.v pt, src, vl=3)   x x x|. . . . .
-  // ->
-  // (src pt, ..., vl=3)       x x x|. . . . .
-  //
-  // (src pt, ..., vl=3)       x x x|. . . . .
-  // (vmv.v.v pt, src, vl=6)   x x x . . .|. .
-  // ->
-  // (src pt, ..., vl=3)       x x x|. . . . .
+  // Src VL will have already been reduced if legal (see tryToReduceVL),
+  // so we don't need to shrink the source VL here.  However, the user's
+  // VL may still be larger, so only fold when SrcVL <= the user's VL.
   MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
-  const MachineOperand *MinVL = getKnownMinVL(&MI.getOperand(3), &SrcVL);
-  if (!MinVL)
-    return false;
-
-  bool VLChanged = !MinVL->isIdenticalTo(SrcVL);
-  bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult(
-      TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
-
-  if (VLChanged && (ActiveElementsAffectResult || Src->mayRaiseFPException()))
+  if (!isVLKnownLE(SrcVL, MI.getOperand(3)))
     return false;
 
   // If Src ends up using MI's passthru/VL, move it so it can access it.
   // TODO: We don't need to do this if they already dominate Src.
-  if (!SrcVL.isIdenticalTo(*MinVL) || !SrcPassthru.isIdenticalTo(Passthru)) {
+  if (!SrcPassthru.isIdenticalTo(Passthru)) {
     if (!isSafeToMove(*Src, MI))
       return false;
     Src->moveBefore(&MI);
@@ -445,11 +498,6 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
                                               *Src->getParent()->getParent()));
   }
 
-  if (MinVL->isImm())
-    SrcVL.ChangeToImmediate(MinVL->getImm());
-  else if (MinVL->isReg())
-    SrcVL.ChangeToRegister(MinVL->getReg(), false);
-
   // Use a conservative tu,mu policy, RISCVInsertVSETVLI will relax it if
   // passthru is undef.
   Src->getOperand(RISCVII::getVecPolicyOpNum(Src->getDesc()))
@@ -498,6 +546,7 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
   for (MachineBasicBlock &MBB : MF) {
     for (MachineInstr &MI : make_early_inc_range(MBB)) {
       Changed |= convertToVLMAX(MI);
+      Changed |= tryToReduceVL(MI);
       Changed |= convertToUnmasked(MI);
       Changed |= convertToWholeRegister(MI);
       Changed |= convertVMergeToVMv(MI);

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
@@ -41,8 +41,8 @@ define void @abs_v6i16(ptr %x) {
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vrsub.vi v9, v8, 0
-; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vmax.vv v8, v8, v9
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %a = load <6 x i16>, ptr %x