Commit fde2f52

[RISCV] Minimally modify incoming state in transferBefore
transferBefore currently takes an incoming state and an instruction, computes the new state needed for the instruction, and then modifies that new state to be more similar to the incoming state. This patch reverses the approach: it instead takes the incoming state and modifies only the bits that are demanded by the instruction. I find this makes things slightly easier to reason about. It also allows us to use the x0, x0 form in more places, in particular with vmv.x.s, since we're no longer relying on isScalarInsertInstr but instead reusing the logic in getDemanded. I haven't had a chance to check, but hopefully this also addresses some of the regressions seen in #71501.
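For illustration only, here is a minimal C++ sketch of the demanded-fields idea described above. The names (State, Demands, mergeDemanded) are hypothetical and simplified; this is not the actual VSETVLIInfo/DemandedFields code shown in the diff below.

// Hypothetical, simplified sketch (not the code in this commit): start from
// the incoming state and overwrite only the fields the instruction demands,
// rather than starting from the requested state and relaxing it back toward
// the incoming one.
struct State {
  unsigned SEW = 8;
  unsigned LMUL = 1;
  bool TailAgnostic = true;
  bool MaskAgnostic = true;
};

struct Demands {
  bool SEW = false;
  bool LMUL = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;
};

State mergeDemanded(State Incoming, const State &Requested, const Demands &D) {
  // Only demanded fields change; undemanded fields keep their incoming values,
  // which makes it more likely that the existing vsetvli (or the cheaper
  // "vsetvli x0, x0, vtype" form) can be reused.
  if (D.SEW)
    Incoming.SEW = Requested.SEW;
  if (D.LMUL)
    Incoming.LMUL = Requested.LMUL;
  if (D.TailPolicy)
    Incoming.TailAgnostic = Requested.TailAgnostic;
  if (D.MaskPolicy)
    Incoming.MaskAgnostic = Requested.MaskAgnostic;
  return Incoming;
}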
1 parent 933dd03 commit fde2f52

13 files changed: +374 -510 lines

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 54 additions & 48 deletions
@@ -479,6 +479,8 @@ class VSETVLIInfo {
 
   unsigned getSEW() const { return SEW; }
   RISCVII::VLMUL getVLMUL() const { return VLMul; }
+  bool getTailAgnostic() const { return TailAgnostic; }
+  bool getMaskAgnostic() const { return MaskAgnostic; }
 
   bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
     if (hasAVLImm())
@@ -1013,73 +1015,77 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
   return true;
 }
 
-// Given an incoming state reaching MI, modifies that state so that it is minimally
-// compatible with MI. The resulting state is guaranteed to be semantically legal
-// for MI, but may not be the state requested by MI.
+// Given an incoming state reaching MI, minimally modifies that state so that it
+// is compatible with MI. The resulting state is guaranteed to be semantically
+// legal for MI, but may not be the state requested by MI.
 void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                         const MachineInstr &MI) const {
   uint64_t TSFlags = MI.getDesc().TSFlags;
   if (!RISCVII::hasSEWOp(TSFlags))
     return;
 
-  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+  VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
   assert(NewInfo.isValid() && !NewInfo.isUnknown());
   if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
     return;
 
-  const VSETVLIInfo PrevInfo = Info;
-  Info = NewInfo;
+  if (Info.hasSEWLMULRatioOnly() || !Info.isValid() || Info.isUnknown())
+    Info = NewInfo;
 
-  if (!RISCVII::hasVLOp(TSFlags))
-    return;
+  DemandedFields Demanded = getDemanded(MI, MRI, ST);
 
   // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
   // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
   // places.
-  DemandedFields Demanded = getDemanded(MI, MRI, ST);
-  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
-      !PrevInfo.isUnknown()) {
-    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
-            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
-      Info.setVLMul(*NewVLMul);
-  }
-
-  // If we only demand VL zeroness (i.e. vmv.s.x and vmv.x.s), then there are
-  // only two behaviors, VL = 0 and VL > 0. We can discard the user requested
-  // AVL and just use the last one if we can prove it equally zero. This
-  // removes a vsetvli entirely if the types match or allows use of cheaper avl
-  // preserving variant if VLMAX doesn't change. If VLMAX might change, we
-  // couldn't use the 'vsetvli x0, x0, vtype" variant, so we avoid the transform
-  // to prevent extending live range of an avl register operand.
-  // TODO: We can probably relax this for immediates.
-  if (Demanded.VLZeroness && !Demanded.VLAny && PrevInfo.isValid() &&
-      PrevInfo.hasEquallyZeroAVL(Info, *MRI) && Info.hasSameVLMAX(PrevInfo)) {
-    if (PrevInfo.hasAVLImm())
-      Info.setAVLImm(PrevInfo.getAVLImm());
-    else
-      Info.setAVLReg(PrevInfo.getAVLReg());
-    return;
+  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && Info.isValid() &&
+      !Info.isUnknown()) {
+    if (auto SameRatioLMUL = RISCVVType::getSameRatioLMUL(
+            Info.getSEW(), Info.getVLMUL(), NewInfo.getSEW())) {
+      NewInfo.setVLMul(*SameRatioLMUL);
+      Demanded.LMUL = true;
+    }
   }
 
-  // If AVL is defined by a vsetvli with the same VLMAX, we can
-  // replace the AVL operand with the AVL of the defining vsetvli.
-  // We avoid general register AVLs to avoid extending live ranges
-  // without being sure we can kill the original source reg entirely.
-  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
-    return;
-  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
-  if (!DefMI || !isVectorConfigInstr(*DefMI))
-    return;
+  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the AVL
+  // operand with the AVL of the defining vsetvli. We avoid general register
+  // AVLs to avoid extending live ranges without being sure we can kill the
+  // original source reg entirely.
+  if (RISCVII::hasVLOp(TSFlags) && NewInfo.hasAVLReg() &&
+      NewInfo.getAVLReg().isVirtual()) {
+    MachineInstr *DefMI = MRI->getVRegDef(NewInfo.getAVLReg());
+    if (DefMI && isVectorConfigInstr(*DefMI)) {
+      VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+      if (DefInfo.hasSameVLMAX(NewInfo) &&
+          (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
+        if (DefInfo.hasAVLImm())
+          NewInfo.setAVLImm(DefInfo.getAVLImm());
+        else
+          NewInfo.setAVLReg(DefInfo.getAVLReg());
+      }
+    }
+  }
 
-  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
-  if (DefInfo.hasSameVLMAX(Info) &&
-      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
-    if (DefInfo.hasAVLImm())
-      Info.setAVLImm(DefInfo.getAVLImm());
+  // If MI only demands that VL has the same zeroness, we only need to set the
+  // AVL if the zeroness differs, or if VLMAX changes (since that prevents us
+  // from using vsetvli x0, x0).
+  bool CanUseX0X0Form =
+      Info.hasEquallyZeroAVL(NewInfo, *MRI) && Info.hasSameVLMAX(NewInfo);
+  if (Demanded.VLAny || (Demanded.VLZeroness && !CanUseX0X0Form)) {
+    if (NewInfo.hasAVLImm())
+      Info.setAVLImm(NewInfo.getAVLImm());
     else
-      Info.setAVLReg(DefInfo.getAVLReg());
-    return;
-  }
+      Info.setAVLReg(NewInfo.getAVLReg());
+  }
+
+  Info.setVTYPE(
+      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? NewInfo : Info).getVLMUL(),
+      ((Demanded.SEW || Demanded.SEWLMULRatio) ? NewInfo : Info).getSEW(),
+      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
+      // if needed.
+      (Demanded.TailPolicy ? NewInfo : Info).getTailAgnostic() ||
+          NewInfo.getTailAgnostic(),
+      (Demanded.MaskPolicy ? NewInfo : Info).getMaskAgnostic() ||
+          NewInfo.getMaskAgnostic());
 }
 
 // Given a state with which we evaluated MI (see transferBefore above for why

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll

Lines changed: 10 additions & 10 deletions
@@ -106,7 +106,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; RV32-NEXT: vle8.v v8, (a0)
 ; RV32-NEXT: vmseq.vi v8, v8, 0
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: srl a0, a0, a1
 ; RV32-NEXT: andi a0, a0, 1
@@ -117,7 +117,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; RV64-NEXT: vle8.v v8, (a0)
 ; RV64-NEXT: vmseq.vi v8, v8, 0
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: srl a0, a0, a1
 ; RV64-NEXT: andi a0, a0, 1
@@ -128,7 +128,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; RV32ZBS-NEXT: vle8.v v8, (a0)
 ; RV32ZBS-NEXT: vmseq.vi v8, v8, 0
-; RV32ZBS-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32ZBS-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; RV32ZBS-NEXT: vmv.x.s a0, v8
 ; RV32ZBS-NEXT: bext a0, a0, a1
 ; RV32ZBS-NEXT: ret
@@ -138,7 +138,7 @@ define i1 @extractelt_v16i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; RV64ZBS-NEXT: vle8.v v8, (a0)
 ; RV64ZBS-NEXT: vmseq.vi v8, v8, 0
-; RV64ZBS-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64ZBS-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; RV64ZBS-NEXT: vmv.x.s a0, v8
 ; RV64ZBS-NEXT: bext a0, a0, a1
 ; RV64ZBS-NEXT: ret
@@ -155,7 +155,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma
 ; RV32-NEXT: vle8.v v8, (a0)
 ; RV32-NEXT: vmseq.vi v10, v8, 0
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; RV32-NEXT: vmv.x.s a0, v10
 ; RV32-NEXT: srl a0, a0, a1
 ; RV32-NEXT: andi a0, a0, 1
@@ -167,7 +167,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma
 ; RV64-NEXT: vle8.v v8, (a0)
 ; RV64-NEXT: vmseq.vi v10, v8, 0
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v10
 ; RV64-NEXT: srl a0, a0, a1
 ; RV64-NEXT: andi a0, a0, 1
@@ -179,7 +179,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV32ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, ma
 ; RV32ZBS-NEXT: vle8.v v8, (a0)
 ; RV32ZBS-NEXT: vmseq.vi v10, v8, 0
-; RV32ZBS-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32ZBS-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; RV32ZBS-NEXT: vmv.x.s a0, v10
 ; RV32ZBS-NEXT: bext a0, a0, a1
 ; RV32ZBS-NEXT: ret
@@ -190,7 +190,7 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, ma
 ; RV64ZBS-NEXT: vle8.v v8, (a0)
 ; RV64ZBS-NEXT: vmseq.vi v10, v8, 0
-; RV64ZBS-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64ZBS-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; RV64ZBS-NEXT: vmv.x.s a0, v10
 ; RV64ZBS-NEXT: bext a0, a0, a1
 ; RV64ZBS-NEXT: ret
@@ -221,7 +221,7 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma
 ; RV64-NEXT: vle8.v v8, (a0)
 ; RV64-NEXT: vmseq.vi v12, v8, 0
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli zero, a2, e64, m4, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v12
 ; RV64-NEXT: srl a0, a0, a1
 ; RV64-NEXT: andi a0, a0, 1
@@ -246,7 +246,7 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind {
 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m4, ta, ma
 ; RV64ZBS-NEXT: vle8.v v8, (a0)
 ; RV64ZBS-NEXT: vmseq.vi v12, v8, 0
-; RV64ZBS-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64ZBS-NEXT: vsetvli zero, a2, e64, m4, ta, ma
 ; RV64ZBS-NEXT: vmv.x.s a0, v12
 ; RV64ZBS-NEXT: bext a0, a0, a1
 ; RV64ZBS-NEXT: ret
