Commit 31f3692

[RISCV] Adjust LMUL if not used to avoid VL toggle
A common pattern with vmv.s.x is that we need to set VL afterwards because the SEW/LMUL ratio has changed, and thus VLMAX has changed:

    vsetvli zero, a1, e64, m1, ta, ma
    vmv.s.x v16, a0
    vsetvli zero, a1, e32, m2, ta, ma

However, since LMUL and the SEW/LMUL ratio are ignored by vmv.s.x, we can avoid a VL toggle in the second vsetvli instruction by adjusting LMUL so that the SEW/LMUL ratio remains the same between the two instructions:

    vsetvli zero, a1, e64, m4, ta, ma
    vmv.s.x v16, a0
    vsetvli zero, zero, e32, m2, ta, ma

Avoiding a VL toggle may be more performant on some architectures, and in some cases allows a vsetvli to be deleted.
Parent: 1981b1b

8 files changed: +193, -142 lines
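
To make the adjustment concrete, here is a minimal standalone C++ sketch of the ratio arithmetic behind the commit message's example (illustrative only, not LLVM API code; it handles only integral LMULs for brevity):

    #include <cassert>
    #include <cstdio>

    int main() {
      // The second vsetvli demands SEW/LMUL = 32/2 = 16 (e32, m2).
      unsigned NextSEW = 32, NextLMUL = 2;
      unsigned NextRatio = NextSEW / NextLMUL; // 16

      // vmv.s.x ignores LMUL and the SEW/LMUL ratio, so re-pick the first
      // vsetvli's LMUL to match that ratio while keeping its SEW (e64).
      unsigned SEW = 64;
      unsigned NewLMUL = SEW / NextRatio; // 64 / 16 = 4 -> m4
      assert(SEW / NewLMUL == NextRatio);

      // VLMAX now stays constant across both vsetvlis, so the second can
      // become "vsetvli zero, zero, e32, m2, ta, ma" with no VL toggle.
      printf("vsetvli zero, a1, e%u, m%u, ta, ma\n", SEW, NewLMUL);
    }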

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 58 additions & 0 deletions
@@ -1463,6 +1463,55 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
   return areCompatibleVTYPEs(PriorVType, VType, Used);
 }
 
+// If LMUL or the SEW/LMUL ratio aren't demanded and MI and NextMI have the same
+// AVL, then we can try and change MI's LMUL so that we can avoid setting VL in
+// NextMI, e.g:
+//
+// vsetivli zero, 4, e32, m1, ta, ma
+// vsetivli zero, 4, e16, mf4, ta, ma
+//
+// vsetivli zero, 4, e32, mf2, ta, ma
+// vsetvli zero, zero, e16, mf4, ta, ma
+//
+// If possible, returns the new VTYPE that should be used for MI.
+static std::optional<unsigned>
+canAdjustSEWLMULRatio(const MachineInstr &MI, const MachineInstr &NextMI,
+                      const DemandedFields &Used) {
+  if (Used.LMUL || Used.SEWLMULRatio)
+    return std::nullopt;
+  if (!NextMI.getOperand(0).isDead())
+    return std::nullopt;
+  // If we end up increasing the SEW/LMUL ratio, then we will decrease VLMAX,
+  // which means we might end up changing VL in the case that AVL > VLMAX. So
+  // bail if the exact VL value is needed.
+  //
+  // TODO: We could potentially relax this when we know we're increasing VLMAX.
+  if (Used.VLAny)
+    return std::nullopt;
+
+  // If NextMI is already zero, zero then bail. If MI is zero, zero then we
+  // won't be able to tell if it has the same AVL as NextMI, so also bail.
+  if (isVLPreservingConfig(MI) || isVLPreservingConfig(NextMI))
+    return std::nullopt;
+
+  VSETVLIInfo NextMIInfo = getInfoForVSETVLI(NextMI);
+  VSETVLIInfo MIInfo = getInfoForVSETVLI(MI);
+  if (!MIInfo.hasSameAVL(NextMIInfo))
+    return std::nullopt;
+
+  unsigned SEW = MIInfo.getSEW() * 8;
+  // Fixed point value with 3 fractional bits.
+  unsigned NewRatio = SEW / NextMIInfo.getSEWLMULRatio();
+  bool Fractional = NewRatio < 8;
+  RISCVII::VLMUL NewVLMul = RISCVVType::encodeLMUL(
+      Fractional ? 8 / NewRatio : NewRatio / 8, Fractional);
+
+  unsigned VType = MIInfo.encodeVTYPE();
+  return RISCVVType::encodeVTYPE(NewVLMul, SEW / 8,
+                                 RISCVVType::isTailAgnostic(VType),
+                                 RISCVVType::isMaskAgnostic(VType));
+}
+
 void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
   MachineInstr *NextMI = nullptr;
   // We can have arbitrary code in successors, so VL and VTYPE
@@ -1484,6 +1533,15 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
     Used.demandVL();
 
     if (NextMI) {
+      if (auto NewVType = canAdjustSEWLMULRatio(MI, *NextMI, Used)) {
+        MI.getOperand(2).setImm(*NewVType);
+        // Convert NextMI to vsetvli zero, zero
+        NextMI->setDesc(TII->get(RISCV::PseudoVSETVLIX0));
+        NextMI->getOperand(0).setReg(RISCV::X0);
+        NextMI->getOperand(0).setIsDead(true);
+        NextMI->getOperand(1).ChangeToRegister(RISCV::X0, false, false, true);
+      }
+
       if (!Used.usedVL() && !Used.usedVTYPE()) {
         ToDelete.push_back(&MI);
         // Leave NextMI unchanged
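
A note on the arithmetic in canAdjustSEWLMULRatio: NewRatio is really LMUL scaled by 8, i.e. a fixed-point value with 3 fractional bits, which keeps fractional LMULs (mf8, mf4, mf2) integral. Below is a standalone C++ sketch of that computation on the worked example from the function's comment (illustrative only; it mirrors, but does not call, the RISCVVType helpers):

    #include <cassert>
    #include <cstdio>

    int main() {
      // From the comment: MI is "vsetivli zero, 4, e32, m1" and NextMI is
      // "vsetivli zero, 4, e16, mf4", whose SEW/LMUL ratio is 16/(1/4) = 64.
      unsigned MISEW = 32;
      unsigned NextRatio = 64;

      unsigned SEW8 = MISEW * 8;            // SEW scaled by 8: 256
      unsigned NewRatio = SEW8 / NextRatio; // LMUL*8 = 4
      bool Fractional = NewRatio < 8;       // true -> a fractional LMUL
      unsigned LMul = Fractional ? 8 / NewRatio : NewRatio / 8; // 2 -> mf2

      assert(Fractional && LMul == 2);
      printf("MI's new vtype: e%u, mf%u\n", MISEW, LMul);
      // MI becomes "vsetivli zero, 4, e32, mf2" (ratio 64), so NextMI can be
      // rewritten to "vsetvli zero, zero, e16, mf4" without touching VL.
    }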

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll

Lines changed: 4 additions & 6 deletions
@@ -65,9 +65,8 @@ define <32 x i32> @insertelt_v32i32_31(<32 x i32> %a, i32 %y) {
 ; CHECK-LABEL: insertelt_v32i32_31:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a0
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.s.x v16, a0
 ; CHECK-NEXT: vslideup.vi v8, v16, 31
 ; CHECK-NEXT: ret
 %b = insertelement <32 x i32> %a, i32 %y, i32 31
@@ -103,9 +102,8 @@ define <64 x i32> @insertelt_v64i32_63(<64 x i32> %a, i32 %y) {
 ; CHECK-LABEL: insertelt_v64i32_63:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v24, a0
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.s.x v24, a0
 ; CHECK-NEXT: vslideup.vi v16, v24, 31
 ; CHECK-NEXT: ret
 %b = insertelement <64 x i32> %a, i32 %y, i32 63
@@ -550,9 +548,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a0)
 ; CHECK-NEXT: li a2, 6
-; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma
 ; CHECK-NEXT: vmv.s.x v8, a2
-; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT: vle64.v v12, (a1)
 ; CHECK-NEXT: vadd.vv v8, v8, v12
 ; CHECK-NEXT: vse64.v v8, (a0)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll

Lines changed: 36 additions & 36 deletions
@@ -1138,11 +1138,11 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_7
 ; RV64ZVE32F-NEXT: .LBB18_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -1271,11 +1271,11 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_7
 ; RV64ZVE32F-NEXT: .LBB19_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -1408,12 +1408,12 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_7
 ; RV64ZVE32F-NEXT: .LBB20_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: andi a2, a2, 255
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2043,11 +2043,11 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_7
 ; RV64ZVE32F-NEXT: .LBB29_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2175,11 +2175,11 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_7
 ; RV64ZVE32F-NEXT: .LBB30_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2314,12 +2314,12 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_7
 ; RV64ZVE32F-NEXT: .LBB31_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: andi a2, a2, 255
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2451,11 +2451,11 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_7
 ; RV64ZVE32F-NEXT: .LBB32_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2584,11 +2584,11 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_7
 ; RV64ZVE32F-NEXT: .LBB33_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2724,12 +2724,12 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT: andi a3, a2, 16
 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_7
 ; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
 ; RV64ZVE32F-NEXT: and a3, a3, a1
 ; RV64ZVE32F-NEXT: slli a3, a3, 2
 ; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
 ; RV64ZVE32F-NEXT: andi a3, a2, 32
@@ -6393,11 +6393,11 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_7
 ; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -6526,11 +6526,11 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_7
 ; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -6663,12 +6663,12 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_7
 ; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: andi a2, a2, 255
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -7249,11 +7249,11 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_7
 ; RV64ZVE32F-NEXT: .LBB68_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7385,11 +7385,11 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_7
 ; RV64ZVE32F-NEXT: .LBB69_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7528,12 +7528,12 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_7
 ; RV64ZVE32F-NEXT: .LBB70_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: andi a2, a2, 255
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7669,11 +7669,11 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_7
 ; RV64ZVE32F-NEXT: .LBB71_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7806,11 +7806,11 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_7
 ; RV64ZVE32F-NEXT: .LBB72_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7950,12 +7950,12 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT: andi a3, a2, 16
 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_7
 ; RV64ZVE32F-NEXT: .LBB73_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
 ; RV64ZVE32F-NEXT: and a3, a3, a1
 ; RV64ZVE32F-NEXT: slli a3, a3, 2
 ; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
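
As a cross-check on the test updates above: every rewritten vsetvli pair keeps the SEW/LMUL ratio, and therefore VLMAX, constant, which is what makes the trailing vsetvli zero, zero legal. A small compile-time C++ sketch of that invariant, with LMUL again in the *8 fixed-point form (illustrative only):

    // SEW/LMUL ratio with LMUL passed as LMUL*8
    // (mf4 -> 2, mf2 -> 4, m1 -> 8, m2 -> 16, m4 -> 32).
    constexpr unsigned ratio(unsigned SEW, unsigned LMulTimes8) {
      return SEW * 8 / LMulTimes8;
    }

    // e8, mf2 then e16, m1 (was e8, mf4 followed by a VL toggle).
    static_assert(ratio(8, 4) == ratio(16, 8), "ratio preserved");
    // e16, m1 then e32, m2 (was e16, mf2 followed by a VL toggle).
    static_assert(ratio(16, 8) == ratio(32, 16), "ratio preserved");

    int main() {}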
