Commit be2cb82
[riscv] Remove mutation of prior vsetvli from insertion dataflow
This moves mutation entirely out of the main algorithm. The immediate trigger is that we hit another case of the same issue I thought we'd fixed in 72925d9; it turns out we hadn't considered the cross-block case.

As a brief summary, the issue being fixed is that if we mutate a previous vsetvli in phase 3, there's a possibility that some later use of that vsetvli changes "compatibility". In the cross_block_mutate test, this later vsetvli occurs in another block (and is thus visit-order dependent too!). This causes us to fail strict asserts. (To be explicit: the current on-by-default workaround should compensate; it's only when we turn that off that we have problems.)

Now, I want to explicitly call out an alternate workaround. We could leave the mutation in phase 3 and simply restrict it to the case where the previous vsetvli's GPR result is unused. That covers the case we've actually seen. (I'll note that codegen regressions with a simple form of this were significant. We might have to check specifically for the use-outside-block case to keep them reasonable, which complicates the workaround slightly.)

Personally, I'm at the point where I want the mutation pulled out just for robustness' sake. I'm worried there's yet one more form of this bug we haven't thought about. The other motivation for this change is that it gives us a couple of minor codegen wins. None appear to be hugely significant, but improvements never hurt, right?

Differential Revision: https://reviews.llvm.org/D125270
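To make the failure mode concrete, here is a minimal sketch of the shape of the problem, distilled from the cross_block_mutate test updated below (register choices and surrounding instructions are illustrative, not quoted verbatim from the test):

    # entry block: phase 3 mutates this vsetvli's vtype in place to match a
    # following instruction's demanded fields...
    vsetivli a0, 6, e32, m2, tu, mu    # GPR result a0 still has later uses
    vmv.s.x v8, a0

    # second block: ...but this user's "compatibility" was computed against
    # the pre-mutation vtype, so the result depends on block visit order and
    # trips the strict asserts.
    vadd.vv v8, v8, v10, v0.t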
1 parent 9368bf9 commit be2cb82

3 files changed (+68, −33 lines)

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 66 additions & 25 deletions
@@ -462,6 +462,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
   void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
   void emitVSETVLIs(MachineBasicBlock &MBB);
   void doLocalPrepass(MachineBasicBlock &MBB);
+  void doLocalPostpass(MachineBasicBlock &MBB);
   void doPRE(MachineBasicBlock &MBB);
 };

@@ -478,6 +479,15 @@ static bool isVectorConfigInstr(const MachineInstr &MI) {
          MI.getOpcode() == RISCV::PseudoVSETIVLI;
 }
 
+/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
+/// VL and only sets VTYPE.
+static bool isVLPreservingConfig(const MachineInstr &MI) {
+  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
+    return false;
+  assert(RISCV::X0 == MI.getOperand(1).getReg());
+  return RISCV::X0 == MI.getOperand(0).getReg();
+}
+
 static MachineInstr *elideCopies(MachineInstr *MI,
                                  const MachineRegisterInfo *MRI) {
   while (true) {
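For reference, the form matched by isVLPreservingConfig above is the one the ISA defines as rewriting VTYPE while keeping the current VL. An illustrative instance (the vtype fields here are chosen arbitrarily):

    vsetvli x0, x0, e64, m1, ta, mu    # rd = x0 and rs1 = x0: VL is preserved,
                                       # only VTYPE is (re)written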
@@ -1065,9 +1075,6 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
 
 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
   VSETVLIInfo CurInfo;
-  // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI.
-  MachineInstr *PrevVSETVLIMI = nullptr;
-
   for (MachineInstr &MI : MBB) {
     // If this is an explicit VSETVLI or VSETIVLI, update our state.
     if (isVectorConfigInstr(MI)) {
@@ -1078,7 +1085,6 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
       MI.getOperand(3).setIsDead(false);
       MI.getOperand(4).setIsDead(false);
       CurInfo = getInfoForVSETVLI(MI);
-      PrevVSETVLIMI = &MI;
       continue;
     }

@@ -1125,37 +1131,17 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
       // vl/vtype for succesor blocks.
       if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
           needVSETVLI(NewInfo, CurInfo)) {
-        // If the previous VL/VTYPE is set by VSETVLI and do not use, Merge it
-        // with current VL/VTYPE.
-        bool NeedInsertVSETVLI = true;
-        if (PrevVSETVLIMI) {
-          // If these two VSETVLI have the same AVL and the same VLMAX,
-          // we could merge these two VSETVLI.
-          // TODO: If we remove this, we get a `vsetvli x0, x0, vtype'
-          // here. We could simply let this be emitted, then remove
-          // the unused vsetvlis in a post-pass.
-          if (CurInfo.hasSameAVL(NewInfo) && CurInfo.hasSameVLMAX(NewInfo)) {
-            // WARNING: For correctness, it is essential the contents of VL
-            // and VTYPE stay the same after MI. This greatly limits the
-            // mutation we can legally do here.
-            PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
-            NeedInsertVSETVLI = false;
-          }
-        }
-        if (NeedInsertVSETVLI)
-          insertVSETVLI(MBB, MI, NewInfo, CurInfo);
+        insertVSETVLI(MBB, MI, NewInfo, CurInfo);
         CurInfo = NewInfo;
       }
     }
-    PrevVSETVLIMI = nullptr;
   }
 
   // If this is something that updates VL/VTYPE that we don't know about, set
   // the state to unknown.
   if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
       MI.modifiesRegister(RISCV::VTYPE)) {
     CurInfo = VSETVLIInfo::getUnknown();
-    PrevVSETVLIMI = nullptr;
   }
 }

@@ -1378,6 +1364,52 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
                        AvailableInfo, OldInfo);
 }
 
+void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
+  MachineInstr *PrevMI = nullptr;
+  bool UsedVL = false, UsedVTYPE = false;
+  SmallVector<MachineInstr*> ToDelete;
+  for (MachineInstr &MI : MBB) {
+    // Note: Must be *before* vsetvli handling to account for config cases
+    // which only change some subfields.
+    if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
+      UsedVL = true;
+    if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
+      UsedVTYPE = true;
+
+    if (!isVectorConfigInstr(MI))
+      continue;
+
+    if (PrevMI) {
+      if (!UsedVL && !UsedVTYPE) {
+        ToDelete.push_back(PrevMI);
+        // fallthrough
+      } else if (!UsedVTYPE && isVLPreservingConfig(MI)) {
+        // Note: `vsetvli x0, x0, vtype' is the canonical instruction
+        // for this case. If you find yourself wanting to add other forms
+        // to this "unused VTYPE" case, we're probably missing a
+        // canonicalization earlier.
+        // Note: We don't need to explicitly check vtype compatibility
+        // here because this form is only legal (per ISA) when not
+        // changing VL.
+        PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
+        ToDelete.push_back(&MI);
+        // Leave PrevMI unchanged
+        continue;
+      }
+    }
+    PrevMI = &MI;
+    UsedVL = false;
+    UsedVTYPE = false;
+    Register VRegDef = MI.getOperand(0).getReg();
+    if (VRegDef != RISCV::X0 &&
+        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
+      UsedVL = true;
+  }
+
+  for (auto *MI : ToDelete)
+    MI->eraseFromParent();
+}
+
 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
   // Skip if the vector extension is not enabled.
   const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
@@ -1443,6 +1475,15 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
   for (MachineBasicBlock &MBB : MF)
     emitVSETVLIs(MBB);
 
+  // Now that all vsetvlis are explicit, go through and do block local
+  // DSE and peephole based demanded fields based transforms. Note that
+  // this *must* be done outside the main dataflow so long as we allow
+  // any cross block analysis within the dataflow. We can't have both
+  // demanded fields based mutation and non-local analysis in the
+  // dataflow at the same time without introducing inconsistencies.
+  for (MachineBasicBlock &MBB : MF)
+    doLocalPostpass(MBB);
+
   // Once we're fully done rewriting all the instructions, do a final pass
   // through to check for VSETVLIs which write to an unused destination.
   // For the non X0, X0 variant, we can replace the destination register
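The effect of doLocalPostpass shows up directly in the test updates below. A before/after sketch of its two rewrites, using sequences taken from the avl_forward4 and test17 diffs:

    # Dead-config deletion (avl_forward4): neither VL nor VTYPE of the first
    # vsetvli is read before the second one redefines both, so it is erased.
    vsetvli zero, a1, e16, m1, ta, mu    # deleted by the post-pass
    vsetvli zero, a1, e32, m1, ta, mu
    vse32.v v8, (a0)

    # VTYPE folding (test17): a VL-preserving `vsetvli x0, x0, vtype' whose
    # predecessor's VTYPE is otherwise unused is folded into the predecessor.
    vsetvli a0, a0, e32, mf2, ta, mu
    vsetvli zero, zero, e64, m1, ta, mu
    # ...becomes:
    vsetvli a0, a0, e64, m1, ta, mu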

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll

Lines changed: 1 addition & 3 deletions
@@ -747,7 +747,6 @@ define void @vector_init_vsetvli_fv2(i64 %N, double* %c) {
 ; CHECK-LABEL: vector_init_vsetvli_fv2:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu
 ; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, mu
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: .LBB15_1: # %for.body
@@ -812,14 +811,13 @@ for.end: ; preds = %for.body
 ; Demonstrates a case where mutation in phase3 is problematic. We mutate the
 ; vsetvli without considering that it changes the compatibility result of the
 ; vadd in the second block.
-; FIXME: This currently crashes with strict asserts enabled.
 define <vscale x 4 x i32> @cross_block_mutate(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
 ; CHECK-LABEL: cross_block_mutate:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli a0, 6, e32, m2, tu, mu
 ; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT: ret
 <vscale x 4 x i1> %mask) {
 entry:

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll

Lines changed: 1 addition & 5 deletions
@@ -328,8 +328,7 @@ entry:
 define double @test17(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test17:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vfmv.f.s ft0, v8
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vfadd.vv v8, v8, v9
@@ -468,7 +467,6 @@ entry:
 define void @avl_forward4(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
 ; CHECK-LABEL: avl_forward4:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
@@ -498,7 +496,6 @@ entry:
 define <vscale x 1 x i64> @vleNff(i64* %str, i64 %n, i64 %x) {
 ; CHECK-LABEL: vleNff:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vle64ff.v v8, (a0)
 ; CHECK-NEXT: csrr a0, vl
@@ -526,7 +523,6 @@ define <vscale x 2 x i32> @avl_forward5(<vscale x 2 x i32>* %addr) {
 ; CHECK-LABEL: avl_forward5:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: ret
