@@ -61,6 +61,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
61
61
}
62
62
63
63
private:
64
+ bool tryToReduceVL (MachineInstr &MI) const ;
64
65
bool convertToVLMAX (MachineInstr &MI) const ;
65
66
bool convertToWholeRegister (MachineInstr &MI) const ;
66
67
bool convertToUnmasked (MachineInstr &MI) const ;
@@ -81,6 +82,96 @@ char RISCVVectorPeephole::ID = 0;
81
82
INITIALIZE_PASS (RISCVVectorPeephole, DEBUG_TYPE, " RISC-V Fold Masks" , false ,
82
83
false )
83
84
85
+ // / Given two VL operands, do we know that LHS <= RHS?
86
+ static bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
87
+ if (LHS.isReg () && RHS.isReg () && LHS.getReg ().isVirtual () &&
88
+ LHS.getReg () == RHS.getReg ())
89
+ return true ;
90
+ if (RHS.isImm () && RHS.getImm () == RISCV::VLMaxSentinel)
91
+ return true ;
92
+ if (LHS.isImm () && LHS.getImm () == RISCV::VLMaxSentinel)
93
+ return false ;
94
+ if (!LHS.isImm () || !RHS.isImm ())
95
+ return false ;
96
+ return LHS.getImm () <= RHS.getImm ();
97
+ }
98
+
99
+ static unsigned getSEWLMULRatio (const MachineInstr &MI) {
100
+ RISCVII::VLMUL LMUL = RISCVII::getLMul (MI.getDesc ().TSFlags );
101
+ unsigned Log2SEW = MI.getOperand (RISCVII::getSEWOpNum (MI.getDesc ())).getImm ();
102
+ return RISCVVType::getSEWLMULRatio (1 << Log2SEW, LMUL);
103
+ }
104
+
105
+ // Attempt to reduce the VL of an instruction whose sole use is feeding a
106
+ // instruction with a narrower VL. This currently works backwards from the
107
+ // user instruction (which might have a smaller VL).
108
+ bool RISCVVectorPeephole::tryToReduceVL (MachineInstr &MI) const {
109
+ // Note that the goal here is a bit multifaceted.
110
+ // 1) For store's reducing the VL of the value being stored may help to
111
+ // reduce VL toggles. This is somewhat of an artifact of the fact we
112
+ // promote arithmetic instructions but VL predicate stores.
113
+ // 2) For vmv.v.v reducing VL eagerly on the source instruction allows us
114
+ // to share code with the foldVMV_V_V transform below.
115
+ //
116
+ // Note that to the best of our knowledge, reducing VL is generally not
117
+ // a significant win on real hardware unless we can also reduce LMUL which
118
+ // this code doesn't try to do.
119
+ //
120
+ // TODO: We can handle a bunch more instructions here, and probably
121
+ // recurse backwards through operands too.
122
+ unsigned SrcIdx = 0 ;
123
+ switch (RISCV::getRVVMCOpcode (MI.getOpcode ())) {
124
+ default :
125
+ return false ;
126
+ case RISCV::VSE8_V:
127
+ case RISCV::VSE16_V:
128
+ case RISCV::VSE32_V:
129
+ case RISCV::VSE64_V:
130
+ break ;
131
+ case RISCV::VMV_V_V:
132
+ SrcIdx = 2 ;
133
+ break ;
134
+ }
135
+
136
+ MachineOperand &VL = MI.getOperand (RISCVII::getVLOpNum (MI.getDesc ()));
137
+ if (VL.isImm () && VL.getImm () == RISCV::VLMaxSentinel)
138
+ return false ;
139
+
140
+ Register SrcReg = MI.getOperand (SrcIdx).getReg ();
141
+ // Note: one *use*, not one *user*.
142
+ if (!MRI->hasOneUse (SrcReg))
143
+ return false ;
144
+
145
+ MachineInstr *Src = MRI->getVRegDef (SrcReg);
146
+ if (!Src || Src->hasUnmodeledSideEffects () ||
147
+ Src->getParent () != MI.getParent () || Src->getNumDefs () != 1 ||
148
+ !RISCVII::hasVLOp (Src->getDesc ().TSFlags ) ||
149
+ !RISCVII::hasSEWOp (Src->getDesc ().TSFlags ))
150
+ return false ;
151
+
152
+ // Src needs to have the same VLMAX as MI
153
+ if (getSEWLMULRatio (MI) != getSEWLMULRatio (*Src))
154
+ return false ;
155
+
156
+ bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult (
157
+ TII->get (RISCV::getRVVMCOpcode (Src->getOpcode ())).TSFlags );
158
+ if (ActiveElementsAffectResult || Src->mayRaiseFPException ())
159
+ return false ;
160
+
161
+ MachineOperand &SrcVL = Src->getOperand (RISCVII::getVLOpNum (Src->getDesc ()));
162
+ if (VL.isIdenticalTo (SrcVL) || !isVLKnownLE (VL, SrcVL))
163
+ return false ;
164
+
165
+ if (VL.isImm ())
166
+ SrcVL.ChangeToImmediate (VL.getImm ());
167
+ else if (VL.isReg ())
168
+ SrcVL.ChangeToRegister (VL.getReg (), false );
169
+
170
+ // TODO: For instructions with a passthru, we could clear the passthru
171
+ // and tail policy since we've just proven the tail is not demanded.
172
+ return true ;
173
+ }
174
+
84
175
// / Check if an operand is an immediate or a materialized ADDI $x0, imm.
85
176
std::optional<unsigned >
86
177
RISCVVectorPeephole::getConstant (const MachineOperand &VL) const {
@@ -325,22 +416,6 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
325
416
return true ;
326
417
}
327
418
328
- // / Given two VL operands, returns the one known to be the smallest or nullptr
329
- // / if unknown.
330
- static const MachineOperand *getKnownMinVL (const MachineOperand *LHS,
331
- const MachineOperand *RHS) {
332
- if (LHS->isReg () && RHS->isReg () && LHS->getReg ().isVirtual () &&
333
- LHS->getReg () == RHS->getReg ())
334
- return LHS;
335
- if (LHS->isImm () && LHS->getImm () == RISCV::VLMaxSentinel)
336
- return RHS;
337
- if (RHS->isImm () && RHS->getImm () == RISCV::VLMaxSentinel)
338
- return LHS;
339
- if (!LHS->isImm () || !RHS->isImm ())
340
- return nullptr ;
341
- return LHS->getImm () <= RHS->getImm () ? LHS : RHS;
342
- }
343
-
344
419
// / Check if it's safe to move From down to To, checking that no physical
345
420
// / registers are clobbered.
346
421
static bool isSafeToMove (const MachineInstr &From, const MachineInstr &To) {
@@ -362,21 +437,16 @@ static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To) {
362
437
return From.isSafeToMove (SawStore);
363
438
}
364
439
365
- static unsigned getSEWLMULRatio (const MachineInstr &MI) {
366
- RISCVII::VLMUL LMUL = RISCVII::getLMul (MI.getDesc ().TSFlags );
367
- unsigned Log2SEW = MI.getOperand (RISCVII::getSEWOpNum (MI.getDesc ())).getImm ();
368
- return RISCVVType::getSEWLMULRatio (1 << Log2SEW, LMUL);
369
- }
370
-
371
440
// / If a PseudoVMV_V_V is the only user of its input, fold its passthru and VL
372
441
// / into it.
373
442
// /
374
443
// / %x = PseudoVADD_V_V_M1 %passthru, %a, %b, %vl1, sew, policy
375
444
// / %y = PseudoVMV_V_V_M1 %passthru, %x, %vl2, sew, policy
445
+ // / (where %vl1 <= %vl2, see related tryToReduceVL)
376
446
// /
377
447
// / ->
378
448
// /
379
- // / %y = PseudoVADD_V_V_M1 %passthru, %a, %b, min( vl1, vl2) , sew, policy
449
+ // / %y = PseudoVADD_V_V_M1 %passthru, %a, %b, vl1, sew, policy
380
450
bool RISCVVectorPeephole::foldVMV_V_V (MachineInstr &MI) {
381
451
if (RISCV::getRVVMCOpcode (MI.getOpcode ()) != RISCV::VMV_V_V)
382
452
return false ;
@@ -404,33 +474,16 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
404
474
SrcPassthru.getReg () != Passthru.getReg ())
405
475
return false ;
406
476
407
- // Because Src and MI have the same passthru, we can use either AVL as long as
408
- // it's the smaller of the two.
409
- //
410
- // (src pt, ..., vl=5) x x x x x|. . .
411
- // (vmv.v.v pt, src, vl=3) x x x|. . . . .
412
- // ->
413
- // (src pt, ..., vl=3) x x x|. . . . .
414
- //
415
- // (src pt, ..., vl=3) x x x|. . . . .
416
- // (vmv.v.v pt, src, vl=6) x x x . . .|. .
417
- // ->
418
- // (src pt, ..., vl=3) x x x|. . . . .
477
+ // Src VL will have already been reduced if legal (see tryToReduceVL),
478
+ // so we don't need to handle a smaller source VL here. However, the
479
+ // user's VL may be larger
419
480
MachineOperand &SrcVL = Src->getOperand (RISCVII::getVLOpNum (Src->getDesc ()));
420
- const MachineOperand *MinVL = getKnownMinVL (&MI.getOperand (3 ), &SrcVL);
421
- if (!MinVL)
422
- return false ;
423
-
424
- bool VLChanged = !MinVL->isIdenticalTo (SrcVL);
425
- bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult (
426
- TII->get (RISCV::getRVVMCOpcode (Src->getOpcode ())).TSFlags );
427
-
428
- if (VLChanged && (ActiveElementsAffectResult || Src->mayRaiseFPException ()))
481
+ if (!isVLKnownLE (SrcVL, MI.getOperand (3 )))
429
482
return false ;
430
483
431
484
// If Src ends up using MI's passthru/VL, move it so it can access it.
432
485
// TODO: We don't need to do this if they already dominate Src.
433
- if (!SrcVL. isIdenticalTo (*MinVL) || ! SrcPassthru.isIdenticalTo (Passthru)) {
486
+ if (!SrcPassthru.isIdenticalTo (Passthru)) {
434
487
if (!isSafeToMove (*Src, MI))
435
488
return false ;
436
489
Src->moveBefore (&MI);
@@ -445,11 +498,6 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
445
498
*Src->getParent ()->getParent ()));
446
499
}
447
500
448
- if (MinVL->isImm ())
449
- SrcVL.ChangeToImmediate (MinVL->getImm ());
450
- else if (MinVL->isReg ())
451
- SrcVL.ChangeToRegister (MinVL->getReg (), false );
452
-
453
501
// Use a conservative tu,mu policy, RISCVInsertVSETVLI will relax it if
454
502
// passthru is undef.
455
503
Src->getOperand (RISCVII::getVecPolicyOpNum (Src->getDesc ()))
@@ -498,6 +546,7 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
498
546
for (MachineBasicBlock &MBB : MF) {
499
547
for (MachineInstr &MI : make_early_inc_range (MBB)) {
500
548
Changed |= convertToVLMAX (MI);
549
+ Changed |= tryToReduceVL (MI);
501
550
Changed |= convertToUnmasked (MI);
502
551
Changed |= convertToWholeRegister (MI);
503
552
Changed |= convertVMergeToVMv (MI);
0 commit comments