Skip to content

Commit 34c2cf0

Browse files
committed
[RISCV] Move vmerge same mask peephole to RISCVVectorPeephole
We currently fold a vmerge.vvm into its true operand if the true operand is a masked pseudo with the same mask. We can move this over to RISCVVectorPeephole by instead splitting it into a smaller peephole that first converts the vmerge to a vmv.v.v. The existing foldVMV_V_V peephole will then take care of folding it if needed. This is very similar to the existing all-ones mask peephole, and we could potentially handle it inside that peephole. I opted to put it in a separate peephole to make it easier to reason about, given that the duplication is small, but I could be persuaded either way.
1 parent b45b366 commit 34c2cf0

File tree

4 files changed

+80
-61
lines changed

4 files changed

+80
-61
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 5 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3833,15 +3833,8 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
38333833
uint64_t TrueTSFlags = TrueMCID.TSFlags;
38343834
bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
38353835

3836-
bool IsMasked = false;
38373836
const RISCV::RISCVMaskedPseudoInfo *Info =
38383837
RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3839-
if (!Info && HasTiedDest) {
3840-
Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3841-
IsMasked = true;
3842-
}
3843-
assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");
3844-
38453838
if (!Info)
38463839
return false;
38473840

@@ -3853,19 +3846,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
38533846
return false;
38543847
}
38553848

3856-
// If True is masked then the vmerge must have either the same mask or an all
3857-
// 1s mask, since we're going to keep the mask from True.
3858-
if (IsMasked) {
3859-
// FIXME: Support mask agnostic True instruction which would have an
3860-
// undef passthru operand.
3861-
SDValue TrueMask =
3862-
getMaskSetter(True->getOperand(Info->MaskOpIdx),
3863-
True->getOperand(True->getNumOperands() - 1));
3864-
assert(TrueMask);
3865-
if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
3866-
return false;
3867-
}
3868-
38693849
// Skip if True has side effect.
38703850
if (TII->get(TrueOpc).hasUnmodeledSideEffects())
38713851
return false;
@@ -3930,24 +3910,13 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
39303910
(Mask && !usesAllOnesMask(Mask, Glue)))
39313911
return false;
39323912

3933-
// If we end up changing the VL or mask of True, then we need to make sure it
3934-
// doesn't raise any observable fp exceptions, since changing the active
3935-
// elements will affect how fflags is set.
3936-
if (TrueVL != VL || !IsMasked)
3937-
if (mayRaiseFPException(True.getNode()) &&
3938-
!True->getFlags().hasNoFPExcept())
3939-
return false;
3913+
// Make sure it doesn't raise any observable fp exceptions, since changing the
3914+
// active elements will affect how fflags is set.
3915+
if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept())
3916+
return false;
39403917

39413918
SDLoc DL(N);
39423919

3943-
// From the preconditions we checked above, we know the mask and thus glue
3944-
// for the result node will be taken from True.
3945-
if (IsMasked) {
3946-
Mask = True->getOperand(Info->MaskOpIdx);
3947-
Glue = True->getOperand(True->getNumOperands() - 1);
3948-
assert(Glue.getValueType() == MVT::Glue);
3949-
}
3950-
39513920
unsigned MaskedOpc = Info->MaskedPseudo;
39523921
#ifndef NDEBUG
39533922
const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
@@ -3977,8 +3946,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
39773946
Ops.push_back(False);
39783947

39793948
const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3980-
const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3981-
assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3949+
const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
39823950
Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
39833951

39843952
Ops.push_back(Mask);

llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp

Lines changed: 63 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ class RISCVVectorPeephole : public MachineFunctionPass {
6565
bool convertToVLMAX(MachineInstr &MI) const;
6666
bool convertToWholeRegister(MachineInstr &MI) const;
6767
bool convertToUnmasked(MachineInstr &MI) const;
68-
bool convertVMergeToVMv(MachineInstr &MI) const;
68+
bool convertAllOnesVMergeToVMv(MachineInstr &MI) const;
69+
bool convertSameMaskVMergeToVMv(MachineInstr &MI) const;
6970
bool foldUndefPassthruVMV_V_V(MachineInstr &MI);
7071
bool foldVMV_V_V(MachineInstr &MI);
7172

@@ -342,17 +343,14 @@ bool RISCVVectorPeephole::convertToWholeRegister(MachineInstr &MI) const {
342343
return true;
343344
}
344345

345-
// Transform (VMERGE_VVM_<LMUL> pt, false, true, allones, vl, sew) to
346-
// (VMV_V_V_<LMUL> pt, true, vl, sew). It may decrease uses of VMSET.
347-
bool RISCVVectorPeephole::convertVMergeToVMv(MachineInstr &MI) const {
346+
static unsigned getVMV_V_VOpcodeForVMERGE_VVM(const MachineInstr &MI) {
348347
#define CASE_VMERGE_TO_VMV(lmul) \
349348
case RISCV::PseudoVMERGE_VVM_##lmul: \
350-
NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
349+
return RISCV::PseudoVMV_V_V_##lmul; \
351350
break;
352-
unsigned NewOpc;
353351
switch (MI.getOpcode()) {
354352
default:
355-
return false;
353+
return 0;
356354
CASE_VMERGE_TO_VMV(MF8)
357355
CASE_VMERGE_TO_VMV(MF4)
358356
CASE_VMERGE_TO_VMV(MF2)
@@ -361,14 +359,68 @@ bool RISCVVectorPeephole::convertVMergeToVMv(MachineInstr &MI) const {
361359
CASE_VMERGE_TO_VMV(M4)
362360
CASE_VMERGE_TO_VMV(M8)
363361
}
362+
}
364363

364+
/// Convert a PseudoVMERGE_VVM with an all ones mask to a PseudoVMV_V_V.
365+
///
366+
/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %allones, vl, sew
367+
/// ->
368+
/// %x = PseudoVMV_V_V %passthru, %true, vl, sew, tu_mu
369+
bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
370+
unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
371+
if (!NewOpc)
372+
return false;
365373
assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
366374
if (!isAllOnesMask(V0Defs.lookup(&MI)))
367375
return false;
368376

369377
MI.setDesc(TII->get(NewOpc));
370-
MI.removeOperand(2); // False operand
371-
MI.removeOperand(3); // Mask operand
378+
MI.removeOperand(2); // False operand
379+
MI.removeOperand(3); // Mask operand
380+
MI.addOperand(
381+
MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));
382+
383+
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
384+
// register class for the destination and passthru operands e.g. VRNoV0 -> VR
385+
MRI->recomputeRegClass(MI.getOperand(0).getReg());
386+
if (MI.getOperand(1).getReg() != RISCV::NoRegister)
387+
MRI->recomputeRegClass(MI.getOperand(1).getReg());
388+
return true;
389+
}
390+
391+
/// If a PseudoVMERGE_VVM's true operand is a masked pseudo and both have the
392+
/// same mask, and the masked pseudo's passthru is the same as the false
393+
/// operand, we can convert the PseudoVMERGE_VVM to a PseudoVMV_V_V.
394+
///
395+
/// %true = PseudoVADD_VV_M1_MASK %false, %a, %b, %mask, vl1, sew, policy
396+
/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %mask, vl2, sew
397+
/// ->
398+
/// %true = PseudoVADD_VV_M1_MASK %false, %a, %b, %mask, vl1, sew, policy
399+
/// %x = PseudoVMV_V_V %passthru, %true, vl2, sew, tu_mu
400+
bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) const {
401+
unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
402+
if (!NewOpc)
403+
return false;
404+
MachineInstr *True = MRI->getVRegDef(MI.getOperand(3).getReg());
405+
if (!True || !RISCV::getMaskedPseudoInfo(True->getOpcode()) ||
406+
!hasSameEEW(MI, *True))
407+
return false;
408+
409+
// True's passthru needs to be equivalent to False
410+
Register TruePassthruReg = True->getOperand(1).getReg();
411+
Register FalseReg = MI.getOperand(2).getReg();
412+
if (TruePassthruReg != RISCV::NoRegister && TruePassthruReg != FalseReg)
413+
return false;
414+
415+
const MachineInstr *TrueV0Def = V0Defs.lookup(True);
416+
const MachineInstr *MIV0Def = V0Defs.lookup(&MI);
417+
assert(TrueV0Def->isCopy() && MIV0Def->isCopy());
418+
if (TrueV0Def->getOperand(1).getReg() != MIV0Def->getOperand(1).getReg())
419+
return false;
420+
421+
MI.setDesc(TII->get(NewOpc));
422+
MI.removeOperand(2); // False operand
423+
MI.removeOperand(3); // Mask operand
372424
MI.addOperand(
373425
MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));
374426

@@ -622,7 +674,8 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
622674
Changed |= tryToReduceVL(MI);
623675
Changed |= convertToUnmasked(MI);
624676
Changed |= convertToWholeRegister(MI);
625-
Changed |= convertVMergeToVMv(MI);
677+
Changed |= convertAllOnesVMergeToVMv(MI);
678+
Changed |= convertSameMaskVMergeToVMv(MI);
626679
if (foldUndefPassthruVMV_V_V(MI)) {
627680
Changed |= true;
628681
continue; // MI is erased

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,11 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
6262
; CHECK-NEXT: li a4, 5
6363
; CHECK-NEXT: .LBB1_1: # %vector.body
6464
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
65-
; CHECK-NEXT: vmv1r.v v9, v8
66-
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
67-
; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t
68-
; CHECK-NEXT: vle8.v v10, (a0)
69-
; CHECK-NEXT: vadd.vv v9, v10, v9
70-
; CHECK-NEXT: vse8.v v9, (a0)
65+
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
66+
; CHECK-NEXT: vlse8.v v8, (a1), a4, v0.t
67+
; CHECK-NEXT: vle8.v v9, (a0)
68+
; CHECK-NEXT: vadd.vv v8, v9, v8
69+
; CHECK-NEXT: vse8.v v8, (a0)
7170
; CHECK-NEXT: addi a0, a0, 32
7271
; CHECK-NEXT: addi a1, a1, 160
7372
; CHECK-NEXT: bne a0, a2, .LBB1_1
@@ -344,12 +343,11 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
344343
; CHECK-NEXT: li a4, 5
345344
; CHECK-NEXT: .LBB7_1: # %vector.body
346345
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
347-
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
348-
; CHECK-NEXT: vle8.v v9, (a1)
349-
; CHECK-NEXT: vmv1r.v v10, v8
350-
; CHECK-NEXT: vlse8.v v10, (a0), a4, v0.t
351-
; CHECK-NEXT: vadd.vv v9, v10, v9
352-
; CHECK-NEXT: vsse8.v v9, (a0), a4, v0.t
346+
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
347+
; CHECK-NEXT: vle8.v v8, (a1)
348+
; CHECK-NEXT: vlse8.v v9, (a0), a4, v0.t
349+
; CHECK-NEXT: vadd.vv v8, v9, v8
350+
; CHECK-NEXT: vsse8.v v8, (a0), a4, v0.t
353351
; CHECK-NEXT: addi a1, a1, 32
354352
; CHECK-NEXT: addi a0, a0, 160
355353
; CHECK-NEXT: bne a1, a2, .LBB7_1

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,13 @@ body: |
7676
; CHECK-LABEL: name: same_mask
7777
; CHECK: liveins: $v8, $v9, $v0
7878
; CHECK-NEXT: {{ $}}
79-
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
79+
; CHECK-NEXT: %pt:vr = COPY $v8
8080
; CHECK-NEXT: %false:vrnov0 = COPY $v9
8181
; CHECK-NEXT: %mask:vr = COPY $v0
8282
; CHECK-NEXT: $v0 = COPY %mask
8383
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
8484
; CHECK-NEXT: $v0 = COPY %mask
85-
; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
85+
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */
8686
%pt:vrnov0 = COPY $v8
8787
%false:vrnov0 = COPY $v9
8888
%mask:vr = COPY $v0

0 commit comments

Comments
 (0)