Skip to content

Commit d992949

Browse files
committed
[RISCV] Merge vmerge into its ops if both masks are the same
We currently only allow a vmerge to be merged into its ops if the op isn't masked or if the vmerge has an all ones mask. We can extend this to also allow cases where they share the same mask.
1 parent b5e0848 commit d992949

File tree

3 files changed

+27
-19
lines changed

3 files changed

+27
-19
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3570,12 +3570,27 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
35703570
// The vmerge instruction must be TU.
35713571
if (isImplicitDef(Merge))
35723572
return false;
3573-
// The vmerge instruction must have an all 1s mask since we're going to keep
3574-
// the mask from the True instruction.
3575-
// FIXME: Support mask agnostic True instruction which would have an
3576-
// undef merge operand.
3577-
if (Mask && !usesAllOnesMask(Mask, Glue))
3578-
return false;
3573+
if (Mask) {
3574+
// The vmerge instruction must have an all 1s mask or equal to True's mask
3575+
// since we're going to keep the mask from the True instruction.
3576+
// FIXME: Support mask agnostic True instruction which would have an
3577+
// undef merge operand.
3578+
assert(Glue.getNode() == N->getGluedNode());
3579+
// We're expecting something like:
3580+
// t24: nxv2i32 = PseudoVMERGE_VVM_M1 ..., $v0, ..., t27:1
3581+
// t23: nxv2i32 = PseudoVADD_VV_M1_MASK ..., $v0, ..., t29:1
3582+
// t29: ch,glue = CopyToReg t0, $v0, t8
3583+
// t27: ch,glue = CopyToReg t0, $v0, t8
3584+
assert(Glue->getOpcode() == ISD::CopyToReg);
3585+
assert(True->getGluedNode()->getOpcode() == ISD::CopyToReg);
3586+
assert(Glue->getOperand(1) == Mask);
3587+
assert(True->getGluedNode()->getOperand(1) ==
3588+
True->getOperand(Info->MaskOpIdx));
3589+
SDValue TrueMask = N->getGluedNode()->getOperand(2);
3590+
SDValue NMask = True->getGluedNode()->getOperand(2);
3591+
if (!usesAllOnesMask(Mask, Glue) && TrueMask != NMask)
3592+
return false;
3593+
}
35793594
}
35803595

35813596
// Skip if True has side effect.

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,8 @@ declare <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i
2222
define <vscale x 2 x i32> @vmerge_vadd_same_mask(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %vl) {
2323
; CHECK-LABEL: vmerge_vadd_same_mask:
2424
; CHECK: # %bb.0:
25-
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
26-
; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t
27-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
28-
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
25+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
26+
; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
2927
; CHECK-NEXT: ret
3028
%a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %vl, i64 0)
3129
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 %vl)
@@ -278,10 +276,8 @@ declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2
278276
define <vscale x 2 x i32> @vmerge_vadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %evl) {
279277
; CHECK-LABEL: vmerge_vadd:
280278
; CHECK: # %bb.0:
281-
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
282-
; CHECK-NEXT: vadd.vv v9, v9, v10, v0.t
283-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
284-
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
279+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
280+
; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
285281
; CHECK-NEXT: ret
286282
%1 = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 %evl)
287283
%2 = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %1, <vscale x 2 x i32> %passthru, i32 %evl)

llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,8 @@ define <vscale x 1 x float> @vfmacc_vv_nxv1f32_tu(<vscale x 1 x half> %a, <vscal
8989
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_masked__tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
9090
; ZVFH-LABEL: vfmacc_vv_nxv1f32_masked__tu:
9191
; ZVFH: # %bb.0:
92-
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
93-
; ZVFH-NEXT: vmv1r.v v11, v10
94-
; ZVFH-NEXT: vfwmacc.vv v11, v8, v9, v0.t
95-
; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
96-
; ZVFH-NEXT: vmerge.vvm v10, v10, v11, v0
92+
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
93+
; ZVFH-NEXT: vfwmacc.vv v10, v8, v9, v0.t
9794
; ZVFH-NEXT: vmv1r.v v8, v10
9895
; ZVFH-NEXT: ret
9996
;

0 commit comments

Comments
 (0)