Skip to content

Commit 1cbf44b

Browse files
jacquesguan
authored and committed
[RISCV] Support peephole optimization to fold vmerge.vvm that has tail agnostic policy and unmasked intrinsics.
This patch supports the tail agnostic part of D130442. Reviewed By: fakepaper56 Differential Revision: https://reviews.llvm.org/D132923
1 parent 514ac16 commit 1cbf44b

9 files changed

+134
-216
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2630,11 +2630,14 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
26302630
// peephole only deals with VMERGE_VVM which is TU and has false operand same as
26312631
// its true operand now. E.g. (VMERGE_VVM_M1_TU False, False, (VADD_M1 ...),
26322632
// ...) -> (VADD_VV_M1_MASK)
2633-
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
2634-
SDValue Merge = N->getOperand(0);
2635-
SDValue True = N->getOperand(2);
2636-
SDValue Mask = N->getOperand(3);
2637-
SDValue VL = N->getOperand(4);
2633+
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) {
2634+
unsigned Offset = IsTA ? 0 : 1;
2635+
uint64_t Policy = IsTA ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
2636+
2637+
SDValue False = N->getOperand(0 + Offset);
2638+
SDValue True = N->getOperand(1 + Offset);
2639+
SDValue Mask = N->getOperand(2 + Offset);
2640+
SDValue VL = N->getOperand(3 + Offset);
26382641

26392642
assert(True.getResNo() == 0 &&
26402643
"Expect True is the first output of an instruction.");
@@ -2688,13 +2691,14 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
26882691
unsigned MaskedOpc = Info->MaskedPseudo;
26892692
assert(RISCVII::hasVecPolicyOp(TII->get(MaskedOpc).TSFlags) &&
26902693
"Expected instructions with mask have policy operand.");
2694+
assert(RISCVII::hasMergeOp(TII->get(MaskedOpc).TSFlags) &&
2695+
"Expected instructions with mask have merge operand.");
26912696

26922697
SmallVector<SDValue, 8> Ops;
2693-
Ops.push_back(Merge);
2698+
Ops.push_back(False);
26942699
Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex);
26952700
Ops.append({Mask, VL, /* SEW */ True.getOperand(TrueVLIndex + 1)});
2696-
Ops.push_back(
2697-
CurDAG->getTargetConstant(/* TUMU */ 0, DL, Subtarget->getXLenVT()));
2701+
Ops.push_back(CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()));
26982702

26992703
// Result node should have chain operand of True.
27002704
if (HasChainOp)
@@ -2782,15 +2786,24 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
27822786
Opcode == RISCV::PseudoVMERGE_VVM_M8_TU;
27832787
};
27842788

2789+
auto IsVMergeTA = [](unsigned Opcode) {
2790+
return Opcode == RISCV::PseudoVMERGE_VVM_MF8 ||
2791+
Opcode == RISCV::PseudoVMERGE_VVM_MF4 ||
2792+
Opcode == RISCV::PseudoVMERGE_VVM_MF2 ||
2793+
Opcode == RISCV::PseudoVMERGE_VVM_M1 ||
2794+
Opcode == RISCV::PseudoVMERGE_VVM_M2 ||
2795+
Opcode == RISCV::PseudoVMERGE_VVM_M4 ||
2796+
Opcode == RISCV::PseudoVMERGE_VVM_M8;
2797+
};
2798+
27852799
unsigned Opc = N->getMachineOpcode();
27862800
// The following optimizations require that the merge operand of N is same
27872801
// as the false operand of N.
2788-
// TODO: Also deal with TA VMerge nodes.
2789-
if (!IsVMergeTU(Opc) || N->getOperand(0) != N->getOperand(1))
2790-
continue;
2791-
2792-
MadeChange |= performCombineVMergeAndVOps(N);
2793-
MadeChange |= performVMergeToVAdd(N);
2802+
if ((IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) ||
2803+
IsVMergeTA(Opc))
2804+
MadeChange |= performCombineVMergeAndVOps(N, IsVMergeTA(Opc));
2805+
if (IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1))
2806+
MadeChange |= performVMergeToVAdd(N);
27942807
}
27952808
return MadeChange;
27962809
}

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
134134
bool doPeepholeMaskedRVV(SDNode *Node);
135135
bool doPeepholeMergeVVMFold();
136136
bool performVMergeToVAdd(SDNode *N);
137-
bool performCombineVMergeAndVOps(SDNode *N);
137+
bool performCombineVMergeAndVOps(SDNode *N, bool IsTA);
138138
};
139139

140140
namespace RISCV {

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll

Lines changed: 12 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -200,8 +200,7 @@ define <8 x i32> @vpselect_vpadd(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y
200200
; CHECK-LABEL: vpselect_vpadd:
201201
; CHECK: # %bb.0:
202202
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
203-
; CHECK-NEXT: vadd.vv v9, v9, v10
204-
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
203+
; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
205204
; CHECK-NEXT: ret
206205
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
207206
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -216,8 +215,7 @@ define <8 x i32> @vpselect_vpadd2(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %
216215
; CHECK: # %bb.0:
217216
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
218217
; CHECK-NEXT: vmseq.vv v0, v9, v10
219-
; CHECK-NEXT: vadd.vv v9, v9, v10
220-
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
218+
; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
221219
; CHECK-NEXT: ret
222220
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
223221
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -231,11 +229,8 @@ define <8 x i32> @vpselect_vpadd2(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %
231229
define <8 x i32> @vpselect_vpadd3(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
232230
; CHECK-LABEL: vpselect_vpadd3:
233231
; CHECK: # %bb.0:
234-
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu
235-
; CHECK-NEXT: vmset.m v0
236232
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
237-
; CHECK-NEXT: vadd.vv v9, v9, v10
238-
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
233+
; CHECK-NEXT: vadd.vv v8, v9, v10
239234
; CHECK-NEXT: ret
240235
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
241236
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -249,8 +244,7 @@ define <8 x float> @vpselect_vpfadd(<8 x float> %passthru, <8 x float> %x, <8 x
249244
; CHECK-LABEL: vpselect_vpfadd:
250245
; CHECK: # %bb.0:
251246
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
252-
; CHECK-NEXT: vfadd.vv v9, v9, v10
253-
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
247+
; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
254248
; CHECK-NEXT: ret
255249
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
256250
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -264,8 +258,7 @@ define <8 x i16> @vpselect_vpfptosi(<8 x i16> %passthru, <8 x float> %x, <8 x i1
264258
; CHECK-LABEL: vpselect_vpfptosi:
265259
; CHECK: # %bb.0:
266260
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
267-
; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v9
268-
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
261+
; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
269262
; CHECK-NEXT: ret
270263
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
271264
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -279,8 +272,7 @@ define <8 x float> @vpselect_vpsitofp(<8 x float> %passthru, <8 x i64> %x, <8 x
279272
; CHECK-LABEL: vpselect_vpsitofp:
280273
; CHECK: # %bb.0:
281274
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
282-
; CHECK-NEXT: vfncvt.f.x.w v9, v10
283-
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
275+
; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t
284276
; CHECK-NEXT: ret
285277
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
286278
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -294,8 +286,7 @@ define <8 x i32> @vpselect_vpzext(<8 x i32> %passthru, <8 x i8> %x, <8 x i1> %m,
294286
; CHECK-LABEL: vpselect_vpzext:
295287
; CHECK: # %bb.0:
296288
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
297-
; CHECK-NEXT: vzext.vf4 v10, v9
298-
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
289+
; CHECK-NEXT: vzext.vf4 v8, v9, v0.t
299290
; CHECK-NEXT: ret
300291
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
301292
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -309,8 +300,7 @@ define <8 x i32> @vpselect_vptrunc(<8 x i32> %passthru, <8 x i64> %x, <8 x i1> %
309300
; CHECK-LABEL: vpselect_vptrunc:
310301
; CHECK: # %bb.0:
311302
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
312-
; CHECK-NEXT: vnsrl.wi v9, v10, 0
313-
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
303+
; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
314304
; CHECK-NEXT: ret
315305
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
316306
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -324,9 +314,7 @@ define <8 x double> @vpselect_vpfpext(<8 x double> %passthru, <8 x float> %x, <8
324314
; CHECK-LABEL: vpselect_vpfpext:
325315
; CHECK: # %bb.0:
326316
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
327-
; CHECK-NEXT: vfwcvt.f.f.v v12, v10
328-
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
329-
; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
317+
; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t
330318
; CHECK-NEXT: ret
331319
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
332320
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -340,8 +328,7 @@ define <8 x float> @vpselect_vpfptrunc(<8 x float> %passthru, <8 x double> %x, <
340328
; CHECK-LABEL: vpselect_vpfptrunc:
341329
; CHECK: # %bb.0:
342330
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
343-
; CHECK-NEXT: vfncvt.f.f.w v9, v10
344-
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
331+
; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t
345332
; CHECK-NEXT: ret
346333
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
347334
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -355,8 +342,7 @@ define <8 x i32> @vpselect_vpload(<8 x i32> %passthru, <8 x i32> * %p, <8 x i1>
355342
; CHECK-LABEL: vpselect_vpload:
356343
; CHECK: # %bb.0:
357344
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
358-
; CHECK-NEXT: vle32.v v9, (a0)
359-
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
345+
; CHECK-NEXT: vle32.v v8, (a0), v0.t
360346
; CHECK-NEXT: ret
361347
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
362348
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -370,9 +356,8 @@ define <8 x i32> @vpselect_vpload2(<8 x i32> %passthru, <8 x i32> * %p, <8 x i32
370356
; CHECK-LABEL: vpselect_vpload2:
371357
; CHECK: # %bb.0:
372358
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
373-
; CHECK-NEXT: vle32.v v11, (a0)
374359
; CHECK-NEXT: vmseq.vv v0, v9, v10
375-
; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
360+
; CHECK-NEXT: vle32.v v8, (a0), v0.t
376361
; CHECK-NEXT: ret
377362
%splat = insertelement <8 x i1> poison, i1 -1, i32 0
378363
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,9 @@ define void @vselect_vv_v8i32(<8 x i32>* %a, <8 x i32>* %b, <8 x i1>* %cc, <8 x
66
; CHECK-LABEL: vselect_vv_v8i32:
77
; CHECK: # %bb.0:
88
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
9-
; CHECK-NEXT: vle32.v v8, (a0)
109
; CHECK-NEXT: vlm.v v0, (a2)
11-
; CHECK-NEXT: vle32.v v10, (a1)
12-
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
10+
; CHECK-NEXT: vle32.v v8, (a1)
11+
; CHECK-NEXT: vle32.v v8, (a0), v0.t
1312
; CHECK-NEXT: vse32.v v8, (a3)
1413
; CHECK-NEXT: ret
1514
%va = load <8 x i32>, <8 x i32>* %a
@@ -60,10 +59,9 @@ define void @vselect_vv_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x i1>* %cc, <
6059
; CHECK-LABEL: vselect_vv_v8f32:
6160
; CHECK: # %bb.0:
6261
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
63-
; CHECK-NEXT: vle32.v v8, (a0)
6462
; CHECK-NEXT: vlm.v v0, (a2)
65-
; CHECK-NEXT: vle32.v v10, (a1)
66-
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
63+
; CHECK-NEXT: vle32.v v8, (a1)
64+
; CHECK-NEXT: vle32.v v8, (a0), v0.t
6765
; CHECK-NEXT: vse32.v v8, (a3)
6866
; CHECK-NEXT: ret
6967
%va = load <8 x float>, <8 x float>* %a
@@ -114,10 +112,9 @@ define void @vselect_vv_v16i16(<16 x i16>* %a, <16 x i16>* %b, <16 x i1>* %cc, <
114112
; CHECK-LABEL: vselect_vv_v16i16:
115113
; CHECK: # %bb.0:
116114
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
117-
; CHECK-NEXT: vle16.v v8, (a0)
118115
; CHECK-NEXT: vlm.v v0, (a2)
119-
; CHECK-NEXT: vle16.v v10, (a1)
120-
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
116+
; CHECK-NEXT: vle16.v v8, (a1)
117+
; CHECK-NEXT: vle16.v v8, (a0), v0.t
121118
; CHECK-NEXT: vse16.v v8, (a3)
122119
; CHECK-NEXT: ret
123120
%va = load <16 x i16>, <16 x i16>* %a
@@ -169,10 +166,9 @@ define void @vselect_vv_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x i1>* %cc,
169166
; CHECK: # %bb.0:
170167
; CHECK-NEXT: li a4, 32
171168
; CHECK-NEXT: vsetvli zero, a4, e16, m4, ta, mu
172-
; CHECK-NEXT: vle16.v v8, (a0)
173169
; CHECK-NEXT: vlm.v v0, (a2)
174-
; CHECK-NEXT: vle16.v v12, (a1)
175-
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
170+
; CHECK-NEXT: vle16.v v8, (a1)
171+
; CHECK-NEXT: vle16.v v8, (a0), v0.t
176172
; CHECK-NEXT: vse16.v v8, (a3)
177173
; CHECK-NEXT: ret
178174
%va = load <32 x half>, <32 x half>* %a

0 commit comments

Comments
 (0)