Skip to content

Commit a14c83a

Browse files
author
Yeting Kuo
committed
[RISCV] Disable performCombineVMergeAndVOps for PseudoVIOTA_M.
This transformation is illegal for PseudoVIOTA_M. The value of `viota.m vd, vs2` is the prefix sum of vs2, and adding a mask to it may produce a wrong prefix sum. For example, the result of the following sequence is `{5, 5, 5, 3}`,
```
; v4 = {1, 1, 1, 1}
viota.m v1, v4
; v0 = {0, 0, 0, 1}, v1 = {0, 1, 2, 3}, v8 = {5, 5, 5, 5}
vmerge.vvm v8, v8, v1, v0.t
; v8 = {5, 5, 5, 3}
```
but if we merge them into `viota.m v8, v4, v0.t`, then the result is `{5, 5, 5, 0}`. We still do the transformation when the mask of vmerge.vvm is a true (all-ones) mask.
1 parent 23099ac commit a14c83a

File tree

3 files changed

+52
-5
lines changed

3 files changed

+52
-5
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3501,6 +3501,19 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
35013501
if (!True.isMachineOpcode())
35023502
return false;
35033503

3504+
// This transformation is illegal for viota.m when Mask is not a true mask.
3505+
switch (True->getMachineOpcode()) {
3506+
case RISCV::PseudoVIOTA_M_MF8:
3507+
case RISCV::PseudoVIOTA_M_MF4:
3508+
case RISCV::PseudoVIOTA_M_MF2:
3509+
case RISCV::PseudoVIOTA_M_M1:
3510+
case RISCV::PseudoVIOTA_M_M2:
3511+
case RISCV::PseudoVIOTA_M_M4:
3512+
case RISCV::PseudoVIOTA_M_M8:
3513+
if (Mask && !usesAllOnesMask(Mask, Glue))
3514+
return false;
3515+
}
3516+
35043517
unsigned TrueOpc = True.getMachineOpcode();
35053518
const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
35063519
uint64_t TrueTSFlags = TrueMCID.TSFlags;

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,3 +258,19 @@ entry:
258258
%res = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %i, <vscale x 2 x i32> %passthru, i32 %evl)
259259
ret <vscale x 2 x i32> %res
260260
}
261+
262+
; Test VIOTA_M
263+
declare <vscale x 2 x i32> @llvm.riscv.viota.mask.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i1>, i64, i64)
264+
define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
265+
; CHECK-LABEL: vpmerge_viota:
266+
; CHECK: # %bb.0:
267+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
268+
; CHECK-NEXT: viota.m v8, v9, v0.t
269+
; CHECK-NEXT: ret
270+
%1 = zext i32 %vl to i64
271+
%a = call <vscale x 2 x i32> @llvm.riscv.viota.mask.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, <vscale x 2 x i1> %m, i64 %1, i64 0)
272+
%splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
273+
%mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
274+
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 %1)
275+
ret <vscale x 2 x i32> %b
276+
}

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -279,20 +279,37 @@ define <vscale x 2 x i32> @vpmerge_vid(<vscale x 2 x i32> %passthru, <vscale x 2
279279
ret <vscale x 2 x i32> %b
280280
}
281281

282-
; Test riscv.viota
282+
; Test not combine VIOTA_M and VMERGE_VVM without true mask.
283283
declare <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i64)
284284
define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
285285
; CHECK-LABEL: vpmerge_viota:
286286
; CHECK: # %bb.0:
287-
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
288-
; CHECK-NEXT: viota.m v8, v9, v0.t
287+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
288+
; CHECK-NEXT: viota.m v10, v9
289+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
290+
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
289291
; CHECK-NEXT: ret
290292
%1 = zext i32 %vl to i64
291293
%a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
292294
%b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
293295
ret <vscale x 2 x i32> %b
294296
}
295297

298+
; Test combine VIOTA_M and VMERGE_VVM with true mask.
299+
define <vscale x 2 x i32> @vpmerge_viota2(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
300+
; CHECK-LABEL: vpmerge_viota2:
301+
; CHECK: # %bb.0:
302+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
303+
; CHECK-NEXT: viota.m v8, v0
304+
; CHECK-NEXT: ret
305+
%1 = zext i32 %vl to i64
306+
%a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
307+
%splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
308+
%true = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
309+
%b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %true, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
310+
ret <vscale x 2 x i32> %b
311+
}
312+
296313
; Test riscv.vfclass
297314
declare <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x float>, i64)
298315
define <vscale x 2 x i32> @vpmerge_vflcass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
@@ -730,8 +747,9 @@ define <vscale x 2 x i32> @vpselect_vid(<vscale x 2 x i32> %passthru, <vscale x
730747
define <vscale x 2 x i32> @vpselect_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
731748
; CHECK-LABEL: vpselect_viota:
732749
; CHECK: # %bb.0:
733-
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
734-
; CHECK-NEXT: viota.m v8, v9, v0.t
750+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
751+
; CHECK-NEXT: viota.m v10, v9
752+
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
735753
; CHECK-NEXT: ret
736754
%1 = zext i32 %vl to i64
737755
%a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)

0 commit comments

Comments
 (0)