Skip to content

Commit 0fa1659

Browse files
committed
[RISCV] Add DAG combine for (vmv_s_x_vl merge, (and scalar, mask), vl).
If `mask` has at least SEW trailing one bits, combine (vmv_s_x_vl merge, (and scalar, mask), vl) -> (vmv_s_x_vl merge, scalar, vl). The AND is redundant because the vmv.s.x instruction copies the scalar integer register to element 0 of the destination vector register, and when SEW < XLEN only the least-significant SEW bits are copied while the upper XLEN-SEW bits are ignored.
1 parent 297f6d9 commit 0fa1659

File tree

3 files changed

+88
-221
lines changed

3 files changed

+88
-221
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19183,6 +19183,14 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1918319183
SDValue Scalar = N->getOperand(1);
1918419184
SDValue VL = N->getOperand(2);
1918519185

19186+
// The vmv.s.x instruction copies the scalar integer register to element 0
19187+
// of the destination vector register. If SEW < XLEN, the least-significant
19188+
// bits are copied and the upper XLEN-SEW bits are ignored.
19189+
unsigned ScalarSize = Scalar.getValueSizeInBits();
19190+
unsigned EltWidth = VT.getScalarSizeInBits();
19191+
if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
19192+
return SDValue(N, 0);
19193+
1918619194
if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
1918719195
Scalar.getOperand(0).getValueType() == N->getValueType(0))
1918819196
return Scalar.getOperand(0);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll

Lines changed: 32 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ declare i8 @llvm.vp.reduce.umax.v2i8(i8, <2 x i8>, <2 x i1>, i32)
2424
define signext i8 @vpreduce_umax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
2525
; CHECK-LABEL: vpreduce_umax_v2i8:
2626
; CHECK: # %bb.0:
27-
; CHECK-NEXT: andi a0, a0, 255
2827
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2928
; CHECK-NEXT: vmv.s.x v9, a0
3029
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -55,7 +54,6 @@ declare i8 @llvm.vp.reduce.umin.v2i8(i8, <2 x i8>, <2 x i1>, i32)
5554
define signext i8 @vpreduce_umin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
5655
; CHECK-LABEL: vpreduce_umin_v2i8:
5756
; CHECK: # %bb.0:
58-
; CHECK-NEXT: andi a0, a0, 255
5957
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6058
; CHECK-NEXT: vmv.s.x v9, a0
6159
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -131,7 +129,6 @@ declare i8 @llvm.vp.reduce.umin.v3i8(i8, <3 x i8>, <3 x i1>, i32)
131129
define signext i8 @vpreduce_umin_v3i8(i8 signext %s, <3 x i8> %v, <3 x i1> %m, i32 zeroext %evl) {
132130
; CHECK-LABEL: vpreduce_umin_v3i8:
133131
; CHECK: # %bb.0:
134-
; CHECK-NEXT: andi a0, a0, 255
135132
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
136133
; CHECK-NEXT: vmv.s.x v9, a0
137134
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -162,7 +159,6 @@ declare i8 @llvm.vp.reduce.umax.v4i8(i8, <4 x i8>, <4 x i1>, i32)
162159
define signext i8 @vpreduce_umax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
163160
; CHECK-LABEL: vpreduce_umax_v4i8:
164161
; CHECK: # %bb.0:
165-
; CHECK-NEXT: andi a0, a0, 255
166162
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
167163
; CHECK-NEXT: vmv.s.x v9, a0
168164
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -193,7 +189,6 @@ declare i8 @llvm.vp.reduce.umin.v4i8(i8, <4 x i8>, <4 x i1>, i32)
193189
define signext i8 @vpreduce_umin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
194190
; CHECK-LABEL: vpreduce_umin_v4i8:
195191
; CHECK: # %bb.0:
196-
; CHECK-NEXT: andi a0, a0, 255
197192
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
198193
; CHECK-NEXT: vmv.s.x v9, a0
199194
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -282,27 +277,14 @@ define signext i16 @vpreduce_add_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m
282277
declare i16 @llvm.vp.reduce.umax.v2i16(i16, <2 x i16>, <2 x i1>, i32)
283278

284279
define signext i16 @vpreduce_umax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
285-
; RV32-LABEL: vpreduce_umax_v2i16:
286-
; RV32: # %bb.0:
287-
; RV32-NEXT: slli a0, a0, 16
288-
; RV32-NEXT: srli a0, a0, 16
289-
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
290-
; RV32-NEXT: vmv.s.x v9, a0
291-
; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
292-
; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
293-
; RV32-NEXT: vmv.x.s a0, v9
294-
; RV32-NEXT: ret
295-
;
296-
; RV64-LABEL: vpreduce_umax_v2i16:
297-
; RV64: # %bb.0:
298-
; RV64-NEXT: slli a0, a0, 48
299-
; RV64-NEXT: srli a0, a0, 48
300-
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
301-
; RV64-NEXT: vmv.s.x v9, a0
302-
; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
303-
; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
304-
; RV64-NEXT: vmv.x.s a0, v9
305-
; RV64-NEXT: ret
280+
; CHECK-LABEL: vpreduce_umax_v2i16:
281+
; CHECK: # %bb.0:
282+
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
283+
; CHECK-NEXT: vmv.s.x v9, a0
284+
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
285+
; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
286+
; CHECK-NEXT: vmv.x.s a0, v9
287+
; CHECK-NEXT: ret
306288
%r = call i16 @llvm.vp.reduce.umax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
307289
ret i16 %r
308290
}
@@ -325,27 +307,14 @@ define signext i16 @vpreduce_smax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %
325307
declare i16 @llvm.vp.reduce.umin.v2i16(i16, <2 x i16>, <2 x i1>, i32)
326308

327309
define signext i16 @vpreduce_umin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
328-
; RV32-LABEL: vpreduce_umin_v2i16:
329-
; RV32: # %bb.0:
330-
; RV32-NEXT: slli a0, a0, 16
331-
; RV32-NEXT: srli a0, a0, 16
332-
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
333-
; RV32-NEXT: vmv.s.x v9, a0
334-
; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
335-
; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
336-
; RV32-NEXT: vmv.x.s a0, v9
337-
; RV32-NEXT: ret
338-
;
339-
; RV64-LABEL: vpreduce_umin_v2i16:
340-
; RV64: # %bb.0:
341-
; RV64-NEXT: slli a0, a0, 48
342-
; RV64-NEXT: srli a0, a0, 48
343-
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
344-
; RV64-NEXT: vmv.s.x v9, a0
345-
; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
346-
; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
347-
; RV64-NEXT: vmv.x.s a0, v9
348-
; RV64-NEXT: ret
310+
; CHECK-LABEL: vpreduce_umin_v2i16:
311+
; CHECK: # %bb.0:
312+
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
313+
; CHECK-NEXT: vmv.s.x v9, a0
314+
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
315+
; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
316+
; CHECK-NEXT: vmv.x.s a0, v9
317+
; CHECK-NEXT: ret
349318
%r = call i16 @llvm.vp.reduce.umin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
350319
ret i16 %r
351320
}
@@ -428,27 +397,14 @@ define signext i16 @vpreduce_add_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m
428397
declare i16 @llvm.vp.reduce.umax.v4i16(i16, <4 x i16>, <4 x i1>, i32)
429398

430399
define signext i16 @vpreduce_umax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
431-
; RV32-LABEL: vpreduce_umax_v4i16:
432-
; RV32: # %bb.0:
433-
; RV32-NEXT: slli a0, a0, 16
434-
; RV32-NEXT: srli a0, a0, 16
435-
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
436-
; RV32-NEXT: vmv.s.x v9, a0
437-
; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
438-
; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
439-
; RV32-NEXT: vmv.x.s a0, v9
440-
; RV32-NEXT: ret
441-
;
442-
; RV64-LABEL: vpreduce_umax_v4i16:
443-
; RV64: # %bb.0:
444-
; RV64-NEXT: slli a0, a0, 48
445-
; RV64-NEXT: srli a0, a0, 48
446-
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
447-
; RV64-NEXT: vmv.s.x v9, a0
448-
; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
449-
; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
450-
; RV64-NEXT: vmv.x.s a0, v9
451-
; RV64-NEXT: ret
400+
; CHECK-LABEL: vpreduce_umax_v4i16:
401+
; CHECK: # %bb.0:
402+
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
403+
; CHECK-NEXT: vmv.s.x v9, a0
404+
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
405+
; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
406+
; CHECK-NEXT: vmv.x.s a0, v9
407+
; CHECK-NEXT: ret
452408
%r = call i16 @llvm.vp.reduce.umax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
453409
ret i16 %r
454410
}
@@ -471,27 +427,14 @@ define signext i16 @vpreduce_smax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %
471427
declare i16 @llvm.vp.reduce.umin.v4i16(i16, <4 x i16>, <4 x i1>, i32)
472428

473429
define signext i16 @vpreduce_umin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
474-
; RV32-LABEL: vpreduce_umin_v4i16:
475-
; RV32: # %bb.0:
476-
; RV32-NEXT: slli a0, a0, 16
477-
; RV32-NEXT: srli a0, a0, 16
478-
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
479-
; RV32-NEXT: vmv.s.x v9, a0
480-
; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
481-
; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
482-
; RV32-NEXT: vmv.x.s a0, v9
483-
; RV32-NEXT: ret
484-
;
485-
; RV64-LABEL: vpreduce_umin_v4i16:
486-
; RV64: # %bb.0:
487-
; RV64-NEXT: slli a0, a0, 48
488-
; RV64-NEXT: srli a0, a0, 48
489-
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
490-
; RV64-NEXT: vmv.s.x v9, a0
491-
; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
492-
; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
493-
; RV64-NEXT: vmv.x.s a0, v9
494-
; RV64-NEXT: ret
430+
; CHECK-LABEL: vpreduce_umin_v4i16:
431+
; CHECK: # %bb.0:
432+
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
433+
; CHECK-NEXT: vmv.s.x v9, a0
434+
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
435+
; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
436+
; CHECK-NEXT: vmv.x.s a0, v9
437+
; CHECK-NEXT: ret
495438
%r = call i16 @llvm.vp.reduce.umin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
496439
ret i16 %r
497440
}

0 commit comments

Comments (0)