Skip to content

Commit 50f9b34

Browse files
authored
[RISCV] Prefer vmv.s.x for build_vector a, undef, ..., undef (llvm#136164)
If we have a build vector which could be lowered either as a splat or as a scalar insert, prefer the scalar insert. At high LMUL, this reduces vector register pressure (locally — the use will likely still be aligned) and reduces the amount of work performed compared to the splat.
1 parent b6dff56 commit 50f9b34

File tree

7 files changed

+46
-32
lines changed

7 files changed

+46
-32
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4208,8 +4208,22 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
42084208
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
42094209
if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
42104210
return Gather;
4211-
unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4212-
: RISCVISD::VMV_V_X_VL;
4211+
4212+
// Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4213+
// pressure at high LMUL.
4214+
if (all_of(Op->ops().drop_front(),
4215+
[](const SDUse &U) { return U.get().isUndef(); })) {
4216+
unsigned Opc =
4217+
VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4218+
if (!VT.isFloatingPoint())
4219+
Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4220+
Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4221+
Splat, VL);
4222+
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4223+
}
4224+
4225+
unsigned Opc =
4226+
VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
42134227
if (!VT.isFloatingPoint())
42144228
Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
42154229
Splat =

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ define <2 x i8> @v2i8(i8 %x, i8 %y) {
187187
; CHECK-LABEL: v2i8:
188188
; CHECK: # %bb.0:
189189
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
190-
; CHECK-NEXT: vmv.v.x v8, a0
190+
; CHECK-NEXT: vmv.s.x v8, a0
191191
; CHECK-NEXT: vadd.vx v9, v8, a1
192192
; CHECK-NEXT: vrgather.vi v8, v9, 0
193193
; CHECK-NEXT: ret
@@ -203,7 +203,7 @@ define <4 x i8> @v4i8(i8 %x, i8 %y) {
203203
; CHECK-LABEL: v4i8:
204204
; CHECK: # %bb.0:
205205
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
206-
; CHECK-NEXT: vmv.v.x v8, a0
206+
; CHECK-NEXT: vmv.s.x v8, a0
207207
; CHECK-NEXT: vadd.vx v9, v8, a1
208208
; CHECK-NEXT: vrgather.vi v8, v9, 0
209209
; CHECK-NEXT: ret
@@ -219,7 +219,7 @@ define <8 x i8> @v8i8(i8 %x, i8 %y) {
219219
; CHECK-LABEL: v8i8:
220220
; CHECK: # %bb.0:
221221
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
222-
; CHECK-NEXT: vmv.v.x v8, a0
222+
; CHECK-NEXT: vmv.s.x v8, a0
223223
; CHECK-NEXT: vadd.vx v9, v8, a1
224224
; CHECK-NEXT: vrgather.vi v8, v9, 0
225225
; CHECK-NEXT: ret
@@ -235,7 +235,7 @@ define <16 x i8> @v16i8(i8 %x, i8 %y) {
235235
; CHECK-LABEL: v16i8:
236236
; CHECK: # %bb.0:
237237
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
238-
; CHECK-NEXT: vmv.v.x v8, a0
238+
; CHECK-NEXT: vmv.s.x v8, a0
239239
; CHECK-NEXT: vadd.vx v9, v8, a1
240240
; CHECK-NEXT: vrgather.vi v8, v9, 0
241241
; CHECK-NEXT: ret
@@ -252,7 +252,7 @@ define <32 x i8> @v32i8(i8 %x, i8 %y) {
252252
; CHECK: # %bb.0:
253253
; CHECK-NEXT: li a2, 32
254254
; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
255-
; CHECK-NEXT: vmv.v.x v8, a0
255+
; CHECK-NEXT: vmv.s.x v8, a0
256256
; CHECK-NEXT: vadd.vx v10, v8, a1
257257
; CHECK-NEXT: vrgather.vi v8, v10, 0
258258
; CHECK-NEXT: ret
@@ -269,7 +269,7 @@ define <64 x i8> @v64i8(i8 %x, i8 %y) {
269269
; CHECK: # %bb.0:
270270
; CHECK-NEXT: li a2, 64
271271
; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
272-
; CHECK-NEXT: vmv.v.x v8, a0
272+
; CHECK-NEXT: vmv.s.x v8, a0
273273
; CHECK-NEXT: vadd.vx v12, v8, a1
274274
; CHECK-NEXT: vrgather.vi v8, v12, 0
275275
; CHECK-NEXT: ret
@@ -300,7 +300,7 @@ define <2 x i16> @v2i16(i16 %x, i16 %y) {
300300
; CHECK-LABEL: v2i16:
301301
; CHECK: # %bb.0:
302302
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
303-
; CHECK-NEXT: vmv.v.x v8, a0
303+
; CHECK-NEXT: vmv.s.x v8, a0
304304
; CHECK-NEXT: vadd.vx v9, v8, a1
305305
; CHECK-NEXT: vrgather.vi v8, v9, 0
306306
; CHECK-NEXT: ret
@@ -316,7 +316,7 @@ define <4 x i16> @v4i16(i16 %x, i16 %y) {
316316
; CHECK-LABEL: v4i16:
317317
; CHECK: # %bb.0:
318318
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
319-
; CHECK-NEXT: vmv.v.x v8, a0
319+
; CHECK-NEXT: vmv.s.x v8, a0
320320
; CHECK-NEXT: vadd.vx v9, v8, a1
321321
; CHECK-NEXT: vrgather.vi v8, v9, 0
322322
; CHECK-NEXT: ret
@@ -332,7 +332,7 @@ define <8 x i16> @v8i16(i16 %x, i16 %y) {
332332
; CHECK-LABEL: v8i16:
333333
; CHECK: # %bb.0:
334334
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
335-
; CHECK-NEXT: vmv.v.x v8, a0
335+
; CHECK-NEXT: vmv.s.x v8, a0
336336
; CHECK-NEXT: vadd.vx v9, v8, a1
337337
; CHECK-NEXT: vrgather.vi v8, v9, 0
338338
; CHECK-NEXT: ret
@@ -348,7 +348,7 @@ define <16 x i16> @v16i16(i16 %x, i16 %y) {
348348
; CHECK-LABEL: v16i16:
349349
; CHECK: # %bb.0:
350350
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
351-
; CHECK-NEXT: vmv.v.x v8, a0
351+
; CHECK-NEXT: vmv.s.x v8, a0
352352
; CHECK-NEXT: vadd.vx v10, v8, a1
353353
; CHECK-NEXT: vrgather.vi v8, v10, 0
354354
; CHECK-NEXT: ret
@@ -365,7 +365,7 @@ define <32 x i16> @v32i16(i16 %x, i16 %y) {
365365
; CHECK: # %bb.0:
366366
; CHECK-NEXT: li a2, 32
367367
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
368-
; CHECK-NEXT: vmv.v.x v8, a0
368+
; CHECK-NEXT: vmv.s.x v8, a0
369369
; CHECK-NEXT: vadd.vx v12, v8, a1
370370
; CHECK-NEXT: vrgather.vi v8, v12, 0
371371
; CHECK-NEXT: ret
@@ -396,7 +396,7 @@ define <2 x i32> @v2i32(i32 %x, i32 %y) {
396396
; CHECK-LABEL: v2i32:
397397
; CHECK: # %bb.0:
398398
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
399-
; CHECK-NEXT: vmv.v.x v8, a0
399+
; CHECK-NEXT: vmv.s.x v8, a0
400400
; CHECK-NEXT: vadd.vx v9, v8, a1
401401
; CHECK-NEXT: vrgather.vi v8, v9, 0
402402
; CHECK-NEXT: ret
@@ -412,7 +412,7 @@ define <4 x i32> @v4i32(i32 %x, i32 %y) {
412412
; CHECK-LABEL: v4i32:
413413
; CHECK: # %bb.0:
414414
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
415-
; CHECK-NEXT: vmv.v.x v8, a0
415+
; CHECK-NEXT: vmv.s.x v8, a0
416416
; CHECK-NEXT: vadd.vx v9, v8, a1
417417
; CHECK-NEXT: vrgather.vi v8, v9, 0
418418
; CHECK-NEXT: ret
@@ -428,7 +428,7 @@ define <8 x i32> @v8i32(i32 %x, i32 %y) {
428428
; CHECK-LABEL: v8i32:
429429
; CHECK: # %bb.0:
430430
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
431-
; CHECK-NEXT: vmv.v.x v8, a0
431+
; CHECK-NEXT: vmv.s.x v8, a0
432432
; CHECK-NEXT: vadd.vx v10, v8, a1
433433
; CHECK-NEXT: vrgather.vi v8, v10, 0
434434
; CHECK-NEXT: ret
@@ -444,7 +444,7 @@ define <16 x i32> @v16i32(i32 %x, i32 %y) {
444444
; CHECK-LABEL: v16i32:
445445
; CHECK: # %bb.0:
446446
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
447-
; CHECK-NEXT: vmv.v.x v8, a0
447+
; CHECK-NEXT: vmv.s.x v8, a0
448448
; CHECK-NEXT: vadd.vx v12, v8, a1
449449
; CHECK-NEXT: vrgather.vi v8, v12, 0
450450
; CHECK-NEXT: ret
@@ -509,7 +509,7 @@ define <2 x i64> @v2i64(i64 %x, i64 %y) {
509509
; RV64-LABEL: v2i64:
510510
; RV64: # %bb.0:
511511
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
512-
; RV64-NEXT: vmv.v.x v8, a0
512+
; RV64-NEXT: vmv.s.x v8, a0
513513
; RV64-NEXT: vadd.vx v9, v8, a1
514514
; RV64-NEXT: vrgather.vi v8, v9, 0
515515
; RV64-NEXT: ret
@@ -542,7 +542,7 @@ define <4 x i64> @v4i64(i64 %x, i64 %y) {
542542
; RV64-LABEL: v4i64:
543543
; RV64: # %bb.0:
544544
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
545-
; RV64-NEXT: vmv.v.x v8, a0
545+
; RV64-NEXT: vmv.s.x v8, a0
546546
; RV64-NEXT: vadd.vx v10, v8, a1
547547
; RV64-NEXT: vrgather.vi v8, v10, 0
548548
; RV64-NEXT: ret
@@ -575,7 +575,7 @@ define <8 x i64> @v8i64(i64 %x, i64 %y) {
575575
; RV64-LABEL: v8i64:
576576
; RV64: # %bb.0:
577577
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
578-
; RV64-NEXT: vmv.v.x v8, a0
578+
; RV64-NEXT: vmv.s.x v8, a0
579579
; RV64-NEXT: vadd.vx v12, v8, a1
580580
; RV64-NEXT: vrgather.vi v8, v12, 0
581581
; RV64-NEXT: ret
@@ -591,7 +591,7 @@ define <4 x half> @v4f16(half %x, half %y) {
591591
; CHECK-LABEL: v4f16:
592592
; CHECK: # %bb.0:
593593
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
594-
; CHECK-NEXT: vfmv.v.f v8, fa0
594+
; CHECK-NEXT: vfmv.s.f v8, fa0
595595
; CHECK-NEXT: vfadd.vf v9, v8, fa1
596596
; CHECK-NEXT: vrgather.vi v8, v9, 0
597597
; CHECK-NEXT: ret
@@ -607,7 +607,7 @@ define <2 x float> @v2f32(float %x, float %y) {
607607
; CHECK-LABEL: v2f32:
608608
; CHECK: # %bb.0:
609609
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
610-
; CHECK-NEXT: vfmv.v.f v8, fa0
610+
; CHECK-NEXT: vfmv.s.f v8, fa0
611611
; CHECK-NEXT: vfadd.vf v9, v8, fa1
612612
; CHECK-NEXT: vrgather.vi v8, v9, 0
613613
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ define void @buggy(i32 %0) #0 {
439439
; RV32-LABEL: buggy:
440440
; RV32: # %bb.0: # %entry
441441
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
442-
; RV32-NEXT: vmv.v.x v8, a0
442+
; RV32-NEXT: vmv.s.x v8, a0
443443
; RV32-NEXT: vadd.vv v8, v8, v8
444444
; RV32-NEXT: vor.vi v8, v8, 1
445445
; RV32-NEXT: vrgather.vi v9, v8, 0
@@ -450,7 +450,7 @@ define void @buggy(i32 %0) #0 {
450450
; RV64: # %bb.0: # %entry
451451
; RV64-NEXT: slli a0, a0, 1
452452
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
453-
; RV64-NEXT: vmv.v.x v8, a0
453+
; RV64-NEXT: vmv.s.x v8, a0
454454
; RV64-NEXT: vor.vi v8, v8, 1
455455
; RV64-NEXT: vrgather.vi v9, v8, 0
456456
; RV64-NEXT: vse32.v v9, (zero)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ define <4 x double> @vslide1up_4xf64(<4 x double> %v, double %b) {
250250
; CHECK-LABEL: vslide1up_4xf64:
251251
; CHECK: # %bb.0:
252252
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
253-
; CHECK-NEXT: vfmv.v.f v10, fa0
253+
; CHECK-NEXT: vfmv.s.f v10, fa0
254254
; CHECK-NEXT: vslideup.vi v10, v8, 3
255255
; CHECK-NEXT: vmv.v.v v8, v10
256256
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
500500
; RV32-SLOW-NEXT: or a4, a6, a5
501501
; RV32-SLOW-NEXT: or a3, a4, a3
502502
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
503-
; RV32-SLOW-NEXT: vmv.v.x v8, a3
503+
; RV32-SLOW-NEXT: vmv.s.x v8, a3
504504
; RV32-SLOW-NEXT: .LBB8_2: # %else
505505
; RV32-SLOW-NEXT: andi a2, a2, 2
506506
; RV32-SLOW-NEXT: beqz a2, .LBB8_4
@@ -544,7 +544,7 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
544544
; RV64-SLOW-NEXT: or a4, a6, a5
545545
; RV64-SLOW-NEXT: or a3, a4, a3
546546
; RV64-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
547-
; RV64-SLOW-NEXT: vmv.v.x v8, a3
547+
; RV64-SLOW-NEXT: vmv.s.x v8, a3
548548
; RV64-SLOW-NEXT: .LBB8_2: # %else
549549
; RV64-SLOW-NEXT: andi a2, a2, 2
550550
; RV64-SLOW-NEXT: beqz a2, .LBB8_4

llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
99
; RV32: # %bb.0: # %entry
1010
; RV32-NEXT: addi a3, a2, 1
1111
; RV32-NEXT: th.lbib a4, (a1), -1, 0
12-
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
13-
; RV32-NEXT: vmv.v.x v8, a4
12+
; RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma
13+
; RV32-NEXT: vmv.s.x v8, a4
1414
; RV32-NEXT: vmv.s.x v9, zero
1515
; RV32-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
1616
; RV32-NEXT: vslideup.vx v8, v9, a2
@@ -35,8 +35,8 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
3535
; RV64: # %bb.0: # %entry
3636
; RV64-NEXT: addi a3, a2, 1
3737
; RV64-NEXT: th.lbib a4, (a1), -1, 0
38-
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
39-
; RV64-NEXT: vmv.v.x v8, a4
38+
; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma
39+
; RV64-NEXT: vmv.s.x v8, a4
4040
; RV64-NEXT: vmv.s.x v9, zero
4141
; RV64-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
4242
; RV64-NEXT: vslideup.vx v8, v9, a2

llvm/test/CodeGen/RISCV/rvv/pr125306.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ define <2 x i32> @main(ptr %0) {
6060
; CHECK-NEXT: vslide1down.vx v8, v8, zero
6161
; CHECK-NEXT: vslide1down.vx v10, v10, zero
6262
; CHECK-NEXT: vmin.vv v8, v10, v8
63-
; CHECK-NEXT: vmv.v.x v10, a0
63+
; CHECK-NEXT: vmv.s.x v10, a0
6464
; CHECK-NEXT: vslide1down.vx v11, v11, zero
6565
; CHECK-NEXT: vmin.vx v10, v10, a2
6666
; CHECK-NEXT: vmin.vx v10, v10, a1

0 commit comments

Comments
 (0)