Commit 7150b2c

[RISCV] Optimize vp.splice with 0 offset. (#145533)
We can skip the slidedown if the offset is 0.
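
With a non-negative offset, this lowering slides the first operand down by the offset and then slides the second operand up behind the remaining elements. A slidedown by 0 elements is an identity, so for a zero offset only the vslideup remains. A minimal illustration, taken from the tests added in this commit: the zero-offset splice

  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 0, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)

now lowers to a single slideup instead of a vslidedown/vslideup pair:

  vsetvli zero, a1, e64, m1, ta, ma
  vslideup.vx v8, v9, a0
  ret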

3 files changed: +40 -18 lines

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 5 additions & 4 deletions
@@ -13297,10 +13297,11 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
     DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
   }
 
-  SDValue SlideDown =
-      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
-                    Op1, DownOffset, Mask, UpOffset);
-  SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
+  if (ImmValue != 0)
+    Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
+                        DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
+                        UpOffset);
+  SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
                                UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
 
   if (IsMaskVector) {
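
The new control flow, restated with explanatory comments (a sketch using the identifiers from the patch, not the verbatim source):

// A vslidedown of Op1 by 0 elements would be an identity, so the slide
// is emitted only for a non-zero splice offset.
if (ImmValue != 0)
  Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
                      UpOffset);
// Op2 is then slid up behind the first operand's elements; with a zero
// offset that first operand is the original, unslid Op1.
SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
                             UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);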

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll

Lines changed: 11 additions & 0 deletions
@@ -30,6 +30,17 @@ define <2 x i64> @test_vp_splice_v2i64_negative_offset(<2 x i64> %va, <2 x i64>
   ret <2 x i64> %v
 }
 
+define <2 x i64> @test_vp_splice_v2i64_zero_offset(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_v2i64_zero_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
+; CHECK-NEXT:    ret
+
+  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 0, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
+  ret <2 x i64> %v
+}
+
 define <2 x i64> @test_vp_splice_v2i64_masked(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
 ; CHECK-LABEL: test_vp_splice_v2i64_masked:
 ; CHECK:       # %bb.0:

llvm/test/CodeGen/RISCV/rvv/vp-splice.ll

Lines changed: 24 additions & 14 deletions
@@ -40,6 +40,16 @@ define <vscale x 2 x i64> @test_vp_splice_nxv2i64_negative_offset(<vscale x 2 x
   ret <vscale x 2 x i64> %v
 }
 
+define <vscale x 2 x i64> @test_vp_splice_nxv2i64_zero_offset(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_nxv2i64_zero_offset:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vslideup.vx v8, v10, a0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 0, <vscale x 2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
+  ret <vscale x 2 x i64> %v
+}
+
 define <vscale x 2 x i64> @test_vp_splice_nxv2i64_masked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
 ; CHECK-LABEL: test_vp_splice_nxv2i64_masked:
 ; CHECK:       # %bb.0:
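
The remaining hunks in this file are fallout from adding the function above: the .LBB label numbers in the nxv16i64 tests shift by one because the new test changes the function numbering.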
@@ -295,10 +305,10 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64(<vscale x 16 x i64> %va, <vs
 ; CHECK-NEXT:    addi a5, a5, -1
 ; CHECK-NEXT:    slli a1, a4, 3
 ; CHECK-NEXT:    mv a7, a2
-; CHECK-NEXT:    bltu a2, a5, .LBB21_2
+; CHECK-NEXT:    bltu a2, a5, .LBB22_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a7, a5
-; CHECK-NEXT:  .LBB21_2:
+; CHECK-NEXT:  .LBB22_2:
 ; CHECK-NEXT:    addi sp, sp, -80
 ; CHECK-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
@@ -311,10 +321,10 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64(<vscale x 16 x i64> %va, <vs
 ; CHECK-NEXT:    slli a7, a7, 3
 ; CHECK-NEXT:    addi a6, sp, 64
 ; CHECK-NEXT:    mv t0, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB21_4
+; CHECK-NEXT:    bltu a2, a4, .LBB22_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv t0, a4
-; CHECK-NEXT:  .LBB21_4:
+; CHECK-NEXT:  .LBB22_4:
 ; CHECK-NEXT:    vl8re64.v v24, (a5)
 ; CHECK-NEXT:    add a5, a6, a7
 ; CHECK-NEXT:    vl8re64.v v0, (a0)
@@ -328,10 +338,10 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64(<vscale x 16 x i64> %va, <vs
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vse64.v v16, (a6)
 ; CHECK-NEXT:    mv a0, a3
-; CHECK-NEXT:    bltu a3, a4, .LBB21_6
+; CHECK-NEXT:    bltu a3, a4, .LBB22_6
 ; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    mv a0, a4
-; CHECK-NEXT:  .LBB21_6:
+; CHECK-NEXT:  .LBB22_6:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vse64.v v0, (a5)
 ; CHECK-NEXT:    sub a2, a3, a4
@@ -363,10 +373,10 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64_negative_offset(<vscale x 16
 ; CHECK-NEXT:    addi a6, a6, -1
 ; CHECK-NEXT:    slli a1, a5, 3
 ; CHECK-NEXT:    mv a4, a2
-; CHECK-NEXT:    bltu a2, a6, .LBB22_2
+; CHECK-NEXT:    bltu a2, a6, .LBB23_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a4, a6
-; CHECK-NEXT:  .LBB22_2:
+; CHECK-NEXT:  .LBB23_2:
 ; CHECK-NEXT:    addi sp, sp, -80
 ; CHECK-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
@@ -379,10 +389,10 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64_negative_offset(<vscale x 16
 ; CHECK-NEXT:    slli a4, a4, 3
 ; CHECK-NEXT:    addi a7, sp, 64
 ; CHECK-NEXT:    mv t0, a2
-; CHECK-NEXT:    bltu a2, a5, .LBB22_4
+; CHECK-NEXT:    bltu a2, a5, .LBB23_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv t0, a5
-; CHECK-NEXT:  .LBB22_4:
+; CHECK-NEXT:  .LBB23_4:
 ; CHECK-NEXT:    vl8re64.v v24, (a6)
 ; CHECK-NEXT:    add a6, a7, a4
 ; CHECK-NEXT:    vl8re64.v v0, (a0)
@@ -396,10 +406,10 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64_negative_offset(<vscale x 16
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vse64.v v16, (a7)
 ; CHECK-NEXT:    mv a0, a3
-; CHECK-NEXT:    bltu a3, a5, .LBB22_6
+; CHECK-NEXT:    bltu a3, a5, .LBB23_6
 ; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    mv a0, a5
-; CHECK-NEXT:  .LBB22_6:
+; CHECK-NEXT:  .LBB23_6:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vse64.v v0, (a6)
 ; CHECK-NEXT:    sub a2, a3, a5
@@ -410,10 +420,10 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64_negative_offset(<vscale x 16
 ; CHECK-NEXT:    li a3, 8
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT:    vse64.v v24, (a5)
-; CHECK-NEXT:    bltu a4, a3, .LBB22_8
+; CHECK-NEXT:    bltu a4, a3, .LBB23_8
 ; CHECK-NEXT:  # %bb.7:
 ; CHECK-NEXT:    li a4, 8
-; CHECK-NEXT:  .LBB22_8:
+; CHECK-NEXT:  .LBB23_8:
 ; CHECK-NEXT:    sub a2, a6, a4
 ; CHECK-NEXT:    add a1, a2, a1
 ; CHECK-NEXT:    vle64.v v16, (a1)
