Skip to content

Commit 04ab6c8

Browse files
committed
[RISCV] Teach RISCVTargetLowering::shouldSinkOperands to sink splats for FAdd/FSub/FMul/FDiv.
1 parent 890027b commit 04ab6c8

File tree

2 files changed

+70
-78
lines changed

2 files changed

+70
-78
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1065,6 +1065,10 @@ bool RISCVTargetLowering::shouldSinkOperands(
10651065
case Instruction::Add:
10661066
case Instruction::Sub:
10671067
case Instruction::Mul:
1068+
case Instruction::FAdd:
1069+
case Instruction::FSub:
1070+
case Instruction::FMul:
1071+
case Instruction::FDiv:
10681072
return true;
10691073
case Instruction::Shl:
10701074
case Instruction::LShr:

llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

Lines changed: 66 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -909,14 +909,13 @@ define void @sink_splat_fmul(float* nocapture %a, float %x) {
909909
; CHECK-LABEL: sink_splat_fmul:
910910
; CHECK: # %bb.0: # %entry
911911
; CHECK-NEXT: fmv.w.x ft0, a1
912-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
913-
; CHECK-NEXT: vfmv.v.f v25, ft0
914912
; CHECK-NEXT: addi a1, zero, 1024
915913
; CHECK-NEXT: .LBB14_1: # %vector.body
916914
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
917-
; CHECK-NEXT: vle32.v v26, (a0)
918-
; CHECK-NEXT: vfmul.vv v26, v26, v25
919-
; CHECK-NEXT: vse32.v v26, (a0)
915+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
916+
; CHECK-NEXT: vle32.v v25, (a0)
917+
; CHECK-NEXT: vfmul.vf v25, v25, ft0
918+
; CHECK-NEXT: vse32.v v25, (a0)
920919
; CHECK-NEXT: addi a1, a1, -4
921920
; CHECK-NEXT: addi a0, a0, 16
922921
; CHECK-NEXT: bnez a1, .LBB14_1
@@ -947,14 +946,13 @@ define void @sink_splat_fdiv(float* nocapture %a, float %x) {
947946
; CHECK-LABEL: sink_splat_fdiv:
948947
; CHECK: # %bb.0: # %entry
949948
; CHECK-NEXT: fmv.w.x ft0, a1
950-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
951-
; CHECK-NEXT: vfmv.v.f v25, ft0
952949
; CHECK-NEXT: addi a1, zero, 1024
953950
; CHECK-NEXT: .LBB15_1: # %vector.body
954951
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
955-
; CHECK-NEXT: vle32.v v26, (a0)
956-
; CHECK-NEXT: vfdiv.vv v26, v26, v25
957-
; CHECK-NEXT: vse32.v v26, (a0)
952+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
953+
; CHECK-NEXT: vle32.v v25, (a0)
954+
; CHECK-NEXT: vfdiv.vf v25, v25, ft0
955+
; CHECK-NEXT: vse32.v v25, (a0)
958956
; CHECK-NEXT: addi a1, a1, -4
959957
; CHECK-NEXT: addi a0, a0, 16
960958
; CHECK-NEXT: bnez a1, .LBB15_1
@@ -985,14 +983,13 @@ define void @sink_splat_frdiv(float* nocapture %a, float %x) {
985983
; CHECK-LABEL: sink_splat_frdiv:
986984
; CHECK: # %bb.0: # %entry
987985
; CHECK-NEXT: fmv.w.x ft0, a1
988-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
989-
; CHECK-NEXT: vfmv.v.f v25, ft0
990986
; CHECK-NEXT: addi a1, zero, 1024
991987
; CHECK-NEXT: .LBB16_1: # %vector.body
992988
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
993-
; CHECK-NEXT: vle32.v v26, (a0)
994-
; CHECK-NEXT: vfdiv.vv v26, v25, v26
995-
; CHECK-NEXT: vse32.v v26, (a0)
989+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
990+
; CHECK-NEXT: vle32.v v25, (a0)
991+
; CHECK-NEXT: vfrdiv.vf v25, v25, ft0
992+
; CHECK-NEXT: vse32.v v25, (a0)
996993
; CHECK-NEXT: addi a1, a1, -4
997994
; CHECK-NEXT: addi a0, a0, 16
998995
; CHECK-NEXT: bnez a1, .LBB16_1
@@ -1023,14 +1020,13 @@ define void @sink_splat_fadd(float* nocapture %a, float %x) {
10231020
; CHECK-LABEL: sink_splat_fadd:
10241021
; CHECK: # %bb.0: # %entry
10251022
; CHECK-NEXT: fmv.w.x ft0, a1
1026-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
1027-
; CHECK-NEXT: vfmv.v.f v25, ft0
10281023
; CHECK-NEXT: addi a1, zero, 1024
10291024
; CHECK-NEXT: .LBB17_1: # %vector.body
10301025
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1031-
; CHECK-NEXT: vle32.v v26, (a0)
1032-
; CHECK-NEXT: vfadd.vv v26, v26, v25
1033-
; CHECK-NEXT: vse32.v v26, (a0)
1026+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
1027+
; CHECK-NEXT: vle32.v v25, (a0)
1028+
; CHECK-NEXT: vfadd.vf v25, v25, ft0
1029+
; CHECK-NEXT: vse32.v v25, (a0)
10341030
; CHECK-NEXT: addi a1, a1, -4
10351031
; CHECK-NEXT: addi a0, a0, 16
10361032
; CHECK-NEXT: bnez a1, .LBB17_1
@@ -1061,14 +1057,13 @@ define void @sink_splat_fsub(float* nocapture %a, float %x) {
10611057
; CHECK-LABEL: sink_splat_fsub:
10621058
; CHECK: # %bb.0: # %entry
10631059
; CHECK-NEXT: fmv.w.x ft0, a1
1064-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
1065-
; CHECK-NEXT: vfmv.v.f v25, ft0
10661060
; CHECK-NEXT: addi a1, zero, 1024
10671061
; CHECK-NEXT: .LBB18_1: # %vector.body
10681062
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1069-
; CHECK-NEXT: vle32.v v26, (a0)
1070-
; CHECK-NEXT: vfsub.vv v26, v26, v25
1071-
; CHECK-NEXT: vse32.v v26, (a0)
1063+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
1064+
; CHECK-NEXT: vle32.v v25, (a0)
1065+
; CHECK-NEXT: vfsub.vf v25, v25, ft0
1066+
; CHECK-NEXT: vse32.v v25, (a0)
10721067
; CHECK-NEXT: addi a1, a1, -4
10731068
; CHECK-NEXT: addi a0, a0, 16
10741069
; CHECK-NEXT: bnez a1, .LBB18_1
@@ -1099,14 +1094,13 @@ define void @sink_splat_frsub(float* nocapture %a, float %x) {
10991094
; CHECK-LABEL: sink_splat_frsub:
11001095
; CHECK: # %bb.0: # %entry
11011096
; CHECK-NEXT: fmv.w.x ft0, a1
1102-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
1103-
; CHECK-NEXT: vfmv.v.f v25, ft0
11041097
; CHECK-NEXT: addi a1, zero, 1024
11051098
; CHECK-NEXT: .LBB19_1: # %vector.body
11061099
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1107-
; CHECK-NEXT: vle32.v v26, (a0)
1108-
; CHECK-NEXT: vfsub.vv v26, v25, v26
1109-
; CHECK-NEXT: vse32.v v26, (a0)
1100+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
1101+
; CHECK-NEXT: vle32.v v25, (a0)
1102+
; CHECK-NEXT: vfrsub.vf v25, v25, ft0
1103+
; CHECK-NEXT: vse32.v v25, (a0)
11101104
; CHECK-NEXT: addi a1, a1, -4
11111105
; CHECK-NEXT: addi a0, a0, 16
11121106
; CHECK-NEXT: bnez a1, .LBB19_1
@@ -1136,8 +1130,8 @@ for.cond.cleanup: ; preds = %vector.body
11361130
define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) {
11371131
; CHECK-LABEL: sink_splat_fmul_scalable:
11381132
; CHECK: # %bb.0: # %entry
1139-
; CHECK-NEXT: csrr a2, vlenb
1140-
; CHECK-NEXT: srli a3, a2, 2
1133+
; CHECK-NEXT: csrr a7, vlenb
1134+
; CHECK-NEXT: srli a3, a7, 2
11411135
; CHECK-NEXT: addi a4, zero, 1024
11421136
; CHECK-NEXT: fmv.w.x ft0, a1
11431137
; CHECK-NEXT: bgeu a4, a3, .LBB20_2
@@ -1148,16 +1142,15 @@ define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) {
11481142
; CHECK-NEXT: mv a5, zero
11491143
; CHECK-NEXT: remu a6, a4, a3
11501144
; CHECK-NEXT: sub a1, a4, a6
1151-
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
1152-
; CHECK-NEXT: vfmv.v.f v25, ft0
11531145
; CHECK-NEXT: mv a4, a0
11541146
; CHECK-NEXT: .LBB20_3: # %vector.body
11551147
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1156-
; CHECK-NEXT: vl1re32.v v26, (a4)
1157-
; CHECK-NEXT: vfmul.vv v26, v26, v25
1158-
; CHECK-NEXT: vs1r.v v26, (a4)
1148+
; CHECK-NEXT: vl1re32.v v25, (a4)
1149+
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
1150+
; CHECK-NEXT: vfmul.vf v25, v25, ft0
1151+
; CHECK-NEXT: vs1r.v v25, (a4)
11591152
; CHECK-NEXT: add a5, a5, a3
1160-
; CHECK-NEXT: add a4, a4, a2
1153+
; CHECK-NEXT: add a4, a4, a7
11611154
; CHECK-NEXT: bne a5, a1, .LBB20_3
11621155
; CHECK-NEXT: # %bb.4: # %middle.block
11631156
; CHECK-NEXT: beqz a6, .LBB20_7
@@ -1230,8 +1223,8 @@ for.body: ; preds = %for.body.preheader,
12301223
define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) {
12311224
; CHECK-LABEL: sink_splat_fdiv_scalable:
12321225
; CHECK: # %bb.0: # %entry
1233-
; CHECK-NEXT: csrr a2, vlenb
1234-
; CHECK-NEXT: srli a3, a2, 2
1226+
; CHECK-NEXT: csrr a7, vlenb
1227+
; CHECK-NEXT: srli a3, a7, 2
12351228
; CHECK-NEXT: addi a4, zero, 1024
12361229
; CHECK-NEXT: fmv.w.x ft0, a1
12371230
; CHECK-NEXT: bgeu a4, a3, .LBB21_2
@@ -1242,16 +1235,15 @@ define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) {
12421235
; CHECK-NEXT: mv a5, zero
12431236
; CHECK-NEXT: remu a6, a4, a3
12441237
; CHECK-NEXT: sub a1, a4, a6
1245-
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
1246-
; CHECK-NEXT: vfmv.v.f v25, ft0
12471238
; CHECK-NEXT: mv a4, a0
12481239
; CHECK-NEXT: .LBB21_3: # %vector.body
12491240
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1250-
; CHECK-NEXT: vl1re32.v v26, (a4)
1251-
; CHECK-NEXT: vfdiv.vv v26, v26, v25
1252-
; CHECK-NEXT: vs1r.v v26, (a4)
1241+
; CHECK-NEXT: vl1re32.v v25, (a4)
1242+
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
1243+
; CHECK-NEXT: vfdiv.vf v25, v25, ft0
1244+
; CHECK-NEXT: vs1r.v v25, (a4)
12531245
; CHECK-NEXT: add a5, a5, a3
1254-
; CHECK-NEXT: add a4, a4, a2
1246+
; CHECK-NEXT: add a4, a4, a7
12551247
; CHECK-NEXT: bne a5, a1, .LBB21_3
12561248
; CHECK-NEXT: # %bb.4: # %middle.block
12571249
; CHECK-NEXT: beqz a6, .LBB21_7
@@ -1324,8 +1316,8 @@ for.body: ; preds = %for.body.preheader,
13241316
define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) {
13251317
; CHECK-LABEL: sink_splat_frdiv_scalable:
13261318
; CHECK: # %bb.0: # %entry
1327-
; CHECK-NEXT: csrr a2, vlenb
1328-
; CHECK-NEXT: srli a3, a2, 2
1319+
; CHECK-NEXT: csrr a7, vlenb
1320+
; CHECK-NEXT: srli a3, a7, 2
13291321
; CHECK-NEXT: addi a4, zero, 1024
13301322
; CHECK-NEXT: fmv.w.x ft0, a1
13311323
; CHECK-NEXT: bgeu a4, a3, .LBB22_2
@@ -1336,16 +1328,15 @@ define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) {
13361328
; CHECK-NEXT: mv a5, zero
13371329
; CHECK-NEXT: remu a6, a4, a3
13381330
; CHECK-NEXT: sub a1, a4, a6
1339-
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
1340-
; CHECK-NEXT: vfmv.v.f v25, ft0
13411331
; CHECK-NEXT: mv a4, a0
13421332
; CHECK-NEXT: .LBB22_3: # %vector.body
13431333
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1344-
; CHECK-NEXT: vl1re32.v v26, (a4)
1345-
; CHECK-NEXT: vfdiv.vv v26, v25, v26
1346-
; CHECK-NEXT: vs1r.v v26, (a4)
1334+
; CHECK-NEXT: vl1re32.v v25, (a4)
1335+
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
1336+
; CHECK-NEXT: vfrdiv.vf v25, v25, ft0
1337+
; CHECK-NEXT: vs1r.v v25, (a4)
13471338
; CHECK-NEXT: add a5, a5, a3
1348-
; CHECK-NEXT: add a4, a4, a2
1339+
; CHECK-NEXT: add a4, a4, a7
13491340
; CHECK-NEXT: bne a5, a1, .LBB22_3
13501341
; CHECK-NEXT: # %bb.4: # %middle.block
13511342
; CHECK-NEXT: beqz a6, .LBB22_7
@@ -1418,8 +1409,8 @@ for.body: ; preds = %for.body.preheader,
14181409
define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) {
14191410
; CHECK-LABEL: sink_splat_fadd_scalable:
14201411
; CHECK: # %bb.0: # %entry
1421-
; CHECK-NEXT: csrr a2, vlenb
1422-
; CHECK-NEXT: srli a3, a2, 2
1412+
; CHECK-NEXT: csrr a7, vlenb
1413+
; CHECK-NEXT: srli a3, a7, 2
14231414
; CHECK-NEXT: addi a4, zero, 1024
14241415
; CHECK-NEXT: fmv.w.x ft0, a1
14251416
; CHECK-NEXT: bgeu a4, a3, .LBB23_2
@@ -1430,16 +1421,15 @@ define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) {
14301421
; CHECK-NEXT: mv a5, zero
14311422
; CHECK-NEXT: remu a6, a4, a3
14321423
; CHECK-NEXT: sub a1, a4, a6
1433-
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
1434-
; CHECK-NEXT: vfmv.v.f v25, ft0
14351424
; CHECK-NEXT: mv a4, a0
14361425
; CHECK-NEXT: .LBB23_3: # %vector.body
14371426
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1438-
; CHECK-NEXT: vl1re32.v v26, (a4)
1439-
; CHECK-NEXT: vfadd.vv v26, v26, v25
1440-
; CHECK-NEXT: vs1r.v v26, (a4)
1427+
; CHECK-NEXT: vl1re32.v v25, (a4)
1428+
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
1429+
; CHECK-NEXT: vfadd.vf v25, v25, ft0
1430+
; CHECK-NEXT: vs1r.v v25, (a4)
14411431
; CHECK-NEXT: add a5, a5, a3
1442-
; CHECK-NEXT: add a4, a4, a2
1432+
; CHECK-NEXT: add a4, a4, a7
14431433
; CHECK-NEXT: bne a5, a1, .LBB23_3
14441434
; CHECK-NEXT: # %bb.4: # %middle.block
14451435
; CHECK-NEXT: beqz a6, .LBB23_7
@@ -1512,8 +1502,8 @@ for.body: ; preds = %for.body.preheader,
15121502
define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) {
15131503
; CHECK-LABEL: sink_splat_fsub_scalable:
15141504
; CHECK: # %bb.0: # %entry
1515-
; CHECK-NEXT: csrr a2, vlenb
1516-
; CHECK-NEXT: srli a3, a2, 2
1505+
; CHECK-NEXT: csrr a7, vlenb
1506+
; CHECK-NEXT: srli a3, a7, 2
15171507
; CHECK-NEXT: addi a4, zero, 1024
15181508
; CHECK-NEXT: fmv.w.x ft0, a1
15191509
; CHECK-NEXT: bgeu a4, a3, .LBB24_2
@@ -1524,16 +1514,15 @@ define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) {
15241514
; CHECK-NEXT: mv a5, zero
15251515
; CHECK-NEXT: remu a6, a4, a3
15261516
; CHECK-NEXT: sub a1, a4, a6
1527-
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
1528-
; CHECK-NEXT: vfmv.v.f v25, ft0
15291517
; CHECK-NEXT: mv a4, a0
15301518
; CHECK-NEXT: .LBB24_3: # %vector.body
15311519
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1532-
; CHECK-NEXT: vl1re32.v v26, (a4)
1533-
; CHECK-NEXT: vfsub.vv v26, v26, v25
1534-
; CHECK-NEXT: vs1r.v v26, (a4)
1520+
; CHECK-NEXT: vl1re32.v v25, (a4)
1521+
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
1522+
; CHECK-NEXT: vfsub.vf v25, v25, ft0
1523+
; CHECK-NEXT: vs1r.v v25, (a4)
15351524
; CHECK-NEXT: add a5, a5, a3
1536-
; CHECK-NEXT: add a4, a4, a2
1525+
; CHECK-NEXT: add a4, a4, a7
15371526
; CHECK-NEXT: bne a5, a1, .LBB24_3
15381527
; CHECK-NEXT: # %bb.4: # %middle.block
15391528
; CHECK-NEXT: beqz a6, .LBB24_7
@@ -1606,8 +1595,8 @@ for.body: ; preds = %for.body.preheader,
16061595
define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) {
16071596
; CHECK-LABEL: sink_splat_frsub_scalable:
16081597
; CHECK: # %bb.0: # %entry
1609-
; CHECK-NEXT: csrr a2, vlenb
1610-
; CHECK-NEXT: srli a3, a2, 2
1598+
; CHECK-NEXT: csrr a7, vlenb
1599+
; CHECK-NEXT: srli a3, a7, 2
16111600
; CHECK-NEXT: addi a4, zero, 1024
16121601
; CHECK-NEXT: fmv.w.x ft0, a1
16131602
; CHECK-NEXT: bgeu a4, a3, .LBB25_2
@@ -1618,16 +1607,15 @@ define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) {
16181607
; CHECK-NEXT: mv a5, zero
16191608
; CHECK-NEXT: remu a6, a4, a3
16201609
; CHECK-NEXT: sub a1, a4, a6
1621-
; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu
1622-
; CHECK-NEXT: vfmv.v.f v25, ft0
16231610
; CHECK-NEXT: mv a4, a0
16241611
; CHECK-NEXT: .LBB25_3: # %vector.body
16251612
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1626-
; CHECK-NEXT: vl1re32.v v26, (a4)
1627-
; CHECK-NEXT: vfsub.vv v26, v25, v26
1628-
; CHECK-NEXT: vs1r.v v26, (a4)
1613+
; CHECK-NEXT: vl1re32.v v25, (a4)
1614+
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu
1615+
; CHECK-NEXT: vfrsub.vf v25, v25, ft0
1616+
; CHECK-NEXT: vs1r.v v25, (a4)
16291617
; CHECK-NEXT: add a5, a5, a3
1630-
; CHECK-NEXT: add a4, a4, a2
1618+
; CHECK-NEXT: add a4, a4, a7
16311619
; CHECK-NEXT: bne a5, a1, .LBB25_3
16321620
; CHECK-NEXT: # %bb.4: # %middle.block
16331621
; CHECK-NEXT: beqz a6, .LBB25_7

0 commit comments

Comments
 (0)