Skip to content

Commit 1253001

Browse files
authored
[RISCV] Add reductions to list of roots in tryToReduceVL (llvm#107595)
This allows us to reduce the VLs feeding reduction instructions. In particular, this means that `<3 x Ty>` reduce(load)-like sequences no longer require a VL toggle. This change was waiting on commit 3d72957; now that the latent correctness issue is fixed, we can expand this transform.
1 parent 901006f commit 1253001

File tree

4 files changed

+64
-61
lines changed

4 files changed

+64
-61
lines changed

llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -145,6 +145,24 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
145145
case RISCV::VMERGE_VVM:
146146
SrcIdx = 3; // TODO: We can also handle the false operand.
147147
break;
148+
case RISCV::VREDSUM_VS:
149+
case RISCV::VREDMAXU_VS:
150+
case RISCV::VREDMAX_VS:
151+
case RISCV::VREDMINU_VS:
152+
case RISCV::VREDMIN_VS:
153+
case RISCV::VREDAND_VS:
154+
case RISCV::VREDOR_VS:
155+
case RISCV::VREDXOR_VS:
156+
case RISCV::VWREDSUM_VS:
157+
case RISCV::VWREDSUMU_VS:
158+
case RISCV::VFREDUSUM_VS:
159+
case RISCV::VFREDOSUM_VS:
160+
case RISCV::VFREDMAX_VS:
161+
case RISCV::VFREDMIN_VS:
162+
case RISCV::VFWREDUSUM_VS:
163+
case RISCV::VFWREDOSUM_VS:
164+
SrcIdx = 2;
165+
break;
148166
}
149167

150168
MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));

llvm/test/CodeGen/RISCV/redundant-copy-from-tail-duplicate.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ define signext i32 @sum(ptr %a, i32 signext %n, i1 %prof.min.iters.check, <vscal
1919
; CHECK-NEXT: mv a0, a2
2020
; CHECK-NEXT: ret
2121
; CHECK-NEXT: .LBB0_4: # %vector.ph
22-
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
22+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2323
; CHECK-NEXT: vmv.s.x v8, zero
2424
; CHECK-NEXT: vmv.v.i v12, 0
2525
; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll

Lines changed: 17 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -121,10 +121,9 @@ define i32 @reduce_sum_16xi32_prefix2(ptr %p) {
121121
define i32 @reduce_sum_16xi32_prefix3(ptr %p) {
122122
; CHECK-LABEL: reduce_sum_16xi32_prefix3:
123123
; CHECK: # %bb.0:
124-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
124+
; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
125125
; CHECK-NEXT: vle32.v v8, (a0)
126126
; CHECK-NEXT: vmv.s.x v9, zero
127-
; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
128127
; CHECK-NEXT: vredsum.vs v8, v8, v9
129128
; CHECK-NEXT: vmv.x.s a0, v8
130129
; CHECK-NEXT: ret
@@ -160,10 +159,9 @@ define i32 @reduce_sum_16xi32_prefix4(ptr %p) {
160159
define i32 @reduce_sum_16xi32_prefix5(ptr %p) {
161160
; CHECK-LABEL: reduce_sum_16xi32_prefix5:
162161
; CHECK: # %bb.0:
163-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
162+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
164163
; CHECK-NEXT: vle32.v v8, (a0)
165164
; CHECK-NEXT: vmv.s.x v10, zero
166-
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
167165
; CHECK-NEXT: vredsum.vs v8, v8, v10
168166
; CHECK-NEXT: vmv.x.s a0, v8
169167
; CHECK-NEXT: ret
@@ -183,10 +181,9 @@ define i32 @reduce_sum_16xi32_prefix5(ptr %p) {
183181
define i32 @reduce_sum_16xi32_prefix6(ptr %p) {
184182
; CHECK-LABEL: reduce_sum_16xi32_prefix6:
185183
; CHECK: # %bb.0:
186-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
184+
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
187185
; CHECK-NEXT: vle32.v v8, (a0)
188186
; CHECK-NEXT: vmv.s.x v10, zero
189-
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
190187
; CHECK-NEXT: vredsum.vs v8, v8, v10
191188
; CHECK-NEXT: vmv.x.s a0, v8
192189
; CHECK-NEXT: ret
@@ -208,10 +205,9 @@ define i32 @reduce_sum_16xi32_prefix6(ptr %p) {
208205
define i32 @reduce_sum_16xi32_prefix7(ptr %p) {
209206
; CHECK-LABEL: reduce_sum_16xi32_prefix7:
210207
; CHECK: # %bb.0:
211-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
208+
; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma
212209
; CHECK-NEXT: vle32.v v8, (a0)
213210
; CHECK-NEXT: vmv.s.x v10, zero
214-
; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma
215211
; CHECK-NEXT: vredsum.vs v8, v8, v10
216212
; CHECK-NEXT: vmv.x.s a0, v8
217213
; CHECK-NEXT: ret
@@ -263,10 +259,9 @@ define i32 @reduce_sum_16xi32_prefix8(ptr %p) {
263259
define i32 @reduce_sum_16xi32_prefix9(ptr %p) {
264260
; CHECK-LABEL: reduce_sum_16xi32_prefix9:
265261
; CHECK: # %bb.0:
266-
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
262+
; CHECK-NEXT: vsetivli zero, 9, e32, m4, ta, ma
267263
; CHECK-NEXT: vle32.v v8, (a0)
268264
; CHECK-NEXT: vmv.s.x v12, zero
269-
; CHECK-NEXT: vsetivli zero, 9, e32, m4, ta, ma
270265
; CHECK-NEXT: vredsum.vs v8, v8, v12
271266
; CHECK-NEXT: vmv.x.s a0, v8
272267
; CHECK-NEXT: ret
@@ -294,10 +289,9 @@ define i32 @reduce_sum_16xi32_prefix9(ptr %p) {
294289
define i32 @reduce_sum_16xi32_prefix13(ptr %p) {
295290
; CHECK-LABEL: reduce_sum_16xi32_prefix13:
296291
; CHECK: # %bb.0:
297-
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
292+
; CHECK-NEXT: vsetivli zero, 13, e32, m4, ta, ma
298293
; CHECK-NEXT: vle32.v v8, (a0)
299294
; CHECK-NEXT: vmv.s.x v12, zero
300-
; CHECK-NEXT: vsetivli zero, 13, e32, m4, ta, ma
301295
; CHECK-NEXT: vredsum.vs v8, v8, v12
302296
; CHECK-NEXT: vmv.x.s a0, v8
303297
; CHECK-NEXT: ret
@@ -334,10 +328,9 @@ define i32 @reduce_sum_16xi32_prefix13(ptr %p) {
334328
define i32 @reduce_sum_16xi32_prefix14(ptr %p) {
335329
; CHECK-LABEL: reduce_sum_16xi32_prefix14:
336330
; CHECK: # %bb.0:
337-
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
331+
; CHECK-NEXT: vsetivli zero, 14, e32, m4, ta, ma
338332
; CHECK-NEXT: vle32.v v8, (a0)
339333
; CHECK-NEXT: vmv.s.x v12, zero
340-
; CHECK-NEXT: vsetivli zero, 14, e32, m4, ta, ma
341334
; CHECK-NEXT: vredsum.vs v8, v8, v12
342335
; CHECK-NEXT: vmv.x.s a0, v8
343336
; CHECK-NEXT: ret
@@ -375,10 +368,9 @@ define i32 @reduce_sum_16xi32_prefix14(ptr %p) {
375368
define i32 @reduce_sum_16xi32_prefix15(ptr %p) {
376369
; CHECK-LABEL: reduce_sum_16xi32_prefix15:
377370
; CHECK: # %bb.0:
378-
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
371+
; CHECK-NEXT: vsetivli zero, 15, e32, m4, ta, ma
379372
; CHECK-NEXT: vle32.v v8, (a0)
380373
; CHECK-NEXT: vmv.s.x v12, zero
381-
; CHECK-NEXT: vsetivli zero, 15, e32, m4, ta, ma
382374
; CHECK-NEXT: vredsum.vs v8, v8, v12
383375
; CHECK-NEXT: vmv.x.s a0, v8
384376
; CHECK-NEXT: ret
@@ -499,10 +491,9 @@ define i32 @reduce_xor_16xi32_prefix2(ptr %p) {
499491
define i32 @reduce_xor_16xi32_prefix5(ptr %p) {
500492
; CHECK-LABEL: reduce_xor_16xi32_prefix5:
501493
; CHECK: # %bb.0:
502-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
494+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
503495
; CHECK-NEXT: vle32.v v8, (a0)
504496
; CHECK-NEXT: vmv.s.x v10, zero
505-
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
506497
; CHECK-NEXT: vredxor.vs v8, v8, v10
507498
; CHECK-NEXT: vmv.x.s a0, v8
508499
; CHECK-NEXT: ret
@@ -537,7 +528,7 @@ define i32 @reduce_and_16xi32_prefix2(ptr %p) {
537528
define i32 @reduce_and_16xi32_prefix5(ptr %p) {
538529
; CHECK-LABEL: reduce_and_16xi32_prefix5:
539530
; CHECK: # %bb.0:
540-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
531+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
541532
; CHECK-NEXT: vle32.v v8, (a0)
542533
; CHECK-NEXT: vsetivli zero, 5, e32, m1, ta, ma
543534
; CHECK-NEXT: vmv.v.i v10, -1
@@ -576,10 +567,9 @@ define i32 @reduce_or_16xi32_prefix2(ptr %p) {
576567
define i32 @reduce_or_16xi32_prefix5(ptr %p) {
577568
; CHECK-LABEL: reduce_or_16xi32_prefix5:
578569
; CHECK: # %bb.0:
579-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
570+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
580571
; CHECK-NEXT: vle32.v v8, (a0)
581572
; CHECK-NEXT: vmv.s.x v10, zero
582-
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
583573
; CHECK-NEXT: vredor.vs v8, v8, v10
584574
; CHECK-NEXT: vmv.x.s a0, v8
585575
; CHECK-NEXT: ret
@@ -619,11 +609,10 @@ define i32 @reduce_smax_16xi32_prefix2(ptr %p) {
619609
define i32 @reduce_smax_16xi32_prefix5(ptr %p) {
620610
; CHECK-LABEL: reduce_smax_16xi32_prefix5:
621611
; CHECK: # %bb.0:
622-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
612+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
623613
; CHECK-NEXT: vle32.v v8, (a0)
624614
; CHECK-NEXT: lui a0, 524288
625615
; CHECK-NEXT: vmv.s.x v10, a0
626-
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
627616
; CHECK-NEXT: vredmax.vs v8, v8, v10
628617
; CHECK-NEXT: vmv.x.s a0, v8
629618
; CHECK-NEXT: ret
@@ -658,12 +647,11 @@ define i32 @reduce_smin_16xi32_prefix2(ptr %p) {
658647
define i32 @reduce_smin_16xi32_prefix5(ptr %p) {
659648
; CHECK-LABEL: reduce_smin_16xi32_prefix5:
660649
; CHECK: # %bb.0:
661-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
650+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
662651
; CHECK-NEXT: vle32.v v8, (a0)
663652
; CHECK-NEXT: lui a0, 524288
664653
; CHECK-NEXT: addi a0, a0, -1
665654
; CHECK-NEXT: vmv.s.x v10, a0
666-
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
667655
; CHECK-NEXT: vredmin.vs v8, v8, v10
668656
; CHECK-NEXT: vmv.x.s a0, v8
669657
; CHECK-NEXT: ret
@@ -698,10 +686,9 @@ define i32 @reduce_umax_16xi32_prefix2(ptr %p) {
698686
define i32 @reduce_umax_16xi32_prefix5(ptr %p) {
699687
; CHECK-LABEL: reduce_umax_16xi32_prefix5:
700688
; CHECK: # %bb.0:
701-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
689+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
702690
; CHECK-NEXT: vle32.v v8, (a0)
703691
; CHECK-NEXT: vmv.s.x v10, zero
704-
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
705692
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
706693
; CHECK-NEXT: vmv.x.s a0, v8
707694
; CHECK-NEXT: ret
@@ -736,7 +723,7 @@ define i32 @reduce_umin_16xi32_prefix2(ptr %p) {
736723
define i32 @reduce_umin_16xi32_prefix5(ptr %p) {
737724
; RV32-LABEL: reduce_umin_16xi32_prefix5:
738725
; RV32: # %bb.0:
739-
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
726+
; RV32-NEXT: vsetivli zero, 5, e32, m2, ta, ma
740727
; RV32-NEXT: vle32.v v8, (a0)
741728
; RV32-NEXT: vsetivli zero, 5, e32, m1, ta, ma
742729
; RV32-NEXT: vmv.v.i v10, -1
@@ -747,11 +734,10 @@ define i32 @reduce_umin_16xi32_prefix5(ptr %p) {
747734
;
748735
; RV64-LABEL: reduce_umin_16xi32_prefix5:
749736
; RV64: # %bb.0:
750-
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
737+
; RV64-NEXT: vsetivli zero, 5, e32, m2, ta, ma
751738
; RV64-NEXT: vle32.v v8, (a0)
752739
; RV64-NEXT: li a0, -1
753740
; RV64-NEXT: vmv.s.x v10, a0
754-
; RV64-NEXT: vsetivli zero, 5, e32, m2, ta, ma
755741
; RV64-NEXT: vredminu.vs v8, v8, v10
756742
; RV64-NEXT: vmv.x.s a0, v8
757743
; RV64-NEXT: ret
@@ -787,11 +773,10 @@ define float @reduce_fadd_16xf32_prefix2(ptr %p) {
787773
define float @reduce_fadd_16xi32_prefix5(ptr %p) {
788774
; CHECK-LABEL: reduce_fadd_16xi32_prefix5:
789775
; CHECK: # %bb.0:
790-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
776+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
791777
; CHECK-NEXT: vle32.v v8, (a0)
792778
; CHECK-NEXT: lui a0, 524288
793779
; CHECK-NEXT: vmv.s.x v10, a0
794-
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
795780
; CHECK-NEXT: vfredusum.vs v8, v8, v10
796781
; CHECK-NEXT: vfmv.f.s fa0, v8
797782
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)