Commit b7166e2
[RISCV] Combine extract_vector_elt followed by VFMV_S_F_VL.
If we're extracting an element and inserting it into an undef vector with the same number of elements, we can use the original vector directly. This pattern occurs around reductions that have been cascaded together.

This can be generalized to wider/narrower vectors by using insert_subvector/extract_subvector, but we don't currently have lit tests for that case. We could also support the non-undef case by using a slide or vmv.v.v.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D142264
1 parent 002b190 commit b7166e2
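
The non-undef case mentioned in the message is left as a TODO by this patch. One plausible shape for it, sketched against the combine added below (the names N, Src, VT, and SrcVT come from that hunk), would merge element 0 of the source vector into the passthru with a VL-of-1 vmv.v.v. The use of RISCVISD::VMV_V_V_VL and its (passthru, src, vl) operand order is an assumption here; this is an illustrative, untested sketch, not part of the commit:

  // Hypothetical follow-up, not in this commit: if the passthru is not undef
  // but the vector types match, a VL=1 vmv.v.v can copy element 0 of the
  // source vector into the passthru. With VL=1 and tail undisturbed, only
  // element 0 is written and the rest of the passthru is preserved, matching
  // what vfmv.s.f would have produced. Assumes DCI.DAG and Subtarget from
  // PerformDAGCombine are in scope, and the VMV_V_V_VL operand order above.
  if (!N->getOperand(0).isUndef() && SrcVT == VT) {
    SDLoc DL(N);
    SDValue VL = DAG.getConstant(1, DL, Subtarget.getXLenVT());
    return DAG.getNode(RISCVISD::VMV_V_V_VL, DL, VT, N->getOperand(0),
                       Src.getOperand(0), VL);
  }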

File tree

4 files changed (+71 lines, -77 lines)

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 19 additions & 0 deletions
@@ -10562,6 +10562,25 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,

     break;
   }
+  case RISCVISD::VFMV_S_F_VL: {
+    SDValue Src = N->getOperand(1);
+    // Try to remove vector->scalar->vector if the scalar->vector is inserting
+    // into an undef vector.
+    // TODO: Could use a vslide or vmv.v.v for non-undef.
+    if (N->getOperand(0).isUndef() &&
+        Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        isNullConstant(Src.getOperand(1)) &&
+        Src.getOperand(0).getValueType().isScalableVector()) {
+      EVT VT = N->getValueType(0);
+      EVT SrcVT = Src.getOperand(0).getValueType();
+      assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
+      // Widths match, just return the original vector.
+      if (SrcVT == VT)
+        return Src.getOperand(0);
+      // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
+    }
+    break;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     unsigned IntNo = N->getConstantOperandVal(0);
     switch (IntNo) {
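
The two TODOs in this hunk point at the generalization the commit message mentions. A minimal sketch of the width-changing case, reusing the hunk's names and assuming the element types already match (the assert guarantees this); illustrative only and untested, since the commit notes there are no lit tests covering it yet:

  // Hypothetical widening/narrowing follow-up for the TODO above; not part
  // of this commit.
  SDLoc DL(N);
  SDValue SrcVec = Src.getOperand(0);
  if (SrcVT.getVectorMinNumElements() > VT.getVectorMinNumElements())
    // Source is wider: the result is just its low subvector.
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SrcVec,
                       DAG.getVectorIdxConstant(0, DL));
  if (SrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements())
    // Source is narrower: insert it at element 0 of an undef result; only
    // element 0 is meaningful anyway, since the passthru is undef.
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
                       SrcVec, DAG.getVectorIdxConstant(0, DL));

Whether the extra subvector nodes would fold away cleanly in practice is exactly what the missing lit tests would need to show.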

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll

Lines changed: 4 additions & 10 deletions
@@ -133,17 +133,14 @@ define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32
 ; CHECK-NEXT: vfmv.s.f v25, fa0
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
 ; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, ft0
 ; CHECK-NEXT: addi a1, a0, -32
 ; CHECK-NEXT: sltu a0, a0, a1
 ; CHECK-NEXT: addi a0, a0, -1
 ; CHECK-NEXT: and a0, a0, a1
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v25
 ; CHECK-NEXT: ret
   %r = call reassoc float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
   ret float %r

@@ -164,17 +161,14 @@ define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m,
 ; CHECK-NEXT: vfmv.s.f v25, fa0
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
 ; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, ft0
 ; CHECK-NEXT: addi a1, a0, -32
 ; CHECK-NEXT: sltu a0, a0, a1
 ; CHECK-NEXT: addi a0, a0, -1
 ; CHECK-NEXT: and a0, a0, a1
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v25
 ; CHECK-NEXT: ret
   %r = call float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
   ret float %r

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll

Lines changed: 18 additions & 33 deletions
@@ -238,15 +238,13 @@ define half @vreduce_fadd_v128f16(ptr %x, half %s) {
 define half @vreduce_ord_fadd_v128f16(ptr %x, half %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_v128f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 64
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v16, (a1)
 ; CHECK-NEXT: vfmv.s.f v24, fa0
 ; CHECK-NEXT: vfredosum.vs v8, v8, v24
-; CHECK-NEXT: vfmv.f.s ft0, v8
-; CHECK-NEXT: vfmv.s.f v8, ft0
 ; CHECK-NEXT: vfredosum.vs v8, v16, v8
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret

@@ -670,15 +668,13 @@ define float @vreduce_fadd_v64f32(ptr %x, float %s) {
 define float @vreduce_ord_fadd_v64f32(ptr %x, float %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_v64f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v16, (a1)
 ; CHECK-NEXT: vfmv.s.f v24, fa0
 ; CHECK-NEXT: vfredosum.vs v8, v8, v24
-; CHECK-NEXT: vfmv.f.s ft0, v8
-; CHECK-NEXT: vfmv.s.f v8, ft0
 ; CHECK-NEXT: vfredosum.vs v8, v16, v8
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret

@@ -715,20 +711,15 @@ define float @vreduce_ord_fwadd_v64f32(ptr %x, float %s) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 64
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: li a0, 32
 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v16, v8, a0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v24, fa0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwredosum.vs v16, v16, v24
-; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, ft0
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwredosum.vs v8, v8, v16
+; CHECK-NEXT: vfwredosum.vs v8, v8, v24
+; CHECK-NEXT: vfwredosum.vs v8, v16, v8
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret

@@ -1084,14 +1075,12 @@ define double @vreduce_fadd_v32f64(ptr %x, double %s) {
 define double @vreduce_ord_fadd_v32f64(ptr %x, double %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_v32f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, 128
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vle64.v v16, (a0)
+; CHECK-NEXT: vle64.v v16, (a1)
 ; CHECK-NEXT: vfmv.s.f v24, fa0
 ; CHECK-NEXT: vfredosum.vs v8, v8, v24
-; CHECK-NEXT: vfmv.f.s ft0, v8
-; CHECK-NEXT: vfmv.s.f v8, ft0
 ; CHECK-NEXT: vfredosum.vs v8, v16, v8
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret

@@ -1126,18 +1115,14 @@ define double @vreduce_ord_fwadd_v32f64(ptr %x, double %s) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v16, 16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
 ; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v24, fa0
 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vfwredosum.vs v16, v16, v24
-; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vfmv.s.f v16, ft0
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vfwredosum.vs v8, v8, v16
+; CHECK-NEXT: vfwredosum.vs v8, v8, v24
+; CHECK-NEXT: vfwredosum.vs v8, v16, v8
 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll

Lines changed: 30 additions & 34 deletions
@@ -93,30 +93,28 @@ declare half @llvm.vp.reduce.fadd.nxv64f16(half, <vscale x 64 x half>, <vscale x
 define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_nxv64f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 1
 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v0, a1
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: sub a1, a0, a2
-; CHECK-NEXT: sltu a3, a0, a1
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: vslidedown.vx v24, v0, a2
 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: slli a1, a1, 2
 ; CHECK-NEXT: vfmv.s.f v25, fa0
-; CHECK-NEXT: bltu a0, a2, .LBB6_2
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: bltu a0, a1, .LBB6_2
 ; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: mv a2, a1
 ; CHECK-NEXT: .LBB6_2:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, tu, ma
 ; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, ft0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
+; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v25
 ; CHECK-NEXT: ret
   %r = call reassoc half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
   ret half %r

@@ -125,30 +123,28 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x
 define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_nxv64f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 1
 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v0, a1
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: sub a1, a0, a2
-; CHECK-NEXT: sltu a3, a0, a1
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: vslidedown.vx v24, v0, a2
 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: slli a1, a1, 2
 ; CHECK-NEXT: vfmv.s.f v25, fa0
-; CHECK-NEXT: bltu a0, a2, .LBB7_2
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: bltu a0, a1, .LBB7_2
 ; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: mv a2, a1
 ; CHECK-NEXT: .LBB7_2:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, tu, ma
 ; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, ft0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
+; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v25
 ; CHECK-NEXT: ret
   %r = call half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
   ret half %r
