Skip to content

Commit a6b870d

Browse files
authored
[RISCV] Enable sub(max, min) lowering for ABDS and ABDU (#86592)
We have the ISD nodes for representing signed and unsigned absolute difference. For RISCV, we have vector min/max in the base vector extension, so we can expand to the sub(max, min) lowering. We could almost use the default expansion, but since fixed-length min/max are custom (not legal), the default expansion doesn't cover the fixed-vector cases. The expansion here is just a copy of the generic code, specialized to allow the custom min/max nodes to be created so they can in turn be legalized to the _vl variants. Existing DAG combines handle the recognition of absolute-difference idioms and their conversion into the respective ISD::ABDS and ISD::ABDU nodes. This change does have the net effect of potentially pushing a free-floating zero/sign extend after the expansion, and we don't do a great job of folding that into later expressions. However, since narrowing can in general reduce required work (by reducing LMUL), this seems like the right tradeoff overall.
1 parent cc3b6f9 commit a6b870d

File tree

4 files changed

+257
-474
lines changed

4 files changed

+257
-474
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
819819
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
820820
Legal);
821821

822+
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
823+
822824
// Custom-lower extensions and truncations from/to mask types.
823825
setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
824826
VT, Custom);
@@ -1203,6 +1205,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
12031205
setOperationAction(
12041206
{ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
12051207

1208+
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);
1209+
12061210
// vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
12071211
if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
12081212
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
@@ -6785,6 +6789,22 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
67856789
if (!Op.getValueType().isVector())
67866790
return lowerSADDSAT_SSUBSAT(Op, DAG);
67876791
return lowerToScalableOp(Op, DAG);
6792+
case ISD::ABDS:
6793+
case ISD::ABDU: {
6794+
SDLoc dl(Op);
6795+
EVT VT = Op->getValueType(0);
6796+
SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6797+
SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6798+
bool IsSigned = Op->getOpcode() == ISD::ABDS;
6799+
6800+
// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6801+
// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
6802+
unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6803+
unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6804+
SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6805+
SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6806+
return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6807+
}
67886808
case ISD::ABS:
67896809
case ISD::VP_ABS:
67906810
return lowerABS(Op, DAG);

llvm/test/CodeGen/RISCV/rvv/abd.ll

Lines changed: 55 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,9 @@ define <vscale x 16 x i8> @sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
1010
; CHECK-LABEL: sabd_b:
1111
; CHECK: # %bb.0:
1212
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
13-
; CHECK-NEXT: vwsub.vv v12, v8, v10
14-
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
15-
; CHECK-NEXT: vrsub.vi v8, v12, 0
16-
; CHECK-NEXT: vmax.vv v12, v12, v8
17-
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
18-
; CHECK-NEXT: vnsrl.wi v8, v12, 0
13+
; CHECK-NEXT: vmin.vv v12, v8, v10
14+
; CHECK-NEXT: vmax.vv v8, v8, v10
15+
; CHECK-NEXT: vsub.vv v8, v8, v12
1916
; CHECK-NEXT: ret
2017
%a.sext = sext <vscale x 16 x i8> %a to <vscale x 16 x i16>
2118
%b.sext = sext <vscale x 16 x i8> %b to <vscale x 16 x i16>
@@ -33,9 +30,9 @@ define <vscale x 16 x i8> @sabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x
3330
; CHECK-NEXT: vmerge.vim v12, v10, -1, v0
3431
; CHECK-NEXT: vmv1r.v v0, v8
3532
; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
36-
; CHECK-NEXT: vsub.vv v8, v12, v8
37-
; CHECK-NEXT: vrsub.vi v10, v8, 0
38-
; CHECK-NEXT: vmax.vv v8, v8, v10
33+
; CHECK-NEXT: vmin.vv v10, v12, v8
34+
; CHECK-NEXT: vmax.vv v8, v12, v8
35+
; CHECK-NEXT: vsub.vv v8, v8, v10
3936
; CHECK-NEXT: ret
4037
%a.sext = sext <vscale x 16 x i1> %a to <vscale x 16 x i8>
4138
%b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8>
@@ -48,12 +45,9 @@ define <vscale x 8 x i16> @sabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
4845
; CHECK-LABEL: sabd_h:
4946
; CHECK: # %bb.0:
5047
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
51-
; CHECK-NEXT: vwsub.vv v12, v8, v10
52-
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
53-
; CHECK-NEXT: vrsub.vi v8, v12, 0
54-
; CHECK-NEXT: vmax.vv v12, v12, v8
55-
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
56-
; CHECK-NEXT: vnsrl.wi v8, v12, 0
48+
; CHECK-NEXT: vmin.vv v12, v8, v10
49+
; CHECK-NEXT: vmax.vv v8, v8, v10
50+
; CHECK-NEXT: vsub.vv v8, v8, v12
5751
; CHECK-NEXT: ret
5852
%a.sext = sext <vscale x 8 x i16> %a to <vscale x 8 x i32>
5953
%b.sext = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
@@ -67,10 +61,11 @@ define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
6761
; CHECK-LABEL: sabd_h_promoted_ops:
6862
; CHECK: # %bb.0:
6963
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
70-
; CHECK-NEXT: vwsub.vv v10, v8, v9
64+
; CHECK-NEXT: vmin.vv v10, v8, v9
65+
; CHECK-NEXT: vmax.vv v8, v8, v9
66+
; CHECK-NEXT: vsub.vv v10, v8, v10
7167
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
72-
; CHECK-NEXT: vrsub.vi v8, v10, 0
73-
; CHECK-NEXT: vmax.vv v8, v10, v8
68+
; CHECK-NEXT: vzext.vf2 v8, v10
7469
; CHECK-NEXT: ret
7570
%a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
7671
%b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -83,12 +78,9 @@ define <vscale x 4 x i32> @sabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
8378
; CHECK-LABEL: sabd_s:
8479
; CHECK: # %bb.0:
8580
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
86-
; CHECK-NEXT: vwsub.vv v12, v8, v10
87-
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
88-
; CHECK-NEXT: vrsub.vi v8, v12, 0
89-
; CHECK-NEXT: vmax.vv v12, v12, v8
90-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
91-
; CHECK-NEXT: vnsrl.wi v8, v12, 0
81+
; CHECK-NEXT: vmin.vv v12, v8, v10
82+
; CHECK-NEXT: vmax.vv v8, v8, v10
83+
; CHECK-NEXT: vsub.vv v8, v8, v12
9284
; CHECK-NEXT: ret
9385
%a.sext = sext <vscale x 4 x i32> %a to <vscale x 4 x i64>
9486
%b.sext = sext <vscale x 4 x i32> %b to <vscale x 4 x i64>
@@ -102,10 +94,11 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
10294
; CHECK-LABEL: sabd_s_promoted_ops:
10395
; CHECK: # %bb.0:
10496
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
105-
; CHECK-NEXT: vwsub.vv v10, v8, v9
97+
; CHECK-NEXT: vmin.vv v10, v8, v9
98+
; CHECK-NEXT: vmax.vv v8, v8, v9
99+
; CHECK-NEXT: vsub.vv v10, v8, v10
106100
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
107-
; CHECK-NEXT: vrsub.vi v8, v10, 0
108-
; CHECK-NEXT: vmax.vv v8, v10, v8
101+
; CHECK-NEXT: vzext.vf2 v8, v10
109102
; CHECK-NEXT: ret
110103
%a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
111104
%b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -128,10 +121,11 @@ define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
128121
; CHECK-LABEL: sabd_d_promoted_ops:
129122
; CHECK: # %bb.0:
130123
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
131-
; CHECK-NEXT: vwsub.vv v10, v8, v9
124+
; CHECK-NEXT: vmin.vv v10, v8, v9
125+
; CHECK-NEXT: vmax.vv v8, v8, v9
126+
; CHECK-NEXT: vsub.vv v10, v8, v10
132127
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
133-
; CHECK-NEXT: vrsub.vi v8, v10, 0
134-
; CHECK-NEXT: vmax.vv v8, v10, v8
128+
; CHECK-NEXT: vzext.vf2 v8, v10
135129
; CHECK-NEXT: ret
136130
%a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
137131
%b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -148,12 +142,9 @@ define <vscale x 16 x i8> @uabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
148142
; CHECK-LABEL: uabd_b:
149143
; CHECK: # %bb.0:
150144
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
151-
; CHECK-NEXT: vwsubu.vv v12, v8, v10
152-
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
153-
; CHECK-NEXT: vrsub.vi v8, v12, 0
154-
; CHECK-NEXT: vmax.vv v12, v12, v8
155-
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
156-
; CHECK-NEXT: vnsrl.wi v8, v12, 0
145+
; CHECK-NEXT: vminu.vv v12, v8, v10
146+
; CHECK-NEXT: vmaxu.vv v8, v8, v10
147+
; CHECK-NEXT: vsub.vv v8, v8, v12
157148
; CHECK-NEXT: ret
158149
%a.zext = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
159150
%b.zext = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
@@ -171,9 +162,9 @@ define <vscale x 16 x i8> @uabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x
171162
; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
172163
; CHECK-NEXT: vmv1r.v v0, v8
173164
; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
174-
; CHECK-NEXT: vsub.vv v8, v12, v8
175-
; CHECK-NEXT: vrsub.vi v10, v8, 0
176-
; CHECK-NEXT: vmax.vv v8, v8, v10
165+
; CHECK-NEXT: vminu.vv v10, v12, v8
166+
; CHECK-NEXT: vmaxu.vv v8, v12, v8
167+
; CHECK-NEXT: vsub.vv v8, v8, v10
177168
; CHECK-NEXT: ret
178169
%a.zext = zext <vscale x 16 x i1> %a to <vscale x 16 x i8>
179170
%b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8>
@@ -186,12 +177,9 @@ define <vscale x 8 x i16> @uabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
186177
; CHECK-LABEL: uabd_h:
187178
; CHECK: # %bb.0:
188179
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
189-
; CHECK-NEXT: vwsubu.vv v12, v8, v10
190-
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
191-
; CHECK-NEXT: vrsub.vi v8, v12, 0
192-
; CHECK-NEXT: vmax.vv v12, v12, v8
193-
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
194-
; CHECK-NEXT: vnsrl.wi v8, v12, 0
180+
; CHECK-NEXT: vminu.vv v12, v8, v10
181+
; CHECK-NEXT: vmaxu.vv v8, v8, v10
182+
; CHECK-NEXT: vsub.vv v8, v8, v12
195183
; CHECK-NEXT: ret
196184
%a.zext = zext <vscale x 8 x i16> %a to <vscale x 8 x i32>
197185
%b.zext = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
@@ -205,10 +193,11 @@ define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
205193
; CHECK-LABEL: uabd_h_promoted_ops:
206194
; CHECK: # %bb.0:
207195
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
208-
; CHECK-NEXT: vwsubu.vv v10, v8, v9
196+
; CHECK-NEXT: vminu.vv v10, v8, v9
197+
; CHECK-NEXT: vmaxu.vv v8, v8, v9
198+
; CHECK-NEXT: vsub.vv v10, v8, v10
209199
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
210-
; CHECK-NEXT: vrsub.vi v8, v10, 0
211-
; CHECK-NEXT: vmax.vv v8, v10, v8
200+
; CHECK-NEXT: vzext.vf2 v8, v10
212201
; CHECK-NEXT: ret
213202
%a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
214203
%b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -221,12 +210,9 @@ define <vscale x 4 x i32> @uabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
221210
; CHECK-LABEL: uabd_s:
222211
; CHECK: # %bb.0:
223212
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
224-
; CHECK-NEXT: vwsubu.vv v12, v8, v10
225-
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
226-
; CHECK-NEXT: vrsub.vi v8, v12, 0
227-
; CHECK-NEXT: vmax.vv v12, v12, v8
228-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
229-
; CHECK-NEXT: vnsrl.wi v8, v12, 0
213+
; CHECK-NEXT: vminu.vv v12, v8, v10
214+
; CHECK-NEXT: vmaxu.vv v8, v8, v10
215+
; CHECK-NEXT: vsub.vv v8, v8, v12
230216
; CHECK-NEXT: ret
231217
%a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
232218
%b.zext = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
@@ -240,10 +226,11 @@ define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
240226
; CHECK-LABEL: uabd_s_promoted_ops:
241227
; CHECK: # %bb.0:
242228
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
243-
; CHECK-NEXT: vwsubu.vv v10, v8, v9
229+
; CHECK-NEXT: vminu.vv v10, v8, v9
230+
; CHECK-NEXT: vmaxu.vv v8, v8, v9
231+
; CHECK-NEXT: vsub.vv v10, v8, v10
244232
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
245-
; CHECK-NEXT: vrsub.vi v8, v10, 0
246-
; CHECK-NEXT: vmax.vv v8, v10, v8
233+
; CHECK-NEXT: vzext.vf2 v8, v10
247234
; CHECK-NEXT: ret
248235
%a.zext = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
249236
%b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -266,10 +253,11 @@ define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
266253
; CHECK-LABEL: uabd_d_promoted_ops:
267254
; CHECK: # %bb.0:
268255
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
269-
; CHECK-NEXT: vwsubu.vv v10, v8, v9
256+
; CHECK-NEXT: vminu.vv v10, v8, v9
257+
; CHECK-NEXT: vmaxu.vv v8, v8, v9
258+
; CHECK-NEXT: vsub.vv v10, v8, v10
270259
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
271-
; CHECK-NEXT: vrsub.vi v8, v10, 0
272-
; CHECK-NEXT: vmax.vv v8, v10, v8
260+
; CHECK-NEXT: vzext.vf2 v8, v10
273261
; CHECK-NEXT: ret
274262
%a.zext = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
275263
%b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -285,12 +273,9 @@ define <vscale x 4 x i32> @uabd_non_matching_extension(<vscale x 4 x i32> %a, <v
285273
; CHECK: # %bb.0:
286274
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
287275
; CHECK-NEXT: vzext.vf4 v12, v10
288-
; CHECK-NEXT: vwsubu.vv v16, v8, v12
289-
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
290-
; CHECK-NEXT: vrsub.vi v8, v16, 0
291-
; CHECK-NEXT: vmax.vv v12, v16, v8
292-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
293-
; CHECK-NEXT: vnsrl.wi v8, v12, 0
276+
; CHECK-NEXT: vminu.vv v10, v8, v12
277+
; CHECK-NEXT: vmaxu.vv v8, v8, v12
278+
; CHECK-NEXT: vsub.vv v8, v8, v10
294279
; CHECK-NEXT: ret
295280
%a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
296281
%b.zext = zext <vscale x 4 x i8> %b to <vscale x 4 x i64>
@@ -307,10 +292,11 @@ define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a,
307292
; CHECK: # %bb.0:
308293
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
309294
; CHECK-NEXT: vzext.vf2 v10, v8
310-
; CHECK-NEXT: vwsubu.vv v12, v10, v9
295+
; CHECK-NEXT: vminu.vv v8, v10, v9
296+
; CHECK-NEXT: vmaxu.vv v9, v10, v9
297+
; CHECK-NEXT: vsub.vv v10, v9, v8
311298
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
312-
; CHECK-NEXT: vrsub.vi v8, v12, 0
313-
; CHECK-NEXT: vmax.vv v8, v12, v8
299+
; CHECK-NEXT: vzext.vf2 v8, v10
314300
; CHECK-NEXT: ret
315301
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
316302
%b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>

0 commit comments

Comments (0)