Skip to content

Commit 951974e

Browse files
committed
[RISCV] Support (truncate (smin (smax X, C1), C2)) for vnclipu in combineTruncToVnclip.
If the smax removes all negative numbers, then we can treat the smin like a umin. If the smin and smax are in the other order, we can swap them and use a vnclipu as long as the smax constant is less than or equal to the smin constant. This is based on similar code from X86's detectUSatPattern.
1 parent 246234a commit 951974e

File tree

3 files changed

+48
-63
lines changed

3 files changed

+48
-63
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 33 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -16230,17 +16230,35 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
1623016230
return SDValue();
1623116231
};
1623216232

16233+
SDLoc DL(N);
16234+
1623316235
auto DetectUSatPattern = [&](SDValue V) {
16234-
// Src must be a UMIN or UMIN_VL.
16235-
APInt C;
16236-
SDValue UMin = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, C);
16237-
if (!UMin)
16238-
return SDValue();
16236+
APInt LoC, HiC;
16237+
16238+
// Simple case, V is a UMIN.
16239+
if (SDValue UMin = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
16240+
if (HiC.isMask(VT.getScalarSizeInBits()))
16241+
return UMin;
16242+
16243+
// If we have an SMAX that removes negative numbers first, then we can match
16244+
// SMIN instead of UMIN.
16245+
if (SDValue SMin = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16246+
if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16247+
if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
16248+
return SMin;
1623916249

16240-
if (!C.isMask(VT.getScalarSizeInBits()))
16241-
return SDValue();
16250+
// If we have an SMIN before an SMAX and the SMAX constant is less than or
16251+
// equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
16252+
// first.
16253+
if (SDValue SMax = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16254+
if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16255+
if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
16256+
HiC.uge(LoC))
16257+
return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMin,
16258+
V.getOperand(1), DAG.getUNDEF(V.getValueType()),
16259+
Mask, VL);
1624216260

16243-
return UMin;
16261+
return SDValue();
1624416262
};
1624516263

1624616264
auto DetectSSatPattern = [&](SDValue V) {
@@ -16249,15 +16267,15 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
1624916267
APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
1625016268
APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
1625116269

16252-
APInt CMin, CMax;
16253-
if (SDValue SMin = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, CMin))
16254-
if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, RISCVISD::SMAX_VL, CMax))
16255-
if (CMin == SignedMax && CMax == SignedMin)
16270+
APInt HiC, LoC;
16271+
if (SDValue SMin = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16272+
if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16273+
if (HiC == SignedMax && LoC == SignedMin)
1625616274
return SMax;
1625716275

16258-
if (SDValue SMax = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, CMax))
16259-
if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, RISCVISD::SMIN_VL, CMin))
16260-
if (CMin == SignedMax && CMax == SignedMin)
16276+
if (SDValue SMax = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16277+
if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16278+
if (HiC == SignedMax && LoC == SignedMin)
1626116279
return SMin;
1626216280

1626316281
return SDValue();
@@ -16272,7 +16290,6 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
1627216290
else
1627316291
return SDValue();
1627416292

16275-
SDLoc DL(N);
1627616293
// Rounding mode here is arbitrary since we aren't shifting out any bits.
1627716294
return DAG.getNode(
1627816295
ClipOpc, DL, VT,

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll

Lines changed: 6 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -105,10 +105,8 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
105105
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
106106
; CHECK-NEXT: vle16.v v8, (a0)
107107
; CHECK-NEXT: vmax.vx v8, v8, zero
108-
; CHECK-NEXT: li a0, 255
109-
; CHECK-NEXT: vmin.vx v8, v8, a0
110108
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
111-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
109+
; CHECK-NEXT: vnclipu.wi v8, v8, 0
112110
; CHECK-NEXT: vse8.v v8, (a1)
113111
; CHECK-NEXT: ret
114112
%1 = load <4 x i16>, ptr %x, align 16
@@ -125,11 +123,9 @@ define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
125123
; CHECK: # %bb.0:
126124
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
127125
; CHECK-NEXT: vle16.v v8, (a0)
128-
; CHECK-NEXT: li a0, 255
129-
; CHECK-NEXT: vmin.vx v8, v8, a0
130126
; CHECK-NEXT: vmax.vx v8, v8, zero
131127
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
132-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
128+
; CHECK-NEXT: vnclipu.wi v8, v8, 0
133129
; CHECK-NEXT: vse8.v v8, (a1)
134130
; CHECK-NEXT: ret
135131
%1 = load <4 x i16>, ptr %x, align 16
@@ -237,11 +233,8 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
237233
; CHECK-NEXT: vle32.v v8, (a0)
238234
; CHECK-NEXT: li a0, 1
239235
; CHECK-NEXT: vmax.vx v8, v8, a0
240-
; CHECK-NEXT: lui a0, 16
241-
; CHECK-NEXT: addi a0, a0, -1
242-
; CHECK-NEXT: vmin.vx v8, v8, a0
243236
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
244-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
237+
; CHECK-NEXT: vnclipu.wi v8, v8, 0
245238
; CHECK-NEXT: vse16.v v8, (a1)
246239
; CHECK-NEXT: ret
247240
%1 = load <4 x i32>, ptr %x, align 16
@@ -258,13 +251,10 @@ define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
258251
; CHECK: # %bb.0:
259252
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
260253
; CHECK-NEXT: vle32.v v8, (a0)
261-
; CHECK-NEXT: lui a0, 16
262-
; CHECK-NEXT: addi a0, a0, -1
263-
; CHECK-NEXT: vmin.vx v8, v8, a0
264254
; CHECK-NEXT: li a0, 50
265255
; CHECK-NEXT: vmax.vx v8, v8, a0
266256
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
267-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
257+
; CHECK-NEXT: vnclipu.wi v8, v8, 0
268258
; CHECK-NEXT: vse16.v v8, (a1)
269259
; CHECK-NEXT: ret
270260
%1 = load <4 x i32>, ptr %x, align 16
@@ -374,11 +364,8 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
374364
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
375365
; CHECK-NEXT: vle64.v v8, (a0)
376366
; CHECK-NEXT: vmax.vx v8, v8, zero
377-
; CHECK-NEXT: li a0, -1
378-
; CHECK-NEXT: srli a0, a0, 32
379-
; CHECK-NEXT: vmin.vx v8, v8, a0
380367
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
381-
; CHECK-NEXT: vnsrl.wi v10, v8, 0
368+
; CHECK-NEXT: vnclipu.wi v10, v8, 0
382369
; CHECK-NEXT: vse32.v v10, (a1)
383370
; CHECK-NEXT: ret
384371
%1 = load <4 x i64>, ptr %x, align 16
@@ -395,12 +382,9 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
395382
; CHECK: # %bb.0:
396383
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
397384
; CHECK-NEXT: vle64.v v8, (a0)
398-
; CHECK-NEXT: li a0, -1
399-
; CHECK-NEXT: srli a0, a0, 32
400-
; CHECK-NEXT: vmin.vx v8, v8, a0
401385
; CHECK-NEXT: vmax.vx v8, v8, zero
402386
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
403-
; CHECK-NEXT: vnsrl.wi v10, v8, 0
387+
; CHECK-NEXT: vnclipu.wi v10, v8, 0
404388
; CHECK-NEXT: vse32.v v10, (a1)
405389
; CHECK-NEXT: ret
406390
%1 = load <4 x i64>, ptr %x, align 16

llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll

Lines changed: 9 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -105,10 +105,8 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
105105
; CHECK-NEXT: vl1re16.v v8, (a0)
106106
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
107107
; CHECK-NEXT: vmax.vx v8, v8, zero
108-
; CHECK-NEXT: li a0, 255
109-
; CHECK-NEXT: vmin.vx v8, v8, a0
110108
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
111-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
109+
; CHECK-NEXT: vnclipu.wi v8, v8, 0
112110
; CHECK-NEXT: vse8.v v8, (a1)
113111
; CHECK-NEXT: ret
114112
%1 = load <vscale x 4 x i16>, ptr %x, align 16
@@ -124,12 +122,10 @@ define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
124122
; CHECK-LABEL: trunc_sat_u8u16_minmax:
125123
; CHECK: # %bb.0:
126124
; CHECK-NEXT: vl1re16.v v8, (a0)
127-
; CHECK-NEXT: li a0, 255
128-
; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
129-
; CHECK-NEXT: vmin.vx v8, v8, a0
125+
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
130126
; CHECK-NEXT: vmax.vx v8, v8, zero
131127
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
132-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
128+
; CHECK-NEXT: vnclipu.wi v8, v8, 0
133129
; CHECK-NEXT: vse8.v v8, (a1)
134130
; CHECK-NEXT: ret
135131
%1 = load <vscale x 4 x i16>, ptr %x, align 16
@@ -237,11 +233,8 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
237233
; CHECK-NEXT: li a0, 1
238234
; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
239235
; CHECK-NEXT: vmax.vx v8, v8, a0
240-
; CHECK-NEXT: lui a0, 16
241-
; CHECK-NEXT: addi a0, a0, -1
242-
; CHECK-NEXT: vmin.vx v8, v8, a0
243236
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
244-
; CHECK-NEXT: vnsrl.wi v10, v8, 0
237+
; CHECK-NEXT: vnclipu.wi v10, v8, 0
245238
; CHECK-NEXT: vs1r.v v10, (a1)
246239
; CHECK-NEXT: ret
247240
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -257,14 +250,11 @@ define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
257250
; CHECK-LABEL: trunc_sat_u16u32_minmax:
258251
; CHECK: # %bb.0:
259252
; CHECK-NEXT: vl2re32.v v8, (a0)
260-
; CHECK-NEXT: lui a0, 16
261-
; CHECK-NEXT: addi a0, a0, -1
262-
; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
263-
; CHECK-NEXT: vmin.vx v8, v8, a0
264253
; CHECK-NEXT: li a0, 50
254+
; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
265255
; CHECK-NEXT: vmax.vx v8, v8, a0
266256
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
267-
; CHECK-NEXT: vnsrl.wi v10, v8, 0
257+
; CHECK-NEXT: vnclipu.wi v10, v8, 0
268258
; CHECK-NEXT: vs1r.v v10, (a1)
269259
; CHECK-NEXT: ret
270260
%1 = load <vscale x 4 x i32>, ptr %x, align 16
@@ -374,11 +364,8 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
374364
; CHECK-NEXT: vl4re64.v v8, (a0)
375365
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
376366
; CHECK-NEXT: vmax.vx v8, v8, zero
377-
; CHECK-NEXT: li a0, -1
378-
; CHECK-NEXT: srli a0, a0, 32
379-
; CHECK-NEXT: vmin.vx v8, v8, a0
380367
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
381-
; CHECK-NEXT: vnsrl.wi v12, v8, 0
368+
; CHECK-NEXT: vnclipu.wi v12, v8, 0
382369
; CHECK-NEXT: vs2r.v v12, (a1)
383370
; CHECK-NEXT: ret
384371
%1 = load <vscale x 4 x i64>, ptr %x, align 16
@@ -394,13 +381,10 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
394381
; CHECK-LABEL: trunc_sat_u32u64_minmax:
395382
; CHECK: # %bb.0:
396383
; CHECK-NEXT: vl4re64.v v8, (a0)
397-
; CHECK-NEXT: li a0, -1
398-
; CHECK-NEXT: srli a0, a0, 32
399-
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
400-
; CHECK-NEXT: vmin.vx v8, v8, a0
384+
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
401385
; CHECK-NEXT: vmax.vx v8, v8, zero
402386
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
403-
; CHECK-NEXT: vnsrl.wi v12, v8, 0
387+
; CHECK-NEXT: vnclipu.wi v12, v8, 0
404388
; CHECK-NEXT: vs2r.v v12, (a1)
405389
; CHECK-NEXT: ret
406390
%1 = load <vscale x 4 x i64>, ptr %x, align 16

0 commit comments

Comments
 (0)