Skip to content

Commit 8247068

Browse files
authored
[RISCV] Support (truncate (smin (smax X, C1), C2)) for vnclipu in combineTruncToVnclip. (#93756)
If the smax removes all negative numbers first, then we can treat the smin like a umin. If the smin and smax are in the other order, we can swap them and use a vnclipu as long as the smax constant is less than or equal to the smin constant. This is based on similar code from X86's detectUSatPattern.
1 parent 6e975ec commit 8247068

File tree

4 files changed

+71
-142
lines changed

4 files changed

+71
-142
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16230,17 +16230,37 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
1623016230
return SDValue();
1623116231
};
1623216232

16233-
auto DetectUSatPattern = [&](SDValue V) {
16234-
// Src must be a UMIN or UMIN_VL.
16235-
APInt C;
16236-
SDValue UMin = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, C);
16237-
if (!UMin)
16238-
return SDValue();
16233+
SDLoc DL(N);
1623916234

16240-
if (!C.isMask(VT.getScalarSizeInBits()))
16241-
return SDValue();
16235+
auto DetectUSatPattern = [&](SDValue V) {
16236+
APInt LoC, HiC;
16237+
16238+
// Simple case, V is a UMIN.
16239+
if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
16240+
if (HiC.isMask(VT.getScalarSizeInBits()))
16241+
return UMinOp;
16242+
16243+
// If we have an SMAX that removes negative numbers first, then we can match
16244+
// SMIN instead of UMIN.
16245+
if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16246+
if (SDValue SMaxOp =
16247+
MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16248+
if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
16249+
return SMinOp;
16250+
16251+
// If we have an SMIN before an SMAX and the SMAX constant is less than or
16252+
// equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
16253+
// first.
16254+
if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16255+
if (SDValue SMinOp =
16256+
MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16257+
if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
16258+
HiC.uge(LoC))
16259+
return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
16260+
V.getOperand(1), DAG.getUNDEF(V.getValueType()),
16261+
Mask, VL);
1624216262

16243-
return UMin;
16263+
return SDValue();
1624416264
};
1624516265

1624616266
auto DetectSSatPattern = [&](SDValue V) {
@@ -16249,16 +16269,18 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
1624916269
APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
1625016270
APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
1625116271

16252-
APInt CMin, CMax;
16253-
if (SDValue SMin = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, CMin))
16254-
if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, RISCVISD::SMAX_VL, CMax))
16255-
if (CMin == SignedMax && CMax == SignedMin)
16256-
return SMax;
16272+
APInt HiC, LoC;
16273+
if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16274+
if (SDValue SMaxOp =
16275+
MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16276+
if (HiC == SignedMax && LoC == SignedMin)
16277+
return SMaxOp;
1625716278

16258-
if (SDValue SMax = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, CMax))
16259-
if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, RISCVISD::SMIN_VL, CMin))
16260-
if (CMin == SignedMax && CMax == SignedMin)
16261-
return SMin;
16279+
if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16280+
if (SDValue SMinOp =
16281+
MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16282+
if (HiC == SignedMax && LoC == SignedMin)
16283+
return SMinOp;
1626216284

1626316285
return SDValue();
1626416286
};
@@ -16272,7 +16294,6 @@ static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
1627216294
else
1627316295
return SDValue();
1627416296

16275-
SDLoc DL(N);
1627616297
// Rounding mode here is arbitrary since we aren't shifting out any bits.
1627716298
return DAG.getNode(
1627816299
ClipOpc, DL, VT,

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -98,17 +98,14 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
9898
ret void
9999
}
100100

101-
; FIXME: This can be a signed vmax followed by vnclipu.
102101
define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
103102
; CHECK-LABEL: trunc_sat_u8u16_maxmin:
104103
; CHECK: # %bb.0:
105104
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
106105
; CHECK-NEXT: vle16.v v8, (a0)
107106
; CHECK-NEXT: vmax.vx v8, v8, zero
108-
; CHECK-NEXT: li a0, 255
109-
; CHECK-NEXT: vmin.vx v8, v8, a0
110107
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
111-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
108+
; CHECK-NEXT: vnclipu.wi v8, v8, 0
112109
; CHECK-NEXT: vse8.v v8, (a1)
113110
; CHECK-NEXT: ret
114111
%1 = load <4 x i16>, ptr %x, align 16
@@ -119,17 +116,14 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
119116
ret void
120117
}
121118

122-
; FIXME: This can be a signed vmax followed by vnclipu.
123119
define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
124120
; CHECK-LABEL: trunc_sat_u8u16_minmax:
125121
; CHECK: # %bb.0:
126122
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
127123
; CHECK-NEXT: vle16.v v8, (a0)
128-
; CHECK-NEXT: li a0, 255
129-
; CHECK-NEXT: vmin.vx v8, v8, a0
130124
; CHECK-NEXT: vmax.vx v8, v8, zero
131125
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
132-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
126+
; CHECK-NEXT: vnclipu.wi v8, v8, 0
133127
; CHECK-NEXT: vse8.v v8, (a1)
134128
; CHECK-NEXT: ret
135129
%1 = load <4 x i16>, ptr %x, align 16
@@ -229,19 +223,15 @@ define void @trunc_sat_u16u32_min(ptr %x, ptr %y) {
229223
ret void
230224
}
231225

232-
; FIXME: This can be a signed vmax followed by vnclipu.
233226
define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
234227
; CHECK-LABEL: trunc_sat_u16u32_maxmin:
235228
; CHECK: # %bb.0:
236229
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
237230
; CHECK-NEXT: vle32.v v8, (a0)
238231
; CHECK-NEXT: li a0, 1
239232
; CHECK-NEXT: vmax.vx v8, v8, a0
240-
; CHECK-NEXT: lui a0, 16
241-
; CHECK-NEXT: addi a0, a0, -1
242-
; CHECK-NEXT: vmin.vx v8, v8, a0
243233
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
244-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
234+
; CHECK-NEXT: vnclipu.wi v8, v8, 0
245235
; CHECK-NEXT: vse16.v v8, (a1)
246236
; CHECK-NEXT: ret
247237
%1 = load <4 x i32>, ptr %x, align 16
@@ -252,19 +242,15 @@ define void @trunc_sat_u16u32_maxmin(ptr %x, ptr %y) {
252242
ret void
253243
}
254244

255-
; FIXME: This can be a signed vmax followed by vnclipu.
256245
define void @trunc_sat_u16u32_minmax(ptr %x, ptr %y) {
257246
; CHECK-LABEL: trunc_sat_u16u32_minmax:
258247
; CHECK: # %bb.0:
259248
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
260249
; CHECK-NEXT: vle32.v v8, (a0)
261-
; CHECK-NEXT: lui a0, 16
262-
; CHECK-NEXT: addi a0, a0, -1
263-
; CHECK-NEXT: vmin.vx v8, v8, a0
264250
; CHECK-NEXT: li a0, 50
265251
; CHECK-NEXT: vmax.vx v8, v8, a0
266252
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
267-
; CHECK-NEXT: vnsrl.wi v8, v8, 0
253+
; CHECK-NEXT: vnclipu.wi v8, v8, 0
268254
; CHECK-NEXT: vse16.v v8, (a1)
269255
; CHECK-NEXT: ret
270256
%1 = load <4 x i32>, ptr %x, align 16
@@ -367,18 +353,14 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
367353
}
368354

369355

370-
; FIXME: This can be a signed vmax followed by vnclipu.
371356
define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
372357
; CHECK-LABEL: trunc_sat_u32u64_maxmin:
373358
; CHECK: # %bb.0:
374359
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
375360
; CHECK-NEXT: vle64.v v8, (a0)
376361
; CHECK-NEXT: vmax.vx v8, v8, zero
377-
; CHECK-NEXT: li a0, -1
378-
; CHECK-NEXT: srli a0, a0, 32
379-
; CHECK-NEXT: vmin.vx v8, v8, a0
380362
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
381-
; CHECK-NEXT: vnsrl.wi v10, v8, 0
363+
; CHECK-NEXT: vnclipu.wi v10, v8, 0
382364
; CHECK-NEXT: vse32.v v10, (a1)
383365
; CHECK-NEXT: ret
384366
%1 = load <4 x i64>, ptr %x, align 16
@@ -389,18 +371,14 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
389371
ret void
390372
}
391373

392-
; FIXME: This can be a signed vmax followed by vnclipu.
393374
define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
394375
; CHECK-LABEL: trunc_sat_u32u64_minmax:
395376
; CHECK: # %bb.0:
396377
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
397378
; CHECK-NEXT: vle64.v v8, (a0)
398-
; CHECK-NEXT: li a0, -1
399-
; CHECK-NEXT: srli a0, a0, 32
400-
; CHECK-NEXT: vmin.vx v8, v8, a0
401379
; CHECK-NEXT: vmax.vx v8, v8, zero
402380
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
403-
; CHECK-NEXT: vnsrl.wi v10, v8, 0
381+
; CHECK-NEXT: vnclipu.wi v10, v8, 0
404382
; CHECK-NEXT: vse32.v v10, (a1)
405383
; CHECK-NEXT: ret
406384
%1 = load <4 x i64>, ptr %x, align 16
@@ -477,7 +455,6 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
477455
ret void
478456
}
479457

480-
; FIXME: This can be a signed vmax followed by vnclipu.
481458
define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
482459
; CHECK-LABEL: trunc_sat_u8u32_maxmin:
483460
; CHECK: # %bb.0:
@@ -500,7 +477,6 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
500477
ret void
501478
}
502479

503-
; FIXME: This can be a signed vmax followed by vnclipu.
504480
define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
505481
; CHECK-LABEL: trunc_sat_u8u32_minmax:
506482
; CHECK: # %bb.0:
@@ -595,7 +571,6 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
595571
ret void
596572
}
597573

598-
; FIXME: This can be a signed vmax followed by vnclipu.
599574
define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
600575
; CHECK-LABEL: trunc_sat_u8u64_maxmin:
601576
; CHECK: # %bb.0:
@@ -620,7 +595,6 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
620595
ret void
621596
}
622597

623-
; FIXME: This can be a signed vmax followed by vnclipu.
624598
define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
625599
; CHECK-LABEL: trunc_sat_u8u64_minmax:
626600
; CHECK: # %bb.0:
@@ -735,7 +709,6 @@ define void @trunc_sat_u16u64_min(ptr %x, ptr %y) {
735709
ret void
736710
}
737711

738-
; FIXME: This can be a signed vmax followed by vnclipu.
739712
define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
740713
; CHECK-LABEL: trunc_sat_u16u64_maxmin:
741714
; CHECK: # %bb.0:
@@ -760,7 +733,6 @@ define void @trunc_sat_u16u64_maxmin(ptr %x, ptr %y) {
760733
ret void
761734
}
762735

763-
; FIXME: This can be a signed vmax followed by vnclipu.
764736
define void @trunc_sat_u16u64_minmax(ptr %x, ptr %y) {
765737
; CHECK-LABEL: trunc_sat_u16u64_minmax:
766738
; CHECK: # %bb.0:

0 commit comments

Comments
 (0)