Skip to content

Commit 49c4481

Browse files
committed
[AArch64] Support saturated truncate
Add support for saturated truncate with the following changes: - Set the operation action of the saturated truncate nodes to Legal for types v8i16, v4i32, and v2i64 - Implement `isTypeDesirableForOp` to check for truncate conversions - Add patterns for saturated truncate of supported types
1 parent 7c1f22e commit 49c4481

File tree

5 files changed

+108
-185
lines changed

5 files changed

+108
-185
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,6 +1410,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
14101410
}
14111411
}
14121412

1413+
for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
1414+
setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Legal);
1415+
setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Legal);
1416+
setOperationAction(ISD::TRUNCATE_USAT_U, VT, Legal);
1417+
}
1418+
14131419
if (Subtarget->hasSME()) {
14141420
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
14151421
}
@@ -28730,6 +28736,18 @@ bool AArch64TargetLowering::hasInlineStackProbe(
2873028736
MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
2873128737
}
2873228738

28739+
// Decides whether VT is a desirable type for a node with opcode Opc.
// For the three saturating-truncate opcodes handled below, v8i8, v4i16 and
// v2i32 are reported as desirable; every other opcode/type combination is
// deferred to the base TargetLowering implementation.
bool AArch64TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
28740+
switch (Opc) {
28741+
case ISD::TRUNCATE_SSAT_S:
28742+
case ISD::TRUNCATE_SSAT_U:
28743+
case ISD::TRUNCATE_USAT_U:
28744+
if (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32)
28745+
return true;
28746+
}
28747+
28748+
// Nothing matched above (the last case simply falls out of the switch), so
// use the generic answer from the base class.
return TargetLowering::isTypeDesirableForOp(Opc, VT);
28749+
}
28750+
2873328751
#ifndef NDEBUG
2873428752
void AArch64TargetLowering::verifyTargetSDNode(const SDNode *N) const {
2873528753
switch (N->getOpcode()) {

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,11 @@ class AArch64TargetLowering : public TargetLowering {
743743
bool generateFMAsInMachineCombiner(EVT VT,
744744
CodeGenOptLevel OptLevel) const override;
745745

746+
/// Return true if the target has native support for
747+
/// the specified value type and it is 'desirable' to use the type for the
748+
/// given node type.
749+
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
750+
746751
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
747752
ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
748753

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5418,64 +5418,75 @@ def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
54185418
def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
54195419

54205420
// trunc(umin(X, 255)) -> UQXTN v8i8
5421-
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
5421+
def : Pat<(v8i8 (truncusat_u (v8i16 V128:$Vn))),
54225422
(UQXTNv8i8 V128:$Vn)>;
54235423
// trunc(umin(X, 65535)) -> UQXTN v4i16
5424-
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
5424+
def : Pat<(v4i16 (truncusat_u (v4i32 V128:$Vn))),
54255425
(UQXTNv4i16 V128:$Vn)>;
5426+
// trunc(umin(X, 4294967295)) -> UQXTN v2i32
5427+
def : Pat<(v2i32 (truncusat_u (v2i64 V128:$Vn))),
5428+
(UQXTNv2i32 V128:$Vn)>;
54265429
// trunc(smin(smax(X, -128), 127)) -> SQXTN
5427-
// with reversed min/max
5428-
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5429-
(v8i16 VImm7F)))),
5430-
(SQXTNv8i8 V128:$Vn)>;
5431-
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5432-
(v8i16 VImm80)))),
5430+
def : Pat<(v8i8 (truncssat_s (v8i16 V128:$Vn))),
54335431
(SQXTNv8i8 V128:$Vn)>;
54345432
// trunc(smin(smax(X, -32768), 32767)) -> SQXTN
5435-
// with reversed min/max
5436-
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5437-
(v4i32 VImm7FFF)))),
5438-
(SQXTNv4i16 V128:$Vn)>;
5439-
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5440-
(v4i32 VImm8000)))),
5433+
def : Pat<(v4i16 (truncssat_s (v4i32 V128:$Vn))),
54415434
(SQXTNv4i16 V128:$Vn)>;
5442-
5443-
// concat_vectors(Vd, trunc(umin(X, 255))) -> UQXTRN(Vd, Vn)
5435+
// trunc(smin(smax(X, -2147483648), 2147483647)) -> SQXTN
5436+
def : Pat<(v2i32 (truncssat_s (v2i64 V128:$Vn))),
5437+
(SQXTNv2i32 V128:$Vn)>;
5438+
// trunc(umin(smax(X, 0), 255)) -> SQXTUN
5439+
def : Pat<(v8i8 (truncssat_u (v8i16 V128:$Vn))),
5440+
(SQXTUNv8i8 V128:$Vn)>;
5441+
// trunc(umin(smax(X, 0), 65535)) -> SQXTUN
5442+
def : Pat<(v4i16 (truncssat_u (v4i32 V128:$Vn))),
5443+
(SQXTUNv4i16 V128:$Vn)>;
5444+
// trunc(umin(smax(X, 0), 4294967295)) -> SQXTUN
5445+
def : Pat<(v2i32 (truncssat_u (v2i64 V128:$Vn))),
5446+
(SQXTUNv2i32 V128:$Vn)>;
5447+
5448+
// truncusat_u
5449+
// concat_vectors(Vd, truncusat_u(Vn)) ~> UQXTN2(Vd, Vn)
54445450
def : Pat<(v16i8 (concat_vectors
54455451
(v8i8 V64:$Vd),
5446-
(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))))),
5452+
(v8i8 (truncusat_u (v8i16 V128:$Vn))))),
54475453
(UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5448-
// concat_vectors(Vd, trunc(umin(X, 65535))) -> UQXTRN(Vd, Vn)
54495454
def : Pat<(v8i16 (concat_vectors
54505455
(v4i16 V64:$Vd),
5451-
(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))))),
5456+
(v4i16 (truncusat_u (v4i32 V128:$Vn))))),
54525457
(UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5458+
def : Pat<(v4i32 (concat_vectors
5459+
(v2i32 V64:$Vd),
5460+
(v2i32 (truncusat_u (v2i64 V128:$Vn))))),
5461+
(UQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
54535462

5454-
// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
5455-
// with reversed min/max
5463+
// concat_vectors(Vd, truncssat_s(Vn)) ~> SQXTN2(Vd, Vn)
54565464
def : Pat<(v16i8 (concat_vectors
54575465
(v8i8 V64:$Vd),
5458-
(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5459-
(v8i16 VImm7F)))))),
5466+
(v8i8 (truncssat_s (v8i16 V128:$Vn))))),
54605467
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5461-
def : Pat<(v16i8 (concat_vectors
5462-
(v8i8 V64:$Vd),
5463-
(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5464-
(v8i16 VImm80)))))),
5465-
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5466-
5467-
// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
5468-
// with reversed min/max
54695468
def : Pat<(v8i16 (concat_vectors
54705469
(v4i16 V64:$Vd),
5471-
(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5472-
(v4i32 VImm7FFF)))))),
5470+
(v4i16 (truncssat_s (v4i32 V128:$Vn))))),
54735471
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5472+
def : Pat<(v4i32 (concat_vectors
5473+
(v2i32 V64:$Vd),
5474+
(v2i32 (truncssat_s (v2i64 V128:$Vn))))),
5475+
(SQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5476+
5477+
// concat_vectors(Vd, truncssat_u(Vn)) ~> SQXTUN2(Vd, Vn)
5478+
def : Pat<(v16i8 (concat_vectors
5479+
(v8i8 V64:$Vd),
5480+
(v8i8 (truncssat_u (v8i16 V128:$Vn))))),
5481+
(SQXTUNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
54745482
def : Pat<(v8i16 (concat_vectors
54755483
(v4i16 V64:$Vd),
5476-
(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5477-
(v4i32 VImm8000)))))),
5478-
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5484+
(v4i16 (truncssat_u (v4i32 V128:$Vn))))),
5485+
(SQXTUNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5486+
def : Pat<(v4i32 (concat_vectors
5487+
(v2i32 V64:$Vd),
5488+
(v2i32 (truncssat_u (v2i64 V128:$Vn))))),
5489+
(SQXTUNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
54795490

54805491
// Select BSWAP vector instructions into REV instructions
54815492
def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))),

llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll

Lines changed: 20 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,8 @@
77
define <2 x i32> @stest_f64i32(<2 x double> %x) {
88
; CHECK-LABEL: stest_f64i32:
99
; CHECK: // %bb.0: // %entry
10-
; CHECK-NEXT: mov d1, v0.d[1]
11-
; CHECK-NEXT: fcvtzs w8, d0
12-
; CHECK-NEXT: fcvtzs w9, d1
13-
; CHECK-NEXT: fmov s0, w8
14-
; CHECK-NEXT: mov v0.s[1], w9
15-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
10+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
11+
; CHECK-NEXT: sqxtn v0.2s, v0.2d
1612
; CHECK-NEXT: ret
1713
entry:
1814
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -45,12 +41,8 @@ entry:
4541
define <2 x i32> @ustest_f64i32(<2 x double> %x) {
4642
; CHECK-LABEL: ustest_f64i32:
4743
; CHECK: // %bb.0: // %entry
48-
; CHECK-NEXT: mov d1, v0.d[1]
49-
; CHECK-NEXT: fcvtzu w8, d0
50-
; CHECK-NEXT: fcvtzu w9, d1
51-
; CHECK-NEXT: fmov s0, w8
52-
; CHECK-NEXT: mov v0.s[1], w9
53-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
44+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
45+
; CHECK-NEXT: sqxtun v0.2s, v0.2d
5446
; CHECK-NEXT: ret
5547
entry:
5648
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -242,8 +234,8 @@ entry:
242234
define <4 x i16> @ustest_f32i16(<4 x float> %x) {
243235
; CHECK-LABEL: ustest_f32i16:
244236
; CHECK: // %bb.0: // %entry
245-
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
246-
; CHECK-NEXT: uqxtn v0.4h, v0.4s
237+
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
238+
; CHECK-NEXT: sqxtun v0.4h, v0.4s
247239
; CHECK-NEXT: ret
248240
entry:
249241
%conv = fptosi <4 x float> %x to <4 x i32>
@@ -308,10 +300,10 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
308300
; CHECK-CVT: // %bb.0: // %entry
309301
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
310302
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
311-
; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
312-
; CHECK-CVT-NEXT: fcvtzu v2.4s, v0.4s
313-
; CHECK-CVT-NEXT: uqxtn v0.4h, v1.4s
314-
; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
303+
; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
304+
; CHECK-CVT-NEXT: fcvtzs v2.4s, v0.4s
305+
; CHECK-CVT-NEXT: sqxtun v0.4h, v1.4s
306+
; CHECK-CVT-NEXT: sqxtun2 v0.8h, v2.4s
315307
; CHECK-CVT-NEXT: ret
316308
;
317309
; CHECK-FP16-LABEL: ustest_f16i16:
@@ -656,12 +648,8 @@ entry:
656648
define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
657649
; CHECK-LABEL: stest_f64i32_mm:
658650
; CHECK: // %bb.0: // %entry
659-
; CHECK-NEXT: mov d1, v0.d[1]
660-
; CHECK-NEXT: fcvtzs w8, d0
661-
; CHECK-NEXT: fcvtzs w9, d1
662-
; CHECK-NEXT: fmov s0, w8
663-
; CHECK-NEXT: mov v0.s[1], w9
664-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
651+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
652+
; CHECK-NEXT: sqxtn v0.2s, v0.2d
665653
; CHECK-NEXT: ret
666654
entry:
667655
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -691,12 +679,8 @@ entry:
691679
define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
692680
; CHECK-LABEL: ustest_f64i32_mm:
693681
; CHECK: // %bb.0: // %entry
694-
; CHECK-NEXT: mov d1, v0.d[1]
695-
; CHECK-NEXT: fcvtzu w8, d0
696-
; CHECK-NEXT: fcvtzu w9, d1
697-
; CHECK-NEXT: fmov s0, w8
698-
; CHECK-NEXT: mov v0.s[1], w9
699-
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
682+
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
683+
; CHECK-NEXT: sqxtun v0.2s, v0.2d
700684
; CHECK-NEXT: ret
701685
entry:
702686
%conv = fptosi <2 x double> %x to <2 x i64>
@@ -868,8 +852,8 @@ entry:
868852
define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
869853
; CHECK-LABEL: ustest_f32i16_mm:
870854
; CHECK: // %bb.0: // %entry
871-
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
872-
; CHECK-NEXT: uqxtn v0.4h, v0.4s
855+
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
856+
; CHECK-NEXT: sqxtun v0.4h, v0.4s
873857
; CHECK-NEXT: ret
874858
entry:
875859
%conv = fptosi <4 x float> %x to <4 x i32>
@@ -929,10 +913,10 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
929913
; CHECK-CVT: // %bb.0: // %entry
930914
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
931915
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
932-
; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
933-
; CHECK-CVT-NEXT: fcvtzu v2.4s, v0.4s
934-
; CHECK-CVT-NEXT: uqxtn v0.4h, v1.4s
935-
; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
916+
; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
917+
; CHECK-CVT-NEXT: fcvtzs v2.4s, v0.4s
918+
; CHECK-CVT-NEXT: sqxtun v0.4h, v1.4s
919+
; CHECK-CVT-NEXT: sqxtun2 v0.8h, v2.4s
936920
; CHECK-CVT-NEXT: ret
937921
;
938922
; CHECK-FP16-LABEL: ustest_f16i16_mm:

0 commit comments

Comments
 (0)