Skip to content

Commit e53f9bd

Browse files
committed
[AArch64] Support saturated truncate
Add support for `ISD::TRUNCATE_[US]SAT`.
1 parent e3baff2 commit e53f9bd

File tree

4 files changed

+49
-30
lines changed

4 files changed

+49
-30
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,6 +1410,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
14101410
}
14111411
}
14121412

1413+
for (MVT VT : {MVT::v8i16, MVT::v4i32}) {
1414+
setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Custom);
1415+
setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Custom);
1416+
setOperationAction(ISD::TRUNCATE_USAT_U, VT, Custom);
1417+
}
1418+
14131419
if (Subtarget->hasSME()) {
14141420
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
14151421
}
@@ -28730,6 +28736,18 @@ bool AArch64TargetLowering::hasInlineStackProbe(
2873028736
MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
2873128737
}
2873228738

28739+
bool AArch64TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
28740+
switch (Opc) {
28741+
case ISD::TRUNCATE_SSAT_S:
28742+
case ISD::TRUNCATE_SSAT_U:
28743+
case ISD::TRUNCATE_USAT_U:
28744+
if (VT == MVT::v8i8 || VT == MVT::v4i16)
28745+
return true;
28746+
}
28747+
28748+
return TargetLowering::isTypeDesirableForOp(Opc, VT);
28749+
}
28750+
2873328751
#ifndef NDEBUG
2873428752
void AArch64TargetLowering::verifyTargetSDNode(const SDNode *N) const {
2873528753
switch (N->getOpcode()) {

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,11 @@ class AArch64TargetLowering : public TargetLowering {
743743
bool generateFMAsInMachineCombiner(EVT VT,
744744
CodeGenOptLevel OptLevel) const override;
745745

746+
/// Return true if the target has native support for
747+
/// the specified value type and it is 'desirable' to use the type for the
748+
/// given node type.
749+
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
750+
746751
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
747752
ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
748753

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5418,64 +5418,60 @@ def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
54185418
def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
54195419

54205420
// trunc(umin(X, 255)) -> UQXTN v8i8
5421-
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
5421+
def : Pat<(v8i8 (truncusat_u (v8i16 V128:$Vn))),
54225422
(UQXTNv8i8 V128:$Vn)>;
54235423
// trunc(umin(X, 65535)) -> UQXTN v4i16
5424-
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
5424+
def : Pat<(v4i16 (truncusat_u (v4i32 V128:$Vn))),
54255425
(UQXTNv4i16 V128:$Vn)>;
54265426
// trunc(smin(smax(X, -128), 127)) -> SQXTN
54275427
// with reversed min/max
5428-
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5429-
(v8i16 VImm7F)))),
5430-
(SQXTNv8i8 V128:$Vn)>;
5431-
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5432-
(v8i16 VImm80)))),
5428+
def : Pat<(v8i8 (truncssat_s (v8i16 V128:$Vn))),
54335429
(SQXTNv8i8 V128:$Vn)>;
5430+
// trunc(umin(smax(X, 0), 255)) -> SQXTUN
5431+
def : Pat<(v8i8 (truncssat_u (v8i16 V128:$Vn))),
5432+
(SQXTUNv8i8 V128:$Vn)>;
54345433
// trunc(smin(smax(X, -32768), 32767)) -> SQXTN
54355434
// with reversed min/max
5436-
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5437-
(v4i32 VImm7FFF)))),
5438-
(SQXTNv4i16 V128:$Vn)>;
5439-
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5440-
(v4i32 VImm8000)))),
5435+
def : Pat<(v4i16 (truncssat_s (v4i32 V128:$Vn))),
54415436
(SQXTNv4i16 V128:$Vn)>;
5437+
// trunc(umin(smax(X, 0), 65535)) -> SQXTUN
5438+
def : Pat<(v4i16 (truncssat_u (v4i32 V128:$Vn))),
5439+
(SQXTUNv4i16 V128:$Vn)>;
54425440

54435441
// concat_vectors(Vd, trunc(umin(Vn, 255))) -> UQXTN2(Vd, Vn)
54445442
def : Pat<(v16i8 (concat_vectors
54455443
(v8i8 V64:$Vd),
5446-
(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))))),
5444+
(v8i8 (truncusat_u (v8i16 V128:$Vn))))),
54475445
(UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
54485446
// concat_vectors(Vd, trunc(umin(Vn, 65535))) -> UQXTN2(Vd, Vn)
54495447
def : Pat<(v8i16 (concat_vectors
54505448
(v4i16 V64:$Vd),
5451-
(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))))),
5449+
(v4i16 (truncusat_u (v4i32 V128:$Vn))))),
54525450
(UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
54535451

54545452
// concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) ~> SQXTN2(Vd, Vn)
54555453
// with reversed min/max
54565454
def : Pat<(v16i8 (concat_vectors
54575455
(v8i8 V64:$Vd),
5458-
(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5459-
(v8i16 VImm7F)))))),
5456+
(v8i8 (truncssat_s (v8i16 V128:$Vn))))),
54605457
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5458+
// concat_vectors(Vd, trunc(umin(smax(Vn, 0), 255))) ~> SQXTUN2(Vd, Vn)
54615459
def : Pat<(v16i8 (concat_vectors
54625460
(v8i8 V64:$Vd),
5463-
(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5464-
(v8i16 VImm80)))))),
5465-
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5461+
(v8i8 (truncssat_u (v8i16 V128:$Vn))))),
5462+
(SQXTUNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
54665463

54675464
// concat_vectors(Vd, trunc(smin(smax(Vn, -32768), 32767))) ~> SQXTN2(Vd, Vn)
54685465
// with reversed min/max
54695466
def : Pat<(v8i16 (concat_vectors
54705467
(v4i16 V64:$Vd),
5471-
(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5472-
(v4i32 VImm7FFF)))))),
5468+
(v4i16 (truncssat_s (v4i32 V128:$Vn))))),
54735469
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5470+
// concat_vectors(Vd, trunc(umin(smax(Vn, 0), 65535))) ~> SQXTUN2(Vd, Vn)
54745471
def : Pat<(v8i16 (concat_vectors
54755472
(v4i16 V64:$Vd),
5476-
(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5477-
(v4i32 VImm8000)))))),
5478-
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5473+
(v4i16 (truncssat_u (v4i32 V128:$Vn))))),
5474+
(SQXTUNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
54795475

54805476
// Select BSWAP vector instructions into REV instructions
54815477
def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))),

llvm/test/CodeGen/AArch64/qmovn.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -292,15 +292,15 @@ entry:
292292

293293
; Test the (concat_vectors (X), (trunc(umin(smax(Y, 0), 2^n - 1)))) pattern.
294294

295+
; TODO: %min is clamped to [0, 255], which is exactly the unsigned range of i8,
296+
; so the truncate is a saturating truncate. There is an optimization opportunity here.
295297
define <16 x i8> @us_maxmin_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
296298
; CHECK-LABEL: us_maxmin_v8i16_to_v16i8:
297299
; CHECK: // %bb.0: // %entry
298300
; CHECK-NEXT: movi v2.2d, #0000000000000000
299-
; CHECK-NEXT: movi v3.2d, #0xff00ff00ff00ff
300301
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
301302
; CHECK-NEXT: smax v1.8h, v1.8h, v2.8h
302-
; CHECK-NEXT: smin v1.8h, v1.8h, v3.8h
303-
; CHECK-NEXT: xtn2 v0.16b, v1.8h
303+
; CHECK-NEXT: uqxtn2 v0.16b, v1.8h
304304
; CHECK-NEXT: ret
305305
entry:
306306
%max = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %y, <8 x i16> zeroinitializer)
@@ -310,15 +310,15 @@ entry:
310310
ret <16 x i8> %shuffle
311311
}
312312

313+
; TODO: %min is clamped to [0, 65535], which is exactly the unsigned range of i16,
314+
; so the truncate is a saturating truncate. There is an optimization opportunity here.
313315
define <8 x i16> @us_maxmin_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
314316
; CHECK-LABEL: us_maxmin_v4i32_to_v8i16:
315317
; CHECK: // %bb.0: // %entry
316318
; CHECK-NEXT: movi v2.2d, #0000000000000000
317319
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
318320
; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
319-
; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff
320-
; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
321-
; CHECK-NEXT: xtn2 v0.8h, v1.4s
321+
; CHECK-NEXT: uqxtn2 v0.8h, v1.4s
322322
; CHECK-NEXT: ret
323323
entry:
324324
%max = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %y, <4 x i32> zeroinitializer)

0 commit comments

Comments
 (0)