Skip to content

Commit e09d40d

Browse files
committed
Move to DAGCombine + fixups
1 parent c87775c commit e09d40d

File tree

3 files changed

+111
-64
lines changed

3 files changed

+111
-64
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 60 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -4550,10 +4550,9 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
45504550
EVT VT = Op.getValueType();
45514551

45524552
if (VT.isScalableVector()) {
4553-
unsigned Opc = Op.getOpcode();
4554-
bool IsSigned = Opc == ISD::FP_TO_SINT || Opc == ISD::STRICT_FP_TO_SINT;
4555-
unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
4556-
: AArch64ISD::FCVTZU_MERGE_PASSTHRU;
4553+
unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
4554+
? AArch64ISD::FCVTZU_MERGE_PASSTHRU
4555+
: AArch64ISD::FCVTZS_MERGE_PASSTHRU;
45574556
return LowerToPredicatedOp(Op, DAG, Opcode);
45584557
}
45594558

@@ -4629,46 +4628,6 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
46294628
return Op;
46304629
}
46314630

4632-
static bool CanLowerToScalarSVEFPIntConversion(EVT VT) {
4633-
if (!VT.isSimple())
4634-
return false;
4635-
// There are SVE instructions that can convert to/from all pairs of these int
4636-
// and float types. Note: We don't bother with i8 or i16 as those are illegal
4637-
// types for scalars.
4638-
return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
4639-
VT.getSimpleVT().SimpleTy);
4640-
}
4641-
4642-
/// Lowers a scalar FP conversion (to/from) int to SVE.
4643-
static SDValue LowerScalarFPConversionToSVE(SDValue Op, SelectionDAG &DAG) {
4644-
assert(!Op->isStrictFPOpcode() && "strict fp ops not supported");
4645-
SDValue SrcVal = Op.getOperand(0);
4646-
EVT SrcTy = SrcVal.getValueType();
4647-
EVT DestTy = Op.getValueType();
4648-
EVT SrcVecTy;
4649-
EVT DestVecTy;
4650-
// Use a packed vector for the larger type.
4651-
// Note: For conversions such as FCVTZS_ZPmZ_DtoS, and UCVTF_ZPmZ_StoD that
4652-
// notionally take or return a nxv2i32 type we must instead use a nxv4i32, as
4653-
// (unlike floats) nxv2i32 is an illegal unpacked type.
4654-
if (DestTy.bitsGT(SrcTy)) {
4655-
DestVecTy = getPackedSVEVectorVT(DestTy);
4656-
SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
4657-
: DestVecTy.changeVectorElementType(SrcTy);
4658-
} else {
4659-
SrcVecTy = getPackedSVEVectorVT(SrcTy);
4660-
DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
4661-
: SrcVecTy.changeVectorElementType(DestTy);
4662-
}
4663-
SDLoc dl(Op);
4664-
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
4665-
SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SrcVecTy,
4666-
DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
4667-
Vec = DAG.getNode(Op.getOpcode(), dl, DestVecTy, Vec);
4668-
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
4669-
ZeroIdx);
4670-
}
4671-
46724631
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
46734632
SelectionDAG &DAG) const {
46744633
bool IsStrict = Op->isStrictFPOpcode();
@@ -4677,12 +4636,6 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
46774636
if (SrcVal.getValueType().isVector())
46784637
return LowerVectorFP_TO_INT(Op, DAG);
46794638

4680-
if (!IsStrict && !Subtarget->isNeonAvailable() &&
4681-
Subtarget->isSVEorStreamingSVEAvailable() &&
4682-
CanLowerToScalarSVEFPIntConversion(SrcVal.getValueType()) &&
4683-
CanLowerToScalarSVEFPIntConversion(Op.getValueType()))
4684-
return LowerScalarFPConversionToSVE(Op, DAG);
4685-
46864639
// f16 conversions are promoted to f32 when full fp16 is not supported.
46874640
if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
46884641
SrcVal.getValueType() == MVT::bf16) {
@@ -4986,12 +4939,6 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
49864939
bool IsStrict = Op->isStrictFPOpcode();
49874940
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
49884941

4989-
if (!IsStrict && !Subtarget->isNeonAvailable() &&
4990-
Subtarget->isSVEorStreamingSVEAvailable() &&
4991-
CanLowerToScalarSVEFPIntConversion(SrcVal.getValueType()) &&
4992-
CanLowerToScalarSVEFPIntConversion(Op.getValueType()))
4993-
return LowerScalarFPConversionToSVE(Op, DAG);
4994-
49954942
bool IsSigned = Op->getOpcode() == ISD::STRICT_SINT_TO_FP ||
49964943
Op->getOpcode() == ISD::SINT_TO_FP;
49974944

@@ -18982,13 +18929,67 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
1898218929
return SDValue();
1898318930
}
1898418931

18932+
static bool
18933+
shouldUseSVEForScalarFPConversion(SDNode *N,
18934+
const AArch64Subtarget *Subtarget) {
18935+
auto isSupportedType = [](EVT VT) {
18936+
if (!VT.isSimple())
18937+
return false;
18938+
// There are SVE instructions that can convert to/from all pairs of these
18939+
// int and float types. Note: We don't bother with i8 or i16 as those are
18940+
// illegal types for scalars.
18941+
return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
18942+
VT.getSimpleVT().SimpleTy);
18943+
};
18944+
// If we are in a streaming[-compatible] function, use SVE for scalar FP <->
18945+
// INT conversions as this can help avoid movs between GPRs and FPRs, which
18946+
// could be quite expensive.
18947+
return !N->isStrictFPOpcode() && Subtarget->isSVEorStreamingSVEAvailable() &&
18948+
(Subtarget->isStreaming() || Subtarget->isStreamingCompatible()) &&
18949+
isSupportedType(N->getValueType(0)) &&
18950+
isSupportedType(N->getOperand(0).getValueType());
18951+
}
18952+
18953+
/// Replaces a scalar FP <-> INT conversion with an SVE (scalable) one, wrapped
18954+
/// with an insert and extract.
18955+
static SDValue replaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG) {
18956+
assert(!N->isStrictFPOpcode() && "strict fp ops not supported");
18957+
SDValue SrcVal = N->getOperand(0);
18958+
EVT SrcTy = SrcVal.getValueType();
18959+
EVT DestTy = N->getValueType(0);
18960+
EVT SrcVecTy;
18961+
EVT DestVecTy;
18962+
// Use a packed vector for the larger type.
18963+
// Note: For conversions such as FCVTZS_ZPmZ_DtoS, and UCVTF_ZPmZ_StoD that
18964+
// notionally take or return a nxv2i32 type we must instead use a nxv4i32, as
18965+
// (unlike floats) nxv2i32 is an illegal unpacked type.
18966+
if (DestTy.bitsGT(SrcTy)) {
18967+
DestVecTy = getPackedSVEVectorVT(DestTy);
18968+
SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
18969+
: DestVecTy.changeVectorElementType(SrcTy);
18970+
} else {
18971+
SrcVecTy = getPackedSVEVectorVT(SrcTy);
18972+
DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
18973+
: SrcVecTy.changeVectorElementType(DestTy);
18974+
}
18975+
SDLoc dl(N);
18976+
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
18977+
SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SrcVecTy,
18978+
DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
18979+
Vec = DAG.getNode(N->getOpcode(), dl, DestVecTy, Vec);
18980+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DestTy, Vec, ZeroIdx);
18981+
}
18982+
1898518983
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
1898618984
const AArch64Subtarget *Subtarget) {
1898718985
// First try to optimize away the conversion when it's conditionally from
1898818986
// a constant. Vectors only.
1898918987
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
1899018988
return Res;
1899118989

18990+
if (shouldUseSVEForScalarFPConversion(N, Subtarget))
18991+
return replaceScalarFPConversionWithSVE(N, DAG);
18992+
1899218993
EVT VT = N->getValueType(0);
1899318994
if (VT != MVT::f32 && VT != MVT::f64)
1899418995
return SDValue();
@@ -19027,6 +19028,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
1902719028
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
1902819029
TargetLowering::DAGCombinerInfo &DCI,
1902919030
const AArch64Subtarget *Subtarget) {
19031+
if (shouldUseSVEForScalarFPConversion(N, Subtarget))
19032+
return replaceScalarFPConversionWithSVE(N, DAG);
19033+
1903019034
if (!Subtarget->isNeonAvailable())
1903119035
return SDValue();
1903219036

llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -force-streaming-compatible < %s | FileCheck %s
3-
; RUN: llc -force-streaming-compatible -mattr=+sme2p2 < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
4-
; RUN: llc < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
2+
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3+
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
4+
; RUN: llc -mattr=+sme2p2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
5+
; RUN: llc -mattr=+neon < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
6+
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
57

68
target triple = "aarch64-unknown-linux-gnu"
79

@@ -20,6 +22,12 @@ define double @t1(double %x) {
2022
; USE-NEON-NO-GPRS-NEXT: fcvtzs d0, d0
2123
; USE-NEON-NO-GPRS-NEXT: scvtf d0, d0
2224
; USE-NEON-NO-GPRS-NEXT: ret
25+
;
26+
; NONEON-NOSVE-LABEL: t1:
27+
; NONEON-NOSVE: // %bb.0: // %entry
28+
; NONEON-NOSVE-NEXT: fcvtzs x8, d0
29+
; NONEON-NOSVE-NEXT: scvtf d0, x8
30+
; NONEON-NOSVE-NEXT: ret
2331
entry:
2432
%conv = fptosi double %x to i64
2533
%conv1 = sitofp i64 %conv to double
@@ -41,6 +49,12 @@ define float @t2(float %x) {
4149
; USE-NEON-NO-GPRS-NEXT: fcvtzs s0, s0
4250
; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
4351
; USE-NEON-NO-GPRS-NEXT: ret
52+
;
53+
; NONEON-NOSVE-LABEL: t2:
54+
; NONEON-NOSVE: // %bb.0: // %entry
55+
; NONEON-NOSVE-NEXT: fcvtzs w8, s0
56+
; NONEON-NOSVE-NEXT: scvtf s0, w8
57+
; NONEON-NOSVE-NEXT: ret
4458
entry:
4559
%conv = fptosi float %x to i32
4660
%conv1 = sitofp i32 %conv to float
@@ -64,6 +78,14 @@ define half @t3(half %x) {
6478
; USE-NEON-NO-GPRS-NEXT: scvtf s0, s0
6579
; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0
6680
; USE-NEON-NO-GPRS-NEXT: ret
81+
;
82+
; NONEON-NOSVE-LABEL: t3:
83+
; NONEON-NOSVE: // %bb.0: // %entry
84+
; NONEON-NOSVE-NEXT: fcvt s0, h0
85+
; NONEON-NOSVE-NEXT: fcvtzs w8, s0
86+
; NONEON-NOSVE-NEXT: scvtf s0, w8
87+
; NONEON-NOSVE-NEXT: fcvt h0, s0
88+
; NONEON-NOSVE-NEXT: ret
6789
entry:
6890
%conv = fptosi half %x to i32
6991
%conv1 = sitofp i32 %conv to half
@@ -85,6 +107,12 @@ define double @t4(double %x) {
85107
; USE-NEON-NO-GPRS-NEXT: fcvtzu d0, d0
86108
; USE-NEON-NO-GPRS-NEXT: ucvtf d0, d0
87109
; USE-NEON-NO-GPRS-NEXT: ret
110+
;
111+
; NONEON-NOSVE-LABEL: t4:
112+
; NONEON-NOSVE: // %bb.0: // %entry
113+
; NONEON-NOSVE-NEXT: fcvtzu x8, d0
114+
; NONEON-NOSVE-NEXT: ucvtf d0, x8
115+
; NONEON-NOSVE-NEXT: ret
88116
entry:
89117
%conv = fptoui double %x to i64
90118
%conv1 = uitofp i64 %conv to double
@@ -106,6 +134,12 @@ define float @t5(float %x) {
106134
; USE-NEON-NO-GPRS-NEXT: fcvtzu s0, s0
107135
; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
108136
; USE-NEON-NO-GPRS-NEXT: ret
137+
;
138+
; NONEON-NOSVE-LABEL: t5:
139+
; NONEON-NOSVE: // %bb.0: // %entry
140+
; NONEON-NOSVE-NEXT: fcvtzu w8, s0
141+
; NONEON-NOSVE-NEXT: ucvtf s0, w8
142+
; NONEON-NOSVE-NEXT: ret
109143
entry:
110144
%conv = fptoui float %x to i32
111145
%conv1 = uitofp i32 %conv to float
@@ -129,6 +163,14 @@ define half @t6(half %x) {
129163
; USE-NEON-NO-GPRS-NEXT: ucvtf s0, s0
130164
; USE-NEON-NO-GPRS-NEXT: fcvt h0, s0
131165
; USE-NEON-NO-GPRS-NEXT: ret
166+
;
167+
; NONEON-NOSVE-LABEL: t6:
168+
; NONEON-NOSVE: // %bb.0: // %entry
169+
; NONEON-NOSVE-NEXT: fcvt s0, h0
170+
; NONEON-NOSVE-NEXT: fcvtzu w8, s0
171+
; NONEON-NOSVE-NEXT: ucvtf s0, w8
172+
; NONEON-NOSVE-NEXT: fcvt h0, s0
173+
; NONEON-NOSVE-NEXT: ret
132174
entry:
133175
%conv = fptoui half %x to i32
134176
%conv1 = uitofp i32 %conv to half

llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3-
; RUN: llc -mattr=+sve,+sme -force-streaming < %s | FileCheck %s
3+
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,FORCE-STREAMING
44
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
55

66
target triple = "aarch64-unknown-linux-gnu"
@@ -226,10 +226,11 @@ entry:
226226
}
227227

228228
define half @strict_convert_signed(i32 %x) {
229-
; CHECK-LABEL: strict_convert_signed:
230-
; CHECK: // %bb.0: // %entry
231-
; CHECK-NEXT: scvtf h0, w0
232-
; CHECK-NEXT: ret
229+
; FORCE-STREAMING-LABEL: strict_convert_signed:
230+
; FORCE-STREAMING: // %bb.0: // %entry
231+
; FORCE-STREAMING-NEXT: scvtf s0, w0
232+
; FORCE-STREAMING-NEXT: fcvt h0, s0
233+
; FORCE-STREAMING-NEXT: ret
233234
;
234235
; NONEON-NOSVE-LABEL: strict_convert_signed:
235236
; NONEON-NOSVE: // %bb.0: // %entry

0 commit comments

Comments
 (0)