Skip to content

Commit d04e63c

Browse files
committed
Fixups
1 parent 9441ab2 commit d04e63c

File tree

2 files changed

+37
-24
lines changed

2 files changed

+37
-24
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -19087,69 +19087,82 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
1908719087
/// functions, this can help to reduce the number of fmovs to/from GPRs.
1908819088
static SDValue
1908919089
tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
19090+
TargetLowering::DAGCombinerInfo &DCI,
1909019091
const AArch64Subtarget *Subtarget) {
1909119092
if (N->isStrictFPOpcode())
1909219093
return SDValue();
1909319094

19095+
if (DCI.isBeforeLegalizeOps())
19096+
return SDValue();
19097+
1909419098
if (!Subtarget->isSVEorStreamingSVEAvailable() ||
1909519099
(!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
1909619100
return SDValue();
1909719101

1909819102
auto isSupportedType = [](EVT VT) {
19099-
if (!VT.isSimple())
19100-
return false;
19101-
// There are SVE instructions that can convert to/from all pairs of these
19102-
// int and float types. Note: We don't bother with i8 or i16 as those are
19103-
// illegal types for scalars.
19104-
return is_contained({MVT::i32, MVT::i64, MVT::f16, MVT::f32, MVT::f64},
19105-
VT.getSimpleVT().SimpleTy);
19103+
return VT != MVT::bf16 && VT != MVT::f128;
1910619104
};
1910719105

1910819106
if (!isSupportedType(N->getValueType(0)) ||
1910919107
!isSupportedType(N->getOperand(0).getValueType()))
1911019108
return SDValue();
1911119109

19110+
// Look through fp_extends to avoid extra fcvts.
1911219111
SDValue SrcVal = N->getOperand(0);
19112+
if (SrcVal->getOpcode() == ISD::FP_EXTEND &&
19113+
isSupportedType(SrcVal->getOperand(0).getValueType()))
19114+
SrcVal = SrcVal->getOperand(0);
19115+
1911319116
EVT SrcTy = SrcVal.getValueType();
1911419117
EVT DestTy = N->getValueType(0);
1911519118

19116-
bool IsI32ToF64 = SrcTy == MVT::i32 && DestTy == MVT::f64;
19117-
bool isF64ToI32 = SrcTy == MVT::f64 && DestTy == MVT::i32;
19118-
19119-
// Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
19120-
// type (unlike the equivalent nxv2f32 for floating-point types).
19121-
// TODO: Support these conversions.
19122-
if (IsI32ToF64 || isF64ToI32)
19123-
return SDValue();
19119+
// Merge in any subsequent fp_round to avoid extra fcvts.
19120+
SDNode *FPRoundNode = nullptr;
19121+
if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND &&
19122+
isSupportedType(N->use_begin()->getValueType(0))) {
19123+
FPRoundNode = *N->use_begin();
19124+
DestTy = FPRoundNode->getValueType(0);
19125+
}
1912419126

1912519127
EVT SrcVecTy;
1912619128
EVT DestVecTy;
1912719129
if (DestTy.bitsGT(SrcTy)) {
1912819130
DestVecTy = getPackedSVEVectorVT(DestTy);
19129-
SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
19130-
: DestVecTy.changeVectorElementType(SrcTy);
19131+
SrcVecTy = DestVecTy.changeVectorElementType(SrcTy);
1913119132
} else {
1913219133
SrcVecTy = getPackedSVEVectorVT(SrcTy);
19133-
DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
19134-
: SrcVecTy.changeVectorElementType(DestTy);
19134+
DestVecTy = SrcVecTy.changeVectorElementType(DestTy);
1913519135
}
1913619136

19137+
// Ensure the resulting src/dest vector type is legal.
19138+
if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32)
19139+
return SDValue();
19140+
1913719141
SDLoc DL(N);
1913819142
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
1913919143
SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
1914019144
DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
1914119145
SDValue Convert = DAG.getNode(N->getOpcode(), DL, DestVecTy, Vec);
19142-
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Convert, ZeroIdx);
19146+
SDValue Scalar =
19147+
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Convert, ZeroIdx);
19148+
19149+
if (FPRoundNode) {
19150+
DAG.ReplaceAllUsesWith(SDValue(FPRoundNode, 0), Scalar);
19151+
return SDValue();
19152+
}
19153+
return Scalar;
1914319154
}
1914419155

1914519156
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
19157+
TargetLowering::DAGCombinerInfo &DCI,
1914619158
const AArch64Subtarget *Subtarget) {
1914719159
// First try to optimize away the conversion when it's conditionally from
1914819160
// a constant. Vectors only.
1914919161
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
1915019162
return Res;
1915119163

19152-
if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
19164+
if (SDValue Res =
19165+
tryToReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
1915319166
return Res;
1915419167

1915519168
EVT VT = N->getValueType(0);
@@ -19190,7 +19203,8 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
1919019203
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
1919119204
TargetLowering::DAGCombinerInfo &DCI,
1919219205
const AArch64Subtarget *Subtarget) {
19193-
if (SDValue Res = tryToReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
19206+
if (SDValue Res =
19207+
tryToReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
1919419208
return Res;
1919519209

1919619210
if (!Subtarget->isNeonAvailable())
@@ -26273,7 +26287,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2627326287
return performMulCombine(N, DAG, DCI, Subtarget);
2627426288
case ISD::SINT_TO_FP:
2627526289
case ISD::UINT_TO_FP:
26276-
return performIntToFpCombine(N, DAG, Subtarget);
26290+
return performIntToFpCombine(N, DAG, DCI, Subtarget);
2627726291
case ISD::FP_TO_SINT:
2627826292
case ISD::FP_TO_UINT:
2627926293
case ISD::FP_TO_SINT_SAT:

llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
33
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
44
; RUN: llc -mattr=+sme2p2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
5-
; RUN: llc -mattr=+neon < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
65
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
76

87
target triple = "aarch64-unknown-linux-gnu"

0 commit comments

Comments
 (0)