Skip to content

Commit b81e8db

Browse files
committed
Use intrinsics
1 parent 64335db commit b81e8db

File tree

5 files changed

+65
-73
lines changed

5 files changed

+65
-73
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 44 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -18961,36 +18961,10 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
1896118961
return SDValue();
1896218962
}
1896318963

18964-
/// Creates a scalar FP <-> INT conversion with a scalable one, wrapped
18965-
/// with an insert and extract.
18966-
static SDValue createScalarSVEFPConversion(SelectionDAG &DAG, unsigned Opc,
18967-
SDLoc DL, SDValue SrcVal, EVT SrcTy,
18968-
EVT DestTy) {
18969-
EVT SrcVecTy;
18970-
EVT DestVecTy;
18971-
if (DestTy.bitsGT(SrcTy)) {
18972-
DestVecTy = getPackedSVEVectorVT(DestTy);
18973-
SrcVecTy = DestVecTy.changeVectorElementType(SrcTy);
18974-
} else {
18975-
SrcVecTy = getPackedSVEVectorVT(SrcTy);
18976-
DestVecTy = SrcVecTy.changeVectorElementType(DestTy);
18977-
}
18978-
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
18979-
SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
18980-
DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
18981-
Vec = DAG.getNode(Opc, DL, DestVecTy, Vec);
18982-
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Vec, ZeroIdx);
18983-
}
18984-
1898518964
/// Tries to replace scalar FP <-> INT conversions with SVE in streaming functions.
1898618965
static SDValue
1898718966
tryReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
18988-
TargetLowering::DAGCombinerInfo &DCI,
1898918967
const AArch64Subtarget *Subtarget) {
18990-
// Uncomment to introduce extra fcvts.
18991-
// if (DCI.isBeforeLegalizeOps())
18992-
// return SDValue();
18993-
1899418968
if (N->isStrictFPOpcode())
1899518969
return SDValue();
1899618970

@@ -19015,39 +18989,64 @@ tryReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG,
1901518989
(!Subtarget->isStreaming() && !Subtarget->isStreamingCompatible()))
1901618990
return SDValue();
1901718991

19018-
SDLoc DL(N);
1901918992
unsigned Opc = N->getOpcode();
18993+
bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::FP_TO_SINT;
18994+
1902018995
SDValue SrcVal = N->getOperand(0);
1902118996
EVT SrcTy = SrcVal.getValueType();
1902218997
EVT DestTy = N->getValueType(0);
1902318998

19024-
// Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
19025-
// type (unlike the equivalent nxv2f32 for floating-point types).
19026-
// May materialize extra instructions :(
19027-
if (SrcTy == MVT::i32 && DestTy == MVT::f64) {
19028-
SDValue ExtSrc = DAG.getNode(Opc == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND
19029-
: ISD::ZERO_EXTEND,
19030-
DL, MVT::i64, SrcVal);
19031-
return createScalarSVEFPConversion(DAG, Opc, DL, ExtSrc, MVT::i64,
19032-
MVT::f64);
18999+
EVT SrcVecTy;
19000+
EVT DestVecTy;
19001+
if (DestTy.bitsGT(SrcTy)) {
19002+
DestVecTy = getPackedSVEVectorVT(DestTy);
19003+
SrcVecTy = SrcTy == MVT::i32 ? getPackedSVEVectorVT(SrcTy)
19004+
: DestVecTy.changeVectorElementType(SrcTy);
19005+
} else {
19006+
SrcVecTy = getPackedSVEVectorVT(SrcTy);
19007+
DestVecTy = DestTy == MVT::i32 ? getPackedSVEVectorVT(DestTy)
19008+
: SrcVecTy.changeVectorElementType(DestTy);
1903319009
}
19034-
if (SrcTy == MVT::f64 && DestTy == MVT::i32) {
19035-
SDValue ExtDest =
19036-
createScalarSVEFPConversion(DAG, Opc, DL, SrcVal, MVT::f64, MVT::i64);
19037-
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ExtDest);
19010+
19011+
SDLoc DL(N);
19012+
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19013+
SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SrcVecTy,
19014+
DAG.getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
19015+
19016+
// Conversions between f64 and i32 are a special case as nxv2i32 is an illegal
19017+
// type (unlike the equivalent nxv2f32 for floating-point types). So,
19018+
// unfortunately, the only way to lower to these variants is via the
19019+
// intrinsics. Note: We could sign/zero extend to the i64 variant, but that
19020+
// may result in extra extends or fmovs in the final assembly.
19021+
bool IsI32ToF64 = SrcTy == MVT::i32 && DestTy == MVT::f64;
19022+
bool isF64ToI32 = SrcTy == MVT::f64 && DestTy == MVT::i32;
19023+
if (IsI32ToF64 || isF64ToI32) {
19024+
unsigned IntrinsicOpc;
19025+
if (IsI32ToF64)
19026+
IntrinsicOpc = IsSigned ? Intrinsic::aarch64_sve_scvtf_f64i32
19027+
: Intrinsic::aarch64_sve_ucvtf_f64i32;
19028+
else
19029+
IntrinsicOpc = IsSigned ? Intrinsic::aarch64_sve_fcvtzs_i32f64
19030+
: Intrinsic::aarch64_sve_fcvtzu_i32f64;
19031+
SDValue PTrue = getPredicateForVector(DAG, DL, MVT::nxv2f64);
19032+
Vec = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, DestVecTy,
19033+
{DAG.getConstant(IntrinsicOpc, DL, MVT::i32),
19034+
DAG.getUNDEF(DestTy), PTrue, Vec});
19035+
} else {
19036+
Vec = DAG.getNode(Opc, DL, DestVecTy, Vec);
1903819037
}
19039-
return createScalarSVEFPConversion(DAG, Opc, DL, SrcVal, SrcTy, DestTy);
19038+
19039+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestTy, Vec, ZeroIdx);
1904019040
}
1904119041

1904219042
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
19043-
TargetLowering::DAGCombinerInfo &DCI,
1904419043
const AArch64Subtarget *Subtarget) {
1904519044
// First try to optimize away the conversion when it's conditionally from
1904619045
// a constant. Vectors only.
1904719046
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
1904819047
return Res;
1904919048

19050-
if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
19049+
if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
1905119050
return Res;
1905219051

1905319052
EVT VT = N->getValueType(0);
@@ -19088,7 +19087,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
1908819087
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
1908919088
TargetLowering::DAGCombinerInfo &DCI,
1909019089
const AArch64Subtarget *Subtarget) {
19091-
if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, DCI, Subtarget))
19090+
if (SDValue Res = tryReplaceScalarFPConversionWithSVE(N, DAG, Subtarget))
1909219091
return Res;
1909319092

1909419093
if (!Subtarget->isNeonAvailable())
@@ -26110,7 +26109,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2611026109
return performMulCombine(N, DAG, DCI, Subtarget);
2611126110
case ISD::SINT_TO_FP:
2611226111
case ISD::UINT_TO_FP:
26113-
return performIntToFpCombine(N, DAG, DCI, Subtarget);
26112+
return performIntToFpCombine(N, DAG, Subtarget);
2611426113
case ISD::FP_TO_SINT:
2611526114
case ISD::FP_TO_UINT:
2611626115
case ISD::FP_TO_SINT_SAT:

llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,8 @@ define i32 @f64_to_s32(double %x) {
8484
; CHECK: // %bb.0: // %entry
8585
; CHECK-NEXT: ptrue p0.d
8686
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
87-
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
88-
; CHECK-NEXT: fmov x0, d0
89-
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
87+
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.d
88+
; CHECK-NEXT: fmov w0, s0
9089
; CHECK-NEXT: ret
9190
;
9291
; NONEON-NOSVE-LABEL: f64_to_s32:
@@ -195,9 +194,8 @@ define i32 @f64_to_u32(double %x) {
195194
; CHECK: // %bb.0: // %entry
196195
; CHECK-NEXT: ptrue p0.d
197196
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
198-
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
199-
; CHECK-NEXT: fmov x0, d0
200-
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
197+
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.d
198+
; CHECK-NEXT: fmov w0, s0
201199
; CHECK-NEXT: ret
202200
;
203201
; NONEON-NOSVE-LABEL: f64_to_u32:

llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,9 @@ entry:
4545
define double @s32_to_f64(i32 %x) {
4646
; CHECK-LABEL: s32_to_f64:
4747
; CHECK: // %bb.0: // %entry
48-
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
49-
; CHECK-NEXT: sxtw x8, w0
48+
; CHECK-NEXT: fmov s0, w0
5049
; CHECK-NEXT: ptrue p0.d
51-
; CHECK-NEXT: fmov d0, x8
52-
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
50+
; CHECK-NEXT: scvtf z0.d, p0/m, z0.s
5351
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
5452
; CHECK-NEXT: ret
5553
;
@@ -102,10 +100,9 @@ entry:
102100
define double @u32_to_f64(i32 %x) {
103101
; CHECK-LABEL: u32_to_f64:
104102
; CHECK: // %bb.0: // %entry
105-
; CHECK-NEXT: mov w8, w0
103+
; CHECK-NEXT: fmov s0, w0
106104
; CHECK-NEXT: ptrue p0.d
107-
; CHECK-NEXT: fmov d0, x8
108-
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
105+
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
109106
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
110107
; CHECK-NEXT: ret
111108
;

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,7 +1166,7 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) {
11661166
; CHECK: // %bb.0:
11671167
; CHECK-NEXT: ptrue p0.d
11681168
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1169-
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
1169+
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.d
11701170
; CHECK-NEXT: fmov w8, s0
11711171
; CHECK-NEXT: mov z0.h, w8
11721172
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -2867,7 +2867,7 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) {
28672867
; CHECK: // %bb.0:
28682868
; CHECK-NEXT: ptrue p0.d
28692869
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
2870-
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
2870+
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.d
28712871
; CHECK-NEXT: fmov w8, s0
28722872
; CHECK-NEXT: mov z0.h, w8
28732873
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -392,8 +392,8 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
392392
; CHECK-NEXT: fmov w8, s0
393393
; CHECK-NEXT: ptrue p0.d
394394
; CHECK-NEXT: and w8, w8, #0xffff
395-
; CHECK-NEXT: fmov d0, x8
396-
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
395+
; CHECK-NEXT: fmov s0, w8
396+
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
397397
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
398398
; CHECK-NEXT: ret
399399
;
@@ -2836,10 +2836,10 @@ define float @scvtf_i16_f32(ptr %0) {
28362836
define double @scvtf_i16_f64(ptr %0) {
28372837
; CHECK-LABEL: scvtf_i16_f64:
28382838
; CHECK: // %bb.0:
2839-
; CHECK-NEXT: ldrsh x8, [x0]
2839+
; CHECK-NEXT: ldrsh w8, [x0]
28402840
; CHECK-NEXT: ptrue p0.d
2841-
; CHECK-NEXT: fmov d0, x8
2842-
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
2841+
; CHECK-NEXT: fmov s0, w8
2842+
; CHECK-NEXT: scvtf z0.d, p0/m, z0.s
28432843
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
28442844
; CHECK-NEXT: ret
28452845
;
@@ -2895,10 +2895,9 @@ define float @scvtf_i32_f32(ptr %0) {
28952895
define double @scvtf_i32_f64(ptr %0) {
28962896
; CHECK-LABEL: scvtf_i32_f64:
28972897
; CHECK: // %bb.0:
2898-
; CHECK-NEXT: ldrsw x8, [x0]
28992898
; CHECK-NEXT: ptrue p0.d
2900-
; CHECK-NEXT: fmov d0, x8
2901-
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
2899+
; CHECK-NEXT: ldr s0, [x0]
2900+
; CHECK-NEXT: scvtf z0.d, p0/m, z0.s
29022901
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
29032902
; CHECK-NEXT: ret
29042903
;
@@ -3016,8 +3015,8 @@ define double @ucvtf_i16_f64(ptr %0) {
30163015
; CHECK: // %bb.0:
30173016
; CHECK-NEXT: ldrh w8, [x0]
30183017
; CHECK-NEXT: ptrue p0.d
3019-
; CHECK-NEXT: fmov d0, x8
3020-
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
3018+
; CHECK-NEXT: fmov s0, w8
3019+
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
30213020
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
30223021
; CHECK-NEXT: ret
30233022
;
@@ -3073,10 +3072,9 @@ define float @ucvtf_i32_f32(ptr %0) {
30733072
define double @ucvtf_i32_f64(ptr %0) {
30743073
; CHECK-LABEL: ucvtf_i32_f64:
30753074
; CHECK: // %bb.0:
3076-
; CHECK-NEXT: ldr w8, [x0]
30773075
; CHECK-NEXT: ptrue p0.d
3078-
; CHECK-NEXT: fmov d0, x8
3079-
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
3076+
; CHECK-NEXT: ldr s0, [x0]
3077+
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
30803078
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
30813079
; CHECK-NEXT: ret
30823080
;

0 commit comments

Comments
 (0)