53
53
#include "llvm/CodeGen/SelectionDAGNodes.h"
54
54
#include "llvm/CodeGen/TargetCallingConv.h"
55
55
#include "llvm/CodeGen/TargetInstrInfo.h"
56
+ #include "llvm/CodeGen/TargetLowering.h"
56
57
#include "llvm/CodeGen/TargetOpcodes.h"
57
58
#include "llvm/CodeGen/ValueTypes.h"
58
59
#include "llvm/CodeGenTypes/MachineValueType.h"
@@ -1268,6 +1269,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
1268
1269
for (auto Op : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
1269
1270
ISD::STRICT_UINT_TO_FP})
1270
1271
for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
1272
+ // Integer-to-FP conversions from these types (including v2i64) are Custom.
1271
1273
setOperationAction(Op, VT, Custom);
1272
1274
1273
1275
if (Subtarget->hasFullFP16()) {
@@ -1285,6 +1287,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
1285
1287
} else {
1286
1288
// when AArch64 doesn't have fullfp16 support, promote the input
1287
1289
// to i32 first.
1290
+ // The i8-vector promotions (e.g. v8i8 -> v8i32) are requested here.
1288
1291
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1289
1292
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1290
1293
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
@@ -5095,6 +5098,31 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
5095
5098
uint64_t VTSize = VT.getFixedSizeInBits();
5096
5099
uint64_t InVTSize = InVT.getFixedSizeInBits();
5097
5100
if (VTSize < InVTSize) {
5101
+ bool IsTargetf32Orf64 = VT.getVectorElementType() == MVT::f32 ||
5102
+ VT.getVectorElementType() == MVT::f64;
5103
+ bool IsTargetf16 = false;
5104
+ // Check whether Op's only user chain is CONCAT_VECTORS -> FP_ROUND to f16.
5105
+ if (Op.hasOneUse() &&
5106
+ Op->user_begin()->getOpcode() == ISD::CONCAT_VECTORS &&
5107
+ Op->user_begin()->hasOneUse() &&
5108
+ Op->user_begin()->user_begin()->getOpcode() == ISD::FP_ROUND &&
5109
+ Op->user_begin()->user_begin()->getValueType(0).isVector() &&
5110
+ Op->user_begin()
5111
+ ->user_begin()
5112
+ ->getValueType(0)
5113
+ .getVectorElementType() == MVT::f16) {
5114
+ IsTargetf16 = true;
5115
+ }
5116
+
5117
+ if (IsTargetf32Orf64 && !IsTargetf16) {
5118
+ // AArch64 doesn't have a direct vector instruction to convert
5119
+ // fixed point to floating point AND narrow it at the same time.
5120
+ // Additional rounding when the target is f32/f64 causes subtle
5121
+ // differences across different platforms (that do have such
5122
+ // instructions). Conversion to f16 however is fine.
5123
+ return SDValue();
5124
+ }
5125
+
5098
5126
MVT CastVT =
5099
5127
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
5100
5128
InVT.getVectorNumElements());
0 commit comments