Skip to content

Commit 839f1e4

Browse files
authored
[X86][SDAG] Improve the lowering of s|uitofp i8|i16 to half (#70834)
Prior to this patch, vector `s|uitofp` conversions from narrow types (`<= i16`) were scalarized when the hardware doesn't support fp16 conversions natively.

This patch fixes that by avoiding the use of `i16` as an intermediate type when there is no hardware support for converting this type to half. In other words, when the target doesn't support `avx512fp16`, we avoid using intermediate `i16` vectors for `s|uitofp` conversions. Instead, we extend the narrow type to `i32`, which is then converted to `float` and downcast to `half`.

Put differently, we go from:
```
s|uitofp iNarrow %src to half
```
To:
```
%tmp = s|zext iNarrow %src to i32
%tmpfp = s|uitofp i32 %tmp to float
fptrunc float %tmpfp to half
```

Note that this patch:
- Doesn't change the actual lowering of i32 to half. I.e., the `float` intermediate step and the final downcasting are what already existed for this input type to half.
- Changes only the intermediate type for the lowering of `s|uitofp`. I.e., the first `s|zext` from i16 to i32.

Remark: The vector and scalar lowerings of `s|uitofp` don't use the same code path. Not super happy about that, but I'm not planning to fix that, at least in this PR.

This fixes #67080
1 parent 6cc1c2c commit 839f1e4

File tree

3 files changed

+442
-270
lines changed

3 files changed

+442
-270
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -53388,19 +53388,26 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
5338853388
EVT VT = N->getValueType(0);
5338953389
EVT InVT = Op0.getValueType();
5339053390

53391-
// UINT_TO_FP(vXi1~15) -> UINT_TO_FP(ZEXT(vXi1~15 to vXi16))
53392-
// UINT_TO_FP(vXi17~31) -> UINT_TO_FP(ZEXT(vXi17~31 to vXi32))
53391+
// Using i16 as an intermediate type is a bad idea, unless we have HW support
53392+
// for it. Therefore, for type sizes smaller than or equal to 32, just go with i32.
53393+
// if hasFP16 support:
53394+
// UINT_TO_FP(vXi1~15) -> UINT_TO_FP(ZEXT(vXi1~15 to vXi16))
53395+
// UINT_TO_FP(vXi17~31) -> UINT_TO_FP(ZEXT(vXi17~31 to vXi32))
53396+
// else
53397+
// UINT_TO_FP(vXi1~31) -> UINT_TO_FP(ZEXT(vXi1~31 to vXi32))
5339353398
// UINT_TO_FP(vXi33~63) -> UINT_TO_FP(ZEXT(vXi33~63 to vXi64))
5339453399
if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
5339553400
unsigned ScalarSize = InVT.getScalarSizeInBits();
53396-
if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
53401+
if ((ScalarSize == 16 && Subtarget.hasFP16()) || ScalarSize == 32 ||
53402+
ScalarSize >= 64)
5339753403
return SDValue();
5339853404
SDLoc dl(N);
53399-
EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
53400-
ScalarSize < 16 ? MVT::i16
53401-
: ScalarSize < 32 ? MVT::i32
53402-
: MVT::i64,
53403-
InVT.getVectorNumElements());
53405+
EVT DstVT =
53406+
EVT::getVectorVT(*DAG.getContext(),
53407+
(Subtarget.hasFP16() && ScalarSize < 16) ? MVT::i16
53408+
: ScalarSize < 32 ? MVT::i32
53409+
: MVT::i64,
53410+
InVT.getVectorNumElements());
5340453411
SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
5340553412
if (IsStrict)
5340653413
return DAG.getNode(ISD::STRICT_UINT_TO_FP, dl, {VT, MVT::Other},
@@ -53451,19 +53458,26 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
5345153458
EVT VT = N->getValueType(0);
5345253459
EVT InVT = Op0.getValueType();
5345353460

53454-
// SINT_TO_FP(vXi1~15) -> SINT_TO_FP(SEXT(vXi1~15 to vXi16))
53455-
// SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32))
53461+
// Using i16 as an intermediate type is a bad idea, unless we have HW support
53462+
// for it. Therefore, for type sizes smaller than or equal to 32, just go with i32.
53463+
// if hasFP16 support:
53464+
// SINT_TO_FP(vXi1~15) -> SINT_TO_FP(SEXT(vXi1~15 to vXi16))
53465+
// SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32))
53466+
// else
53467+
// SINT_TO_FP(vXi1~31) -> SINT_TO_FP(SEXT(vXi1~31 to vXi32))
5345653468
// SINT_TO_FP(vXi33~63) -> SINT_TO_FP(SEXT(vXi33~63 to vXi64))
5345753469
if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
5345853470
unsigned ScalarSize = InVT.getScalarSizeInBits();
53459-
if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
53471+
if ((ScalarSize == 16 && Subtarget.hasFP16()) || ScalarSize == 32 ||
53472+
ScalarSize >= 64)
5346053473
return SDValue();
5346153474
SDLoc dl(N);
53462-
EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
53463-
ScalarSize < 16 ? MVT::i16
53464-
: ScalarSize < 32 ? MVT::i32
53465-
: MVT::i64,
53466-
InVT.getVectorNumElements());
53475+
EVT DstVT =
53476+
EVT::getVectorVT(*DAG.getContext(),
53477+
(Subtarget.hasFP16() && ScalarSize < 16) ? MVT::i16
53478+
: ScalarSize < 32 ? MVT::i32
53479+
: MVT::i64,
53480+
InVT.getVectorNumElements());
5346753481
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
5346853482
if (IsStrict)
5346953483
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},

0 commit comments

Comments
 (0)