Skip to content

Commit 0e96551

Browse files
committed
Review updates.
1 parent 8f3d0ae commit 0e96551

File tree

9 files changed

+180
-152
lines changed

9 files changed

+180
-152
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 70 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,14 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
232232
for (auto Op : {ISD::FP_TO_SINT, ISD::STRICT_FP_TO_SINT,
233233
ISD::SINT_TO_FP, ISD::STRICT_SINT_TO_FP})
234234
setOperationAction(Op, VT, Custom);
235-
for (auto Op : {ISD::FP_TO_UINT, ISD::STRICT_FP_TO_UINT,
236-
ISD::UINT_TO_FP, ISD::STRICT_UINT_TO_FP})
235+
for (auto Op : {ISD::FP_TO_UINT, ISD::STRICT_FP_TO_UINT})
237236
setOperationAction(Op, VT, Custom);
237+
for (auto Op : {ISD::UINT_TO_FP, ISD::STRICT_UINT_TO_FP}) {
238+
// On z10 (no FP extension), promote unsigned i32 inputs to signed i64.
239+
auto OpAction =
240+
(!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
241+
setOperationAction(Op, VT, OpAction);
242+
}
238243
}
239244
}
240245

@@ -578,7 +583,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
578583

579584
// Special treatment.
580585
setOperationAction(ISD::IS_FPCLASS, VT, Custom);
581-
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
582586

583587
// Handle constrained floating-point operations.
584588
setOperationAction(ISD::STRICT_FADD, VT, Legal);
@@ -6825,22 +6829,18 @@ SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
68256829
EVT InVT = InOp.getValueType();
68266830

68276831
// FP to unsigned is not directly supported on z10. Promoting an i32
6828-
// result to i64 doesn't generate an inexact condition for values that are
6829-
// outside the i32 range but in the i64 range, so use the default
6830-
// expansion.
6832+
// result to (signed) i64 doesn't generate an inexact condition (fp
6833+
// exception) for values that are outside the i32 range but in the i64
6834+
// range, so use the default expansion.
68316835
if (!Subtarget.hasFPExtension() && !IsSigned)
6832-
return SDValue(); // Expand (i32 / i64).
6836+
// Expand i32/i64. An f16 input always fits the signed range and gets extended.
6837+
return SDValue();
68336838

6839+
// Conversion from f16 is done via f32.
68346840
if (InOp.getSimpleValueType() == MVT::f16) {
6835-
// f16: Extend to f32 before the conversion.
6836-
if (!IsStrict) {
6837-
SDValue InF32 = DAG.getFPExtendOrRound(InOp, SDLoc(InOp), MVT::f32);
6838-
return DAG.getNode(Op->getOpcode(), DL, Op.getSimpleValueType(), InF32);
6839-
}
6840-
SDValue InF32;
6841-
std::tie(InF32, Chain) =
6842-
DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
6843-
return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), {Chain, InF32});
6841+
SmallVector<SDValue, 2> Results;
6842+
LowerOperationWrapper(Op.getNode(), Results, DAG);
6843+
return DAG.getMergeValues(Results, DL);
68446844
}
68456845

68466846
if (VT == MVT::i128) {
@@ -6863,45 +6863,17 @@ SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
68636863
SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
68646864
EVT InVT = InOp.getValueType();
68656865

6866-
auto roundToF16 = [&DAG, &IsStrict, &DL, &Chain](SDValue V) -> SDValue {
6867-
if (!IsStrict)
6868-
return DAG.getFPExtendOrRound(V, DL, MVT::f16);
6869-
SDValue F16Res;
6870-
std::tie(F16Res, Chain) =
6871-
DAG.getStrictFPExtendOrRound(V, V.getValue(1), DL, MVT::f16);
6872-
return DAG.getMergeValues({F16Res, Chain}, DL);
6873-
};
6874-
6875-
// Unsigned to fp is not directly supported on z10.
6876-
if (!Subtarget.hasFPExtension() && !IsSigned) {
6877-
if (InVT == MVT::i32) { // Conversion from i32 is promoted to i64 (signed).
6878-
SDValue I64In = DAG.getZExtOrTrunc(InOp, DL, MVT::i64);
6879-
SDValue FPRes;
6880-
MVT ResVT = VT == MVT::f16 ? MVT::f32 : VT;
6881-
if (!IsStrict)
6882-
FPRes = DAG.getNode(ISD::SINT_TO_FP, DL, ResVT, I64In);
6883-
else
6884-
FPRes = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
6885-
DAG.getVTList(ResVT, MVT::Other), {Chain, I64In});
6886-
return VT == MVT::f16 ? roundToF16(FPRes) : FPRes;
6887-
}
6888-
assert(InVT == MVT::i64 && "i32 and i64 are the only legal int types.");
6889-
if (VT != MVT::f16)
6890-
return SDValue(); // Expand
6891-
}
6892-
68936866
// Conversion to f16 is done via f32.
68946867
if (VT == MVT::f16) {
6895-
SDValue PromotedOp;
6896-
if (!IsStrict)
6897-
PromotedOp = DAG.getNode(Op->getOpcode(), DL, MVT::f32, InOp);
6898-
else
6899-
PromotedOp =
6900-
DAG.getNode(Op->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
6901-
{Chain, InOp});
6902-
return roundToF16(PromotedOp);
6868+
SmallVector<SDValue, 2> Results;
6869+
LowerOperationWrapper(Op.getNode(), Results, DAG);
6870+
return DAG.getMergeValues(Results, DL);
69036871
}
69046872

6873+
// Unsigned to fp is not directly supported on z10.
6874+
if (!Subtarget.hasFPExtension() && !IsSigned)
6875+
return SDValue(); // Expand i64.
6876+
69056877
if (InVT == MVT::i128) {
69066878
RTLIB::Libcall LC =
69076879
IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
@@ -7019,23 +6991,17 @@ SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
70196991

70206992
SDValue SystemZTargetLowering::lowerFCOPYSIGN(SDValue Op,
70216993
SelectionDAG &DAG) const {
7022-
SDValue Op0 = Op.getOperand(0);
7023-
SDValue Op1 = Op.getOperand(1);
7024-
MVT Op0VT = Op0.getSimpleValueType();
7025-
MVT Op1VT = Op1.getSimpleValueType();
7026-
if (Op0VT != MVT::f16 && Op1VT != MVT::f16)
7027-
return Op; // Legal
6994+
MVT VT = Op.getSimpleValueType();
6995+
SDValue ValOp = Op.getOperand(0);
6996+
SDValue SignOp = Op.getOperand(1);
70286997

7029-
// Perform the copy on to the largest type present, or f32 if it was f16.
7030-
MVT VT = (Op0VT.getSizeInBits() > Op1VT.getSizeInBits()) ? Op0VT : Op1VT;
7031-
if (VT == MVT::f16)
7032-
VT = MVT::f32;
6998+
// Skip the FP_ROUND on the sign operand; rounding to half would need a libcall.
6999+
if (VT == MVT::f16 && SignOp.getOpcode() == ISD::FP_ROUND) {
7000+
SDValue WideOp = SignOp.getOperand(0);
7001+
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(Op), VT, ValOp, WideOp);
7002+
}
70337003

7034-
SDLoc DL(Op);
7035-
SDValue Op0Conv = DAG.getFPExtendOrRound(Op0, DL, VT);
7036-
SDValue Op1Conv = DAG.getFPExtendOrRound(Op1, DL, VT);
7037-
SDValue ResConv = DAG.getNode(ISD::FCOPYSIGN, DL, VT, {Op0Conv, Op1Conv});
7038-
return DAG.getFPExtendOrRound(ResConv, DL, Op0VT);
7004+
return Op; // Legal
70397005
}
70407006

70417007
SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
@@ -7359,71 +7325,60 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
73597325
}
73607326
break;
73617327
}
7328+
case ISD::UINT_TO_FP:
73627329
case ISD::SINT_TO_FP:
7363-
case ISD::UINT_TO_FP: {
7364-
if (useSoftFloat())
7365-
return;
7366-
SDLoc DL(N);
7367-
SDValue Src = N->getOperand(0);
7368-
EVT ResVT = N->getValueType(0);
7369-
if (ResVT == MVT::f16) {
7370-
SDValue F32Res = DAG.getNode(N->getOpcode(), DL, MVT::f32, Src);
7371-
Results.push_back(DAG.getFPExtendOrRound(F32Res, DL, MVT::f16));
7372-
}
7373-
break;
7374-
}
7375-
case ISD::STRICT_SINT_TO_FP:
7376-
case ISD::STRICT_UINT_TO_FP: {
7330+
case ISD::STRICT_UINT_TO_FP:
7331+
case ISD::STRICT_SINT_TO_FP: {
73777332
if (useSoftFloat())
73787333
return;
7334+
bool IsStrict = N->isStrictFPOpcode();
73797335
SDLoc DL(N);
7380-
SDValue Chain = N->getOperand(0);
7381-
SDValue Src = N->getOperand(1);
7336+
SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
73827337
EVT ResVT = N->getValueType(0);
7338+
SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
73837339
if (ResVT == MVT::f16) {
7384-
SDValue F32Res =
7385-
DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7386-
{Chain, Src});
7387-
SDValue F16Res;
7388-
std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7389-
F32Res, F32Res.getValue(1), DL, MVT::f16);
7390-
Results.push_back(F16Res);
7391-
Results.push_back(Chain);
7340+
if (!IsStrict) {
7341+
SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
7342+
Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
7343+
} else {
7344+
SDValue OpF32 =
7345+
DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7346+
{Chain, InOp});
7347+
SDValue F16Res;
7348+
std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7349+
OpF32, OpF32.getValue(1), DL, MVT::f16);
7350+
Results.push_back(F16Res);
7351+
Results.push_back(Chain);
7352+
}
73927353
}
73937354
break;
73947355
}
73957356
case ISD::FP_TO_UINT:
7396-
case ISD::FP_TO_SINT: {
7397-
if (useSoftFloat())
7398-
return;
7399-
SDLoc DL(N);
7400-
SDValue Src = N->getOperand(0);
7401-
EVT SrcVT = Src->getValueType(0);
7402-
if (SrcVT == MVT::f16) {
7403-
SDValue SrcF32 = DAG.getFPExtendOrRound(Src, DL, MVT::f32);
7404-
SDValue OpF32 =
7405-
DAG.getNode(N->getOpcode(), DL, N->getValueType(0), SrcF32);
7406-
Results.push_back(OpF32);
7407-
}
7408-
break;
7409-
}
7357+
case ISD::FP_TO_SINT:
74107358
case ISD::STRICT_FP_TO_UINT:
74117359
case ISD::STRICT_FP_TO_SINT: {
74127360
if (useSoftFloat())
74137361
return;
7362+
bool IsStrict = N->isStrictFPOpcode();
74147363
SDLoc DL(N);
74157364
EVT ResVT = N->getValueType(0);
7416-
SDValue Chain = N->getOperand(0);
7417-
SDValue Src = N->getOperand(1);
7418-
EVT SrcVT = Src->getValueType(0);
7419-
if (SrcVT == MVT::f16) {
7420-
SDValue InF32;
7421-
std::tie(InF32, Chain) =
7422-
DAG.getStrictFPExtendOrRound(Src, Chain, DL, MVT::f32);
7423-
SDValue OpF32 = DAG.getNode(
7424-
N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other), {Chain, InF32});
7425-
Results.push_back(OpF32);
7426-
Results.push_back(OpF32.getValue(1));
7365+
SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7366+
EVT InVT = InOp->getValueType(0);
7367+
SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7368+
if (InVT == MVT::f16) {
7369+
if (!IsStrict) {
7370+
SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
7371+
Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
7372+
} else {
7373+
SDValue InF32;
7374+
std::tie(InF32, Chain) =
7375+
DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
7376+
SDValue OpF32 =
7377+
DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
7378+
{Chain, InF32});
7379+
Results.push_back(OpF32);
7380+
Results.push_back(OpF32.getValue(1));
7381+
}
74277382
}
74287383
break;
74297384
}

llvm/lib/Target/SystemZ/SystemZInstrFP.td

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,25 @@ let Predicates = [FeatureNoVectorEnhancements1] in
8484
def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>;
8585
def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>;
8686

87+
// fcopysign with an FP16 result.
88+
let isCodeGenOnly = 1 in {
89+
def CPSDRhh : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP16, FP16, FP16>;
90+
def CPSDRhs : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP16, FP16, FP32>;
91+
def CPSDRhd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP16, FP16, FP64>;
92+
}
93+
94+
// The sign of an FP128 is in the high register.
95+
let Predicates = [FeatureNoVectorEnhancements1] in
96+
def : Pat<(fcopysign FP16:$src1, (f128 FP128:$src2)),
97+
(CPSDRhd FP16:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
98+
let Predicates = [FeatureVectorEnhancements1] in
99+
def : Pat<(fcopysign FP16:$src1, (f128 VR128:$src2)),
100+
(CPSDRhd FP16:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
101+
102+
87103
// fcopysign with an FP32 result.
88104
let isCodeGenOnly = 1 in {
105+
def CPSDRsh : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP16>;
89106
def CPSDRss : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP32>;
90107
def CPSDRsd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP64>;
91108
}
@@ -99,8 +116,10 @@ let Predicates = [FeatureVectorEnhancements1] in
99116
(CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
100117

101118
// fcopysign with an FP64 result.
102-
let isCodeGenOnly = 1 in
119+
let isCodeGenOnly = 1 in {
120+
def CPSDRdh : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP16>;
103121
def CPSDRds : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP32>;
122+
}
104123
def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>;
105124

106125
// The sign of an FP128 is in the high register.
@@ -118,6 +137,8 @@ class CopySign128<RegisterOperand cls, dag upper>
118137
(INSERT_SUBREG FP128:$src1, upper, subreg_h64)>;
119138

120139
let Predicates = [FeatureNoVectorEnhancements1] in {
140+
def : CopySign128<FP16, (CPSDRdh (EXTRACT_SUBREG FP128:$src1, subreg_h64),
141+
FP16:$src2)>;
121142
def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64),
122143
FP32:$src2)>;
123144
def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),

llvm/lib/Target/SystemZ/SystemZScheduleZ13.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -787,7 +787,7 @@ def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
787787
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
788788

789789
// Copy sign
790-
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
790+
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s|h)(d|s|h)$")>;
791791

792792
//===----------------------------------------------------------------------===//
793793
// FP: Load instructions

llvm/lib/Target/SystemZ/SystemZScheduleZ14.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -807,7 +807,7 @@ def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
807807
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
808808

809809
// Copy sign
810-
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
810+
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s|h)(d|s|h)$")>;
811811

812812
//===----------------------------------------------------------------------===//
813813
// FP: Load instructions

llvm/lib/Target/SystemZ/SystemZScheduleZ15.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -825,7 +825,7 @@ def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
825825
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
826826

827827
// Copy sign
828-
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
828+
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s|h)(d|s|h)$")>;
829829

830830
//===----------------------------------------------------------------------===//
831831
// FP: Load instructions

llvm/lib/Target/SystemZ/SystemZScheduleZ16.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -826,7 +826,7 @@ def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
826826
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
827827

828828
// Copy sign
829-
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
829+
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s|h)(d|s|h)$")>;
830830

831831
//===----------------------------------------------------------------------===//
832832
// FP: Load instructions

llvm/lib/Target/SystemZ/SystemZScheduleZ196.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -719,7 +719,7 @@ def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
719719
def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>;
720720

721721
// Copy sign
722-
def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>;
722+
def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s|h)(d|s|h)$")>;
723723

724724
//===----------------------------------------------------------------------===//
725725
// FP: Load instructions

llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,7 @@ def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
757757
def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>;
758758

759759
// Copy sign
760-
def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>;
760+
def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s|h)(d|s|h)$")>;
761761

762762
//===----------------------------------------------------------------------===//
763763
// FP: Load instructions

0 commit comments

Comments
 (0)