@@ -12221,67 +12221,56 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
12221
12221
In, DAG.getUNDEF(SVT)));
12222
12222
}
12223
12223
12224
- static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
12225
- LLVMContext *Context = DAG.getContext();
12226
- SDLoc dl(Op);
12227
- MVT VT = Op.getSimpleValueType();
12228
- MVT EltVT = VT;
12229
- unsigned NumElts = VT == MVT::f64 ? 2 : 4;
12230
- if (VT.isVector()) {
12231
- EltVT = VT.getVectorElementType();
12232
- NumElts = VT.getVectorNumElements();
12233
- }
12234
-
12235
- unsigned EltBits = EltVT.getSizeInBits();
12236
- Constant *C = ConstantInt::get(*Context, APInt::getSignedMaxValue(EltBits));
12237
- C = ConstantVector::getSplat(NumElts, C);
12238
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12239
- SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
12240
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
12241
- SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
12242
- MachinePointerInfo::getConstantPool(),
12243
- false, false, false, Alignment);
12244
- if (VT.isVector()) {
12245
- MVT ANDVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
12246
- return DAG.getNode(ISD::BITCAST, dl, VT,
12247
- DAG.getNode(ISD::AND, dl, ANDVT,
12248
- DAG.getNode(ISD::BITCAST, dl, ANDVT,
12249
- Op.getOperand(0)),
12250
- DAG.getNode(ISD::BITCAST, dl, ANDVT, Mask)));
12251
- }
12252
- return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
12253
- }
12224
+ // The only differences between FABS and FNEG are the mask and the logic op.
12225
+ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
12226
+ assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
12227
+ "Wrong opcode for lowering FABS or FNEG.");
12254
12228
12255
- static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) {
12256
- LLVMContext *Context = DAG.getContext();
12229
+ bool IsFABS = (Op.getOpcode() == ISD::FABS);
12257
12230
SDLoc dl(Op);
12258
12231
MVT VT = Op.getSimpleValueType();
12232
+ // Assume scalar op for initialization; update for vector if needed.
12233
+ // Note that there are no scalar bitwise logical SSE/AVX instructions, so we
12234
+ // generate a 16-byte vector constant and logic op even for the scalar case.
12235
+ // Using a 16-byte mask allows folding the load of the mask with
12236
+ // the logic op, so it can save (~4 bytes) on code size.
12259
12237
MVT EltVT = VT;
12260
12238
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
12239
+ // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
12240
+ // decide if we should generate a 16-byte constant mask when we only need 4 or
12241
+ // 8 bytes for the scalar case.
12261
12242
if (VT.isVector()) {
12262
12243
EltVT = VT.getVectorElementType();
12263
12244
NumElts = VT.getVectorNumElements();
12264
12245
}
12265
12246
12266
12247
unsigned EltBits = EltVT.getSizeInBits();
12267
- Constant *C = ConstantInt::get(*Context, APInt::getSignBit(EltBits));
12248
+ LLVMContext *Context = DAG.getContext();
12249
+ // For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
12250
+ APInt MaskElt =
12251
+ IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
12252
+ Constant *C = ConstantInt::get(*Context, MaskElt);
12268
12253
C = ConstantVector::getSplat(NumElts, C);
12269
12254
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12270
12255
SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
12271
12256
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
12272
12257
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
12273
12258
MachinePointerInfo::getConstantPool(),
12274
12259
false, false, false, Alignment);
12260
+
12275
12261
if (VT.isVector()) {
12276
- MVT XORVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits()/64);
12262
+ // For a vector, cast operands to a vector type, perform the logic op,
12263
+ // and cast the result back to the original value type.
12264
+ MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
12265
+ SDValue Op0Casted = DAG.getNode(ISD::BITCAST, dl, VecVT, Op.getOperand(0));
12266
+ SDValue MaskCasted = DAG.getNode(ISD::BITCAST, dl, VecVT, Mask);
12267
+ unsigned LogicOp = IsFABS ? ISD::AND : ISD::XOR;
12277
12268
return DAG.getNode(ISD::BITCAST, dl, VT,
12278
- DAG.getNode(ISD::XOR, dl, XORVT,
12279
- DAG.getNode(ISD::BITCAST, dl, XORVT,
12280
- Op.getOperand(0)),
12281
- DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
12269
+ DAG.getNode(LogicOp, dl, VecVT, Op0Casted, MaskCasted));
12282
12270
}
12283
-
12284
- return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
12271
+ // If not vector, then scalar.
12272
+ unsigned LogicOp = IsFABS ? X86ISD::FAND : X86ISD::FXOR;
12273
+ return DAG.getNode(LogicOp, dl, VT, Op.getOperand(0), Mask);
12285
12274
}
12286
12275
12287
12276
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
@@ -16908,8 +16897,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
16908
16897
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
16909
16898
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
16910
16899
case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG);
16911
- case ISD::FABS: return LowerFABS(Op, DAG);
16912
- case ISD::FNEG: return LowerFNEG (Op, DAG);
16900
+ case ISD::FABS:
16901
+ case ISD::FNEG: return LowerFABSorFNEG (Op, DAG);
16913
16902
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
16914
16903
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
16915
16904
case ISD::SETCC: return LowerSETCC(Op, DAG);
0 commit comments