-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[PowerPC] Fix use of FPSCR builtins in smmintrin.h #67299
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
2d62858
2c7688c
f80fbd7
c50491e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -646,8 +646,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, | |
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); | ||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); | ||
|
||
// To handle counter-based loop conditions. | ||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); | ||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); | ||
|
||
setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); | ||
setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); | ||
|
@@ -11595,6 +11595,50 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { | |
llvm_unreachable("ERROR:Should return for all cases within swtich."); | ||
} | ||
|
||
// Lower mffsl intrinsic with mffs in targets without ISA 3.0 | ||
static SDValue lowerMFFSL(SDValue Op, SelectionDAG &DAG, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we actually need this. The reason There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Thanks. Given what 'lightweight' means here, this sounds reasonable. I don't have a strong preference for aligning with GCC's behavior. We already have builtins that are P9-only and that can't be, or haven't been, emulated. |
||
const PPCSubtarget &Subtarget) { | ||
assert(cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue() == | ||
Intrinsic::ppc_mffsl && | ||
"Should only be called on int_ppc_mffsl"); | ||
if (Subtarget.isISA3_0()) | ||
return Op; | ||
|
||
SDLoc dl(Op); | ||
SDValue Chain = Op.getOperand(0); | ||
SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain); | ||
Chain = MFFS.getValue(1); | ||
|
||
if (Subtarget.isPPC64()) { | ||
SDValue Int = DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS); | ||
// Mask 29-31, 45-51 and 56-63 bits | ||
SDValue Masked = DAG.getNode(ISD::AND, dl, MVT::i64, Int, | ||
DAG.getConstant(0x70007f0ffULL, dl, MVT::i64)); | ||
SDValue Cast = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Masked); | ||
return DAG.getMergeValues({Cast, Chain}, dl); | ||
} | ||
|
||
MachineFunction &MF = DAG.getMachineFunction(); | ||
MachinePointerInfo PtrInfo; | ||
int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false); | ||
SDValue Base = DAG.getFrameIndex(SSFI, MVT::i32); | ||
Chain = DAG.getStore(Chain, dl, MFFS, Base, PtrInfo); | ||
|
||
assert(!Subtarget.isLittleEndian() && "32-bit little endian is unsupported!"); | ||
SDValue Offset4 = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, | ||
DAG.getConstant(4, dl, MVT::i32)); | ||
SDValue Hi = DAG.getLoad(MVT::i32, dl, Chain, Base, PtrInfo); | ||
SDValue Lo = DAG.getLoad(MVT::i32, dl, Hi.getValue(1), Offset4, PtrInfo); | ||
Chain = Lo.getValue(1); | ||
Hi = | ||
DAG.getNode(ISD::AND, dl, MVT::i32, Hi, DAG.getConstant(7, dl, MVT::i32)); | ||
Lo = DAG.getNode(ISD::AND, dl, MVT::i32, Lo, | ||
DAG.getConstant(0x7f0ffULL, dl, MVT::i32)); | ||
Chain = DAG.getStore(Chain, dl, Hi, Base, PtrInfo); | ||
Chain = DAG.getStore(Chain, dl, Lo, Offset4, PtrInfo); | ||
return DAG.getLoad(MVT::f64, dl, Chain, Base, PtrInfo); | ||
} | ||
|
||
/// LowerOperation - Provide custom lowering hooks for some operations. | ||
/// | ||
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | ||
|
@@ -11669,8 +11713,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | |
return LowerFP_ROUND(Op, DAG); | ||
case ISD::ROTL: return LowerROTL(Op, DAG); | ||
|
||
// For counter-based loop handling. | ||
case ISD::INTRINSIC_W_CHAIN: return SDValue(); | ||
case ISD::INTRINSIC_W_CHAIN: { | ||
if (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue() == | ||
Intrinsic::ppc_mffsl) | ||
return lowerMFFSL(Op, DAG, Subtarget); | ||
return SDValue(); | ||
} | ||
|
||
case ISD::BITCAST: return LowerBITCAST(Op, DAG); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't believe that this builtin needs to be renamed. On clang, both `__builtin_mffs` and `__builtin_ppc_mffs` work. Also, this is the same for the other 3 builtins.
When you update these names, you will probably also have to update the `ppc-smmintrin.c` test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`__builtin_mffs` aliases to `__builtin_ppc_mffs` through a macro, but the compat macros do not always work. In test cases using `-ffreestanding` or targeting non-AIX, non-Linux OSes, the macros will not be defined.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, that's fair.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we remove `__fpscr_save.__fpscr &= 0x70007f0ffL;`? I suspect it may break some assumptions in the code that follows.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure that there are any assumptions broken here. For example, `__builtin_ppc_set_fpscr_rn` only uses the last two bits for the rounding control and masks off the rest anyway. Also, `__builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);` is using the mask `0b00000011`, so it only uses the last 8 bits.