Skip to content

Commit 1c2dfc3

Browse files
committed
[X86] Use FILD/FIST to implement i64 atomic load on 32-bit targets with X87, but no SSE2
If we have X87, but not SSE2, we can atomically load an i64 value into the significand of an 80-bit extended precision x87 register using fild. We can then use a fist instruction to convert it back to an i64 integer and store it to a stack temporary. From there we can do two 32-bit loads to get the value into integer registers without worrying about atomicity. This matches what gcc and icc do for this case and removes an existing FIXME. Differential Revision: https://reviews.llvm.org/D60156 llvm-svn: 358211
1 parent 1fe5a99 commit 1c2dfc3

File tree

8 files changed

+382
-323
lines changed

8 files changed

+382
-323
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25584,17 +25584,18 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
2558425584

2558525585
// Note: this turns large loads into lock cmpxchg8b/16b.
2558625586
// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
25587-
// TODO: In 32-bit mode, use FILD/FISTP when X87 is available?
2558825587
TargetLowering::AtomicExpansionKind
2558925588
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
2559025589
Type *MemType = LI->getType();
2559125590

2559225591
// If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
25593-
// can use movq to do the load.
25592+
// can use movq to do the load. If we have X87 we can load into an 80-bit
25593+
// X87 register and store it to a stack temporary.
2559425594
bool NoImplicitFloatOps =
2559525595
LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
2559625596
if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
25597-
!Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
25597+
!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
25598+
(Subtarget.hasSSE2() || Subtarget.hasX87()))
2559825599
return AtomicExpansionKind::None;
2559925600

2560025601
return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
@@ -27440,23 +27441,57 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
2744027441
bool NoImplicitFloatOps =
2744127442
DAG.getMachineFunction().getFunction().hasFnAttribute(
2744227443
Attribute::NoImplicitFloat);
27443-
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
27444-
Subtarget.hasSSE2()) {
27444+
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
2744527445
auto *Node = cast<AtomicSDNode>(N);
27446-
// Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the lower
27447-
// 64-bits.
27448-
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
27449-
SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
27450-
SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
27451-
MVT::i64, Node->getMemOperand());
27452-
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
27453-
DAG.getIntPtrConstant(0, dl));
27454-
Results.push_back(Res);
27455-
Results.push_back(Ld.getValue(1));
27456-
return;
27446+
if (Subtarget.hasSSE2()) {
27447+
// Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the
27448+
// lower 64-bits.
27449+
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
27450+
SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
27451+
SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
27452+
MVT::i64, Node->getMemOperand());
27453+
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
27454+
DAG.getIntPtrConstant(0, dl));
27455+
Results.push_back(Res);
27456+
Results.push_back(Ld.getValue(1));
27457+
return;
27458+
}
27459+
if (Subtarget.hasX87()) {
27460+
// First load this into an 80-bit X87 register. This will put the whole
27461+
// integer into the significand.
27462+
// FIXME: Do we need to glue? See FIXME comment in BuildFILD.
27463+
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue);
27464+
SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
27465+
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG,
27466+
dl, Tys, Ops, MVT::i64,
27467+
Node->getMemOperand());
27468+
SDValue Chain = Result.getValue(1);
27469+
SDValue InFlag = Result.getValue(2);
27470+
27471+
// Now store the X87 register to a stack temporary and convert to i64.
27472+
// This store is not atomic and doesn't need to be.
27473+
// FIXME: We don't need a stack temporary if the result of the load
27474+
// is already being stored. We could just directly store there.
27475+
SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
27476+
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
27477+
MachinePointerInfo MPI =
27478+
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
27479+
SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag };
27480+
Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl,
27481+
DAG.getVTList(MVT::Other), StoreOps,
27482+
MVT::i64, MPI, 0 /*Align*/,
27483+
MachineMemOperand::MOStore);
27484+
27485+
// Finally load the value back from the stack temporary and return it.
27486+
// This load is not atomic and doesn't need to be.
27487+
// This load will be further type legalized.
27488+
Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI);
27489+
Results.push_back(Result);
27490+
Results.push_back(Result.getValue(1));
27491+
return;
27492+
}
2745727493
}
2745827494
// TODO: Use MOVLPS when SSE1 is available?
27459-
// TODO: Use FILD/FISTP when X87 is available?
2746027495
// Delegate to generic TypeLegalization. Situations we can really handle
2746127496
// should have already been dealt with by AtomicExpandPass.cpp.
2746227497
break;
@@ -27649,6 +27684,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
2764927684
case X86ISD::FXOR: return "X86ISD::FXOR";
2765027685
case X86ISD::FILD: return "X86ISD::FILD";
2765127686
case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
27687+
case X86ISD::FIST: return "X86ISD::FIST";
2765227688
case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM";
2765327689
case X86ISD::FLD: return "X86ISD::FLD";
2765427690
case X86ISD::FST: return "X86ISD::FST";

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -608,16 +608,22 @@ namespace llvm {
608608
FILD,
609609
FILD_FLAG,
610610

611+
/// This instruction implements a fp->int store from FP stack
612+
/// slots. This corresponds to the fist instruction. It takes a
613+
/// chain operand, value to store, address, and glue. The memory VT
614+
/// specifies the type to store as.
615+
FIST,
616+
611617
/// This instruction implements an extending load to FP stack slots.
612618
/// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
613619
/// operand, and ptr to load from. The memory VT specifies the type to
614620
/// load from.
615621
FLD,
616622

617-
/// This instruction implements a truncating store to FP stack
623+
/// This instruction implements a truncating store from FP stack
618624
/// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
619-
/// chain operand, value to store, and address. The memory VT specifies
620-
/// the type to store as.
625+
/// chain operand, value to store, address, and glue. The memory VT
626+
/// specifies the type to store as.
621627
FST,
622628

623629
/// This instruction grabs the address of the next argument

llvm/lib/Target/X86/X86InstrFPStack.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def SDTX86Fld : SDTypeProfile<1, 1, [SDTCisFP<0>,
2121
def SDTX86Fst : SDTypeProfile<0, 2, [SDTCisFP<0>,
2222
SDTCisPtrTy<1>]>;
2323
def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
24+
def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
2425
def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
2526

2627
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -35,6 +36,9 @@ def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
3536
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
3637
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
3738
SDNPMemOperand]>;
39+
def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
40+
[SDNPHasChain, SDNPInGlue, SDNPMayStore,
41+
SDNPMemOperand]>;
3842
def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
3943
def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
4044
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -79,6 +83,11 @@ def X86fildflag64 : PatFrag<(ops node:$ptr), (X86fildflag node:$ptr), [{
7983
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
8084
}]>;
8185

86+
def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
87+
(X86fist node:$val, node:$ptr), [{
88+
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
89+
}]>;
90+
8291
def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
8392
(X86fp_to_mem node:$val, node:$ptr), [{
8493
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
@@ -760,6 +769,10 @@ def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
760769
// Used to conv. i64 to f64 since there isn't a SSE version.
761770
def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>;
762771

772+
// Used to conv. between f80 and i64 for i64 atomic loads.
773+
def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>;
774+
def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
775+
763776
// FP extensions map onto simple pseudo-value conversions if they are to/from
764777
// the FP stack.
765778
def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,

llvm/test/CodeGen/X86/atomic-fp.ll

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,13 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
7777
; X86-NOSSE-NEXT: pushl %ebx
7878
; X86-NOSSE-NEXT: pushl %esi
7979
; X86-NOSSE-NEXT: andl $-8, %esp
80-
; X86-NOSSE-NEXT: subl $16, %esp
80+
; X86-NOSSE-NEXT: subl $24, %esp
8181
; X86-NOSSE-NEXT: movl 8(%ebp), %esi
82-
; X86-NOSSE-NEXT: xorl %eax, %eax
83-
; X86-NOSSE-NEXT: xorl %edx, %edx
84-
; X86-NOSSE-NEXT: xorl %ecx, %ecx
85-
; X86-NOSSE-NEXT: xorl %ebx, %ebx
86-
; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
87-
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
82+
; X86-NOSSE-NEXT: fildll (%esi)
83+
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
84+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
85+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
86+
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
8887
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
8988
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
9089
; X86-NOSSE-NEXT: faddl 12(%ebp)
@@ -283,13 +282,12 @@ define void @fadd_64g() nounwind {
283282
; X86-NOSSE-NEXT: movl %esp, %ebp
284283
; X86-NOSSE-NEXT: pushl %ebx
285284
; X86-NOSSE-NEXT: andl $-8, %esp
286-
; X86-NOSSE-NEXT: subl $24, %esp
287-
; X86-NOSSE-NEXT: xorl %eax, %eax
288-
; X86-NOSSE-NEXT: xorl %edx, %edx
289-
; X86-NOSSE-NEXT: xorl %ecx, %ecx
290-
; X86-NOSSE-NEXT: xorl %ebx, %ebx
291-
; X86-NOSSE-NEXT: lock cmpxchg8b glob64
292-
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
285+
; X86-NOSSE-NEXT: subl $32, %esp
286+
; X86-NOSSE-NEXT: fildll glob64
287+
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
288+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
289+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
290+
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
293291
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
294292
; X86-NOSSE-NEXT: fld1
295293
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
@@ -484,13 +482,12 @@ define void @fadd_64imm() nounwind {
484482
; X86-NOSSE-NEXT: movl %esp, %ebp
485483
; X86-NOSSE-NEXT: pushl %ebx
486484
; X86-NOSSE-NEXT: andl $-8, %esp
487-
; X86-NOSSE-NEXT: subl $24, %esp
488-
; X86-NOSSE-NEXT: xorl %eax, %eax
489-
; X86-NOSSE-NEXT: xorl %edx, %edx
490-
; X86-NOSSE-NEXT: xorl %ecx, %ecx
491-
; X86-NOSSE-NEXT: xorl %ebx, %ebx
492-
; X86-NOSSE-NEXT: lock cmpxchg8b -559038737
493-
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
485+
; X86-NOSSE-NEXT: subl $32, %esp
486+
; X86-NOSSE-NEXT: fildll -559038737
487+
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
488+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
489+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
490+
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
494491
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
495492
; X86-NOSSE-NEXT: fld1
496493
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
@@ -691,13 +688,12 @@ define void @fadd_64stack() nounwind {
691688
; X86-NOSSE-NEXT: movl %esp, %ebp
692689
; X86-NOSSE-NEXT: pushl %ebx
693690
; X86-NOSSE-NEXT: andl $-8, %esp
694-
; X86-NOSSE-NEXT: subl $32, %esp
695-
; X86-NOSSE-NEXT: xorl %eax, %eax
696-
; X86-NOSSE-NEXT: xorl %edx, %edx
697-
; X86-NOSSE-NEXT: xorl %ecx, %ecx
698-
; X86-NOSSE-NEXT: xorl %ebx, %ebx
699-
; X86-NOSSE-NEXT: lock cmpxchg8b (%esp)
700-
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
691+
; X86-NOSSE-NEXT: subl $40, %esp
692+
; X86-NOSSE-NEXT: fildll (%esp)
693+
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
694+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
695+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
696+
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
701697
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
702698
; X86-NOSSE-NEXT: fld1
703699
; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp)
@@ -831,15 +827,14 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
831827
; X86-NOSSE-NEXT: pushl %edi
832828
; X86-NOSSE-NEXT: pushl %esi
833829
; X86-NOSSE-NEXT: andl $-8, %esp
834-
; X86-NOSSE-NEXT: subl $24, %esp
830+
; X86-NOSSE-NEXT: subl $32, %esp
835831
; X86-NOSSE-NEXT: movl 20(%ebp), %esi
836832
; X86-NOSSE-NEXT: movl 8(%ebp), %edi
837-
; X86-NOSSE-NEXT: xorl %eax, %eax
838-
; X86-NOSSE-NEXT: xorl %edx, %edx
839-
; X86-NOSSE-NEXT: xorl %ecx, %ecx
840-
; X86-NOSSE-NEXT: xorl %ebx, %ebx
841-
; X86-NOSSE-NEXT: lock cmpxchg8b (%edi,%esi,8)
842-
; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
833+
; X86-NOSSE-NEXT: fildll (%edi,%esi,8)
834+
; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp)
835+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
836+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
837+
; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
843838
; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
844839
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
845840
; X86-NOSSE-NEXT: faddl 12(%ebp)

llvm/test/CodeGen/X86/atomic-load-store-wide.ll

Lines changed: 28 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -45,22 +45,21 @@ define i64 @test2(i64* %ptr) {
4545
;
4646
; NOSSE-LABEL: test2:
4747
; NOSSE: # %bb.0:
48-
; NOSSE-NEXT: pushl %ebx
48+
; NOSSE-NEXT: pushl %ebp
4949
; NOSSE-NEXT: .cfi_def_cfa_offset 8
50-
; NOSSE-NEXT: pushl %esi
51-
; NOSSE-NEXT: .cfi_def_cfa_offset 12
52-
; NOSSE-NEXT: .cfi_offset %esi, -12
53-
; NOSSE-NEXT: .cfi_offset %ebx, -8
54-
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
55-
; NOSSE-NEXT: xorl %eax, %eax
56-
; NOSSE-NEXT: xorl %edx, %edx
57-
; NOSSE-NEXT: xorl %ecx, %ecx
58-
; NOSSE-NEXT: xorl %ebx, %ebx
59-
; NOSSE-NEXT: lock cmpxchg8b (%esi)
60-
; NOSSE-NEXT: popl %esi
61-
; NOSSE-NEXT: .cfi_def_cfa_offset 8
62-
; NOSSE-NEXT: popl %ebx
63-
; NOSSE-NEXT: .cfi_def_cfa_offset 4
50+
; NOSSE-NEXT: .cfi_offset %ebp, -8
51+
; NOSSE-NEXT: movl %esp, %ebp
52+
; NOSSE-NEXT: .cfi_def_cfa_register %ebp
53+
; NOSSE-NEXT: andl $-8, %esp
54+
; NOSSE-NEXT: subl $8, %esp
55+
; NOSSE-NEXT: movl 8(%ebp), %eax
56+
; NOSSE-NEXT: fildll (%eax)
57+
; NOSSE-NEXT: fistpll (%esp)
58+
; NOSSE-NEXT: movl (%esp), %eax
59+
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
60+
; NOSSE-NEXT: movl %ebp, %esp
61+
; NOSSE-NEXT: popl %ebp
62+
; NOSSE-NEXT: .cfi_def_cfa %esp, 4
6463
; NOSSE-NEXT: retl
6564
%val = load atomic i64, i64* %ptr seq_cst, align 8
6665
ret i64 %val
@@ -102,22 +101,21 @@ define i64 @test4(i64* %ptr) {
102101
;
103102
; NOSSE-LABEL: test4:
104103
; NOSSE: # %bb.0:
105-
; NOSSE-NEXT: pushl %ebx
106-
; NOSSE-NEXT: .cfi_def_cfa_offset 8
107-
; NOSSE-NEXT: pushl %esi
108-
; NOSSE-NEXT: .cfi_def_cfa_offset 12
109-
; NOSSE-NEXT: .cfi_offset %esi, -12
110-
; NOSSE-NEXT: .cfi_offset %ebx, -8
111-
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
112-
; NOSSE-NEXT: xorl %eax, %eax
113-
; NOSSE-NEXT: xorl %edx, %edx
114-
; NOSSE-NEXT: xorl %ecx, %ecx
115-
; NOSSE-NEXT: xorl %ebx, %ebx
116-
; NOSSE-NEXT: lock cmpxchg8b (%esi)
117-
; NOSSE-NEXT: popl %esi
104+
; NOSSE-NEXT: pushl %ebp
118105
; NOSSE-NEXT: .cfi_def_cfa_offset 8
119-
; NOSSE-NEXT: popl %ebx
120-
; NOSSE-NEXT: .cfi_def_cfa_offset 4
106+
; NOSSE-NEXT: .cfi_offset %ebp, -8
107+
; NOSSE-NEXT: movl %esp, %ebp
108+
; NOSSE-NEXT: .cfi_def_cfa_register %ebp
109+
; NOSSE-NEXT: andl $-8, %esp
110+
; NOSSE-NEXT: subl $8, %esp
111+
; NOSSE-NEXT: movl 8(%ebp), %eax
112+
; NOSSE-NEXT: fildll (%eax)
113+
; NOSSE-NEXT: fistpll (%esp)
114+
; NOSSE-NEXT: movl (%esp), %eax
115+
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
116+
; NOSSE-NEXT: movl %ebp, %esp
117+
; NOSSE-NEXT: popl %ebp
118+
; NOSSE-NEXT: .cfi_def_cfa %esp, 4
121119
; NOSSE-NEXT: retl
122120
%val = load atomic volatile i64, i64* %ptr seq_cst, align 8
123121
ret i64 %val

0 commit comments

Comments
 (0)