@@ -544,6 +544,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   if (!Subtarget.is64Bit())
     setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
 
+  if (Subtarget.is64Bit() && Subtarget.hasAVX()) {
+    // All CPUs supporting AVX will atomically load/store aligned 128-bit
+    // values, so we can emit [V]MOVAPS/[V]MOVDQA.
+    setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
+    setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
+  }
+
   if (Subtarget.canUseCMPXCHG16B())
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
 
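As a user-level illustration of what the new Custom registrations enable (not part of the patch): on an AVX-capable x86-64 target, a 16-byte atomic load or store can now be selected as a single [V]MOVAPS/[V]MOVDQA instead of a cmpxchg16b loop. The build flags and the exact instructions emitted are assumptions; toolchains without -mavx, or frontends that route __int128 atomics through libatomic, will not take this path.

```cpp
// Minimal sketch, assuming a Clang/GCC toolchain with __int128 support.
// Suggested build (an assumption): clang++ -std=c++17 -O2 -mavx example.cpp -latomic
#include <atomic>
#include <cstdio>

int main() {
  // std::atomic<__int128> is naturally 16-byte aligned, which is what makes
  // the single-instruction load/store legal on AVX hardware.
  std::atomic<__int128> V{0};
  __int128 X = ((__int128)0x0123456789abcdefLL << 64) | 0x0fedcba987654321LL;
  V.store(X, std::memory_order_release);           // candidate for one 16-byte store
  __int128 Y = V.load(std::memory_order_acquire);  // candidate for one 16-byte load
  std::printf("roundtrip %s\n", X == Y ? "ok" : "mismatch");
  return 0;
}
```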
@@ -30415,32 +30422,40 @@ TargetLoweringBase::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   Type *MemType = SI->getValueOperand()->getType();
 
-  bool NoImplicitFloatOps =
-      SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
-  if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
-      !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
-      (Subtarget.hasSSE1() || Subtarget.hasX87()))
-    return AtomicExpansionKind::None;
+  if (!SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
+      !Subtarget.useSoftFloat()) {
+    if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
+        (Subtarget.hasSSE1() || Subtarget.hasX87()))
+      return AtomicExpansionKind::None;
+
+    if (MemType->getPrimitiveSizeInBits() == 128 && Subtarget.is64Bit() &&
+        Subtarget.hasAVX())
+      return AtomicExpansionKind::None;
+  }
 
   return needsCmpXchgNb(MemType) ? AtomicExpansionKind::Expand
                                  : AtomicExpansionKind::None;
 }
 
 // Note: this turns large loads into lock cmpxchg8b/16b.
-// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   Type *MemType = LI->getType();
 
-  // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
-  // can use movq to do the load. If we have X87 we can load into an 80-bit
-  // X87 register and store it to a stack temporary.
-  bool NoImplicitFloatOps =
-      LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
-  if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
-      !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
-      (Subtarget.hasSSE1() || Subtarget.hasX87()))
-    return AtomicExpansionKind::None;
+  if (!LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
+      !Subtarget.useSoftFloat()) {
+    // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
+    // can use movq to do the load. If we have X87 we can load into an 80-bit
+    // X87 register and store it to a stack temporary.
+    if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
+        (Subtarget.hasSSE1() || Subtarget.hasX87()))
+      return AtomicExpansionKind::None;
+
+    // If this is a 128-bit load with AVX, 128-bit SSE loads/stores are atomic.
+    if (MemType->getPrimitiveSizeInBits() == 128 && Subtarget.is64Bit() &&
+        Subtarget.hasAVX())
+      return AtomicExpansionKind::None;
+  }
 
   return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
                                  : AtomicExpansionKind::None;
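For readers who want the decision above in one place, here is a condensed, self-contained sketch of the load-expansion logic after this change. Flags, needsCmpXchgNbLike() and shouldExpandAtomicLoad() are simplified stand-ins invented for illustration, not the LLVM API, and they only model the cases this hunk touches.

```cpp
#include <cstdio>

enum class AtomicExpansionKind { None, CmpXChg };

struct Flags {
  unsigned SizeInBits;          // width of the atomic load in bits
  bool Is64Bit;                 // compiling for x86-64
  bool HasSSE1, HasX87, HasAVX; // relevant subtarget features
  bool SoftFloat, NoImplicitFloat;
};

// Simplified stand-in for needsCmpXchgNb(): widths that would otherwise need a
// lock cmpxchg8b/16b loop.
static bool needsCmpXchgNbLike(const Flags &F) {
  return (F.SizeInBits == 64 && !F.Is64Bit) ||
         (F.SizeInBits == 128 && F.Is64Bit);
}

static AtomicExpansionKind shouldExpandAtomicLoad(const Flags &F) {
  if (!F.NoImplicitFloat && !F.SoftFloat) {
    // 64-bit load on a 32-bit target: movq (SSE) or an x87 load is atomic.
    if (F.SizeInBits == 64 && !F.Is64Bit && (F.HasSSE1 || F.HasX87))
      return AtomicExpansionKind::None;
    // 128-bit load on a 64-bit target with AVX: the vector load is atomic.
    if (F.SizeInBits == 128 && F.Is64Bit && F.HasAVX)
      return AtomicExpansionKind::None;
  }
  return needsCmpXchgNbLike(F) ? AtomicExpansionKind::CmpXChg
                               : AtomicExpansionKind::None;
}

int main() {
  Flags AVX128{128, true, true, true, true, false, false};
  Flags NoAVX128{128, true, true, true, false, false, false};
  std::printf("i128 with AVX    -> %s\n",
              shouldExpandAtomicLoad(AVX128) == AtomicExpansionKind::None
                  ? "plain load" : "cmpxchg16b loop");
  std::printf("i128 without AVX -> %s\n",
              shouldExpandAtomicLoad(NoAVX128) == AtomicExpansionKind::None
                  ? "plain load" : "cmpxchg16b loop");
  return 0;
}
```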
@@ -31683,14 +31698,21 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
   if (!IsSeqCst && IsTypeLegal)
     return Op;
 
-  if (VT == MVT::i64 && !IsTypeLegal) {
+  if (!IsTypeLegal && !Subtarget.useSoftFloat() &&
+      !DAG.getMachineFunction().getFunction().hasFnAttribute(
+          Attribute::NoImplicitFloat)) {
+    SDValue Chain;
+    // For illegal i128 atomic_store, when AVX is enabled, we can simply emit a
+    // vector store.
+    if (VT == MVT::i128 && Subtarget.is64Bit() && Subtarget.hasAVX()) {
+      SDValue VecVal = DAG.getBitcast(MVT::v2i64, Node->getVal());
+      Chain = DAG.getStore(Node->getChain(), dl, VecVal, Node->getBasePtr(),
+                           Node->getMemOperand());
+    }
+
     // For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE
     // is enabled.
-    bool NoImplicitFloatOps =
-        DAG.getMachineFunction().getFunction().hasFnAttribute(
-            Attribute::NoImplicitFloat);
-    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
-      SDValue Chain;
+    if (VT == MVT::i64) {
       if (Subtarget.hasSSE1()) {
         SDValue SclToVec =
             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Node->getVal());
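The i128 path above hinges on one bitcast: the scalar value is reinterpreted as a v2i64 so that a single 16-byte vector store writes it. Below is a standalone sketch of that reinterpretation, using std::bit_cast (C++20) as an assumed stand-in for the DAG bitcast and assuming x86's little-endian lane order; it is not SelectionDAG code.

```cpp
#include <array>
#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  unsigned __int128 V = ((unsigned __int128)0x99aabbccddeeff00ULL << 64) |
                        0x1122334455667788ULL;
  // Reinterpret the 128-bit value as a v2i64-like pair of lanes; the bits are
  // unchanged, only the type differs.
  auto Lanes = std::bit_cast<std::array<uint64_t, 2>>(V);
  std::printf("lane0=%016llx lane1=%016llx\n",
              (unsigned long long)Lanes[0], (unsigned long long)Lanes[1]);
  return 0;
}
```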
@@ -31722,15 +31744,15 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
           DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other),
                                   StoreOps, MVT::i64, Node->getMemOperand());
       }
+    }
 
-      if (Chain) {
-        // If this is a sequentially consistent store, also emit an appropriate
-        // barrier.
-        if (IsSeqCst)
-          Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
+    if (Chain) {
+      // If this is a sequentially consistent store, also emit an appropriate
+      // barrier.
+      if (IsSeqCst)
+        Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
 
-        return Chain;
-      }
+      return Chain;
     }
   }
 
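With the refactoring above, the i128 and i64 paths share one tail: whatever plain store ended up in Chain is followed by a locked stack operation only when the store is sequentially consistent. The user-level sketch below is not part of the patch, and the instruction sequences named in its comments are assumptions inferred from the code above rather than guaranteed compiler output.

```cpp
#include <atomic>

alignas(16) std::atomic<__int128> G{0};

void publish_seq_cst(__int128 V) {
  // Expected shape with AVX (an assumption): one 16-byte vector store,
  // followed by a locked stack operation acting as a full barrier.
  G.store(V, std::memory_order_seq_cst);
}

void publish_release(__int128 V) {
  // Expected shape with AVX (an assumption): one 16-byte vector store, no
  // trailing barrier.
  G.store(V, std::memory_order_release);
}

int main() {
  publish_release(1);
  publish_seq_cst(2);
  return 0;
}
```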
@@ -33303,12 +33325,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::ATOMIC_LOAD: {
-    assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+    assert(
+        (N->getValueType(0) == MVT::i64 || N->getValueType(0) == MVT::i128) &&
+        "Unexpected VT!");
     bool NoImplicitFloatOps =
         DAG.getMachineFunction().getFunction().hasFnAttribute(
             Attribute::NoImplicitFloat);
     if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
       auto *Node = cast<AtomicSDNode>(N);
+
+      if (N->getValueType(0) == MVT::i128) {
+        if (Subtarget.is64Bit() && Subtarget.hasAVX()) {
+          SDValue Ld = DAG.getLoad(MVT::v2i64, dl, Node->getChain(),
+                                   Node->getBasePtr(), Node->getMemOperand());
+          SDValue ResL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+                                     DAG.getIntPtrConstant(0, dl));
+          SDValue ResH = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+                                     DAG.getIntPtrConstant(1, dl));
+          Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0),
+                                        {ResL, ResH}));
+          Results.push_back(Ld.getValue(1));
+          return;
+        }
+        break;
+      }
       if (Subtarget.hasSSE1()) {
         // Use a VZEXT_LOAD which will be selected as MOVQ or XORPS+MOVLPS.
         // Then extract the lower 64-bits.
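The i128 case above splits the v2i64 load result into two i64 halves and glues them back together with BUILD_PAIR, so later legalization sees an ordinary pair. Below is a minimal arithmetic sketch of that recombination, not SelectionDAG code; buildPair() is a hypothetical helper and the little-endian half order is an assumption consistent with x86.

```cpp
#include <cstdint>
#include <cstdio>

// Combine two 64-bit halves into one 128-bit value, low half first, mirroring
// what BUILD_PAIR produces from the extracted vector lanes.
static unsigned __int128 buildPair(uint64_t Lo, uint64_t Hi) {
  return ((unsigned __int128)Hi << 64) | Lo;
}

int main() {
  uint64_t Lanes[2] = {0x1122334455667788ULL, 0x99aabbccddeeff00ULL};
  unsigned __int128 V = buildPair(Lanes[0], Lanes[1]);
  std::printf("lo=%016llx hi=%016llx\n",
              (unsigned long long)(uint64_t)V,
              (unsigned long long)(uint64_t)(V >> 64));
  return 0;
}
```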