Skip to content

Commit 85a9f2e

Browse files
diggerlin and arsenm authored
[PowerPC] enable AtomicExpandImpl::expandAtomicCmpXchg for powerpc (#142395)
In PowerPC, the AtomicCmpXchgInst is lowered to ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS. However, this node does not handle the weak attribute of AtomicCmpXchgInst. As a result, when compiling C++ atomic_compare_exchange_weak_explicit, the generated assembly includes a "reservation lost" loop — i.e., it branches back and retries if the stwcx. (store-conditional) fails. This differs from GCC’s codegen, which does not include that loop for weak compare-exchange. Since PowerPC uses LL/SC-style atomic instructions, the patch enables AtomicExpandImpl::expandAtomicCmpXchg for PowerPC. With this, the weak attribute is properly respected, and the "reservation lost" loop is removed for weak operations. --------- Co-authored-by: Matt Arsenault <[email protected]>
1 parent a59e4ac commit 85a9f2e

17 files changed

+3133
-2111
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -254,20 +254,20 @@ class LLVM_ABI TargetLoweringBase {
254254
/// support for these atomic instructions, and also have different options
255255
/// w.r.t. what they should expand to.
256256
enum class AtomicExpansionKind {
257-
None, // Don't expand the instruction.
258-
CastToInteger, // Cast the atomic instruction to another type, e.g. from
259-
// floating-point to integer type.
257+
None, // Don't expand the instruction.
258+
CastToInteger, // Cast the atomic instruction to another type, e.g. from
259+
// floating-point to integer type.
260260
LLSC, // Expand the instruction into loadlinked/storeconditional; used
261-
// by ARM/AArch64.
261+
// by ARM/AArch64/PowerPC.
262262
LLOnly, // Expand the (load) instruction into just a load-linked, which has
263263
// greater atomic guarantees than a normal load.
264264
CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
265-
MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
266-
BitTestIntrinsic, // Use a target-specific intrinsic for special bit
267-
// operations; used by X86.
268-
CmpArithIntrinsic,// Use a target-specific intrinsic for special compare
269-
// operations; used by X86.
270-
Expand, // Generic expansion in terms of other atomic operations.
265+
MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
266+
BitTestIntrinsic, // Use a target-specific intrinsic for special bit
267+
// operations; used by X86.
268+
CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
269+
// operations; used by X86.
270+
Expand, // Generic expansion in terms of other atomic operations.
271271

272272
// Rewrite to a non-atomic form for use in a known non-preemptible
273273
// environment.

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1835,6 +1835,19 @@ let TargetPrefix = "ppc" in {
18351835
Intrinsic<[],[],[]>;
18361836
def int_ppc_iospace_eieio : ClangBuiltin<"__builtin_ppc_iospace_eieio">,
18371837
Intrinsic<[],[],[]>;
1838+
// Byte/halfword/word/doubleword "larx" intrinsics. Each takes a single
// pointer operand; the byte, halfword and word forms produce an i32 and the
// doubleword form produces an i64. These are the intrinsics that
// PPCTargetLowering::emitLoadLinked selects between by value width.
// NOTE(review): these are marked IntrReadMem + IntrArgMemOnly only, whereas
// the neighbouring int_ppc_sthcx also carries IntrNoDuplicate -- confirm the
// attribute set is sufficient for a load that is paired with a
// store-conditional.
def int_ppc_lbarx :
1839+
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
1840+
[IntrReadMem, IntrArgMemOnly]>;
1841+
def int_ppc_lharx :
1842+
Intrinsic<[llvm_i32_ty],[llvm_ptr_ty],
1843+
[IntrReadMem, IntrArgMemOnly]>;
1844+
def int_ppc_lwarx :
1845+
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
1846+
[IntrReadMem, IntrArgMemOnly]>;
1847+
// Doubleword form: the only one of the four with an i64 result.
def int_ppc_ldarx :
1848+
Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],
1849+
[IntrReadMem, IntrArgMemOnly]>;
1850+
18381851
def int_ppc_stdcx :
18391852
ClangBuiltin<"__builtin_ppc_stdcx">,
18401853
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i64_ty],
@@ -1844,7 +1857,7 @@ let TargetPrefix = "ppc" in {
18441857
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
18451858
[IntrWriteMem, IntrArgMemOnly]>;
18461859
def int_ppc_sthcx :
1847-
Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty ],
1860+
Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty],
18481861
[IntrWriteMem, IntrArgMemOnly, IntrNoDuplicate]>;
18491862
def int_ppc_stbcx :
18501863
ClangBuiltin<"__builtin_ppc_stbcx">,

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1442,6 +1442,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
14421442
setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
14431443

14441444
setMinFunctionAlignment(Align(4));
1445+
setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
14451446

14461447
auto CPUDirective = Subtarget.getCPUDirective();
14471448
switch (CPUDirective) {
@@ -12690,6 +12691,76 @@ static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
1269012691
return Builder.CreateIntrinsic(Id, {});
1269112692
}
1269212693

12694+
// Emit the load-linked half of an LL/SC expansion as a PowerPC intrinsic call.
// The intrinsic is chosen from the bit width of ValueTy: 8 -> ppc_lbarx,
// 16 -> ppc_lharx, 32 -> ppc_lwarx, 64 -> ppc_ldarx. The call result (named
// "larx") is truncated or bitcast to ValueTy before being returned.
// Ord is part of the overridden hook's signature but is not read in this body.
Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12695+
Value *Addr,
12696+
AtomicOrdering Ord) const {
12697+
unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12698+
12699+
// Any other width is a caller error; the switch below has no case for it.
assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12700+
"Only 8/16/32/64-bit atomic loads supported");
12701+
Intrinsic::ID IntID;
12702+
switch (SZ) {
12703+
default:
12704+
llvm_unreachable("Unexpected PrimitiveSize");
12705+
case 8:
12706+
IntID = Intrinsic::ppc_lbarx;
12707+
// Byte and halfword forms are only expected when the subtarget reports
// partword atomics.
assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12708+
break;
12709+
case 16:
12710+
IntID = Intrinsic::ppc_lharx;
12711+
assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12712+
break;
12713+
case 32:
12714+
IntID = Intrinsic::ppc_lwarx;
12715+
break;
12716+
case 64:
12717+
IntID = Intrinsic::ppc_ldarx;
12718+
break;
12719+
}
12720+
Value *Call =
12721+
Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
12722+
12723+
// Narrow (or bitcast) the intrinsic result to the requested value type.
return Builder.CreateTruncOrBitCast(Call, ValueTy);
12724+
}
12725+
12726+
// Perform a store-conditional operation to Addr. Return the status of the
12727+
// store. This should be 0 if the store succeeded, non-zero otherwise.
12728+
// The intrinsic is chosen from the bit width of Val's type: 8 -> ppc_stbcx,
// 16 -> ppc_sthcx, 32 -> ppc_stwcx, 64 -> ppc_stdcx. Ord is part of the
// overridden hook's signature but is not read in this body.
Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12729+
Value *Val, Value *Addr,
12730+
AtomicOrdering Ord) const {
12731+
Type *Ty = Val->getType();
12732+
unsigned SZ = Ty->getPrimitiveSizeInBits();
12733+
12734+
// Any other width is a caller error; the switch below has no case for it.
assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12735+
"Only 8/16/32/64-bit atomic stores supported");
12736+
Intrinsic::ID IntID;
12737+
switch (SZ) {
12738+
default:
12739+
llvm_unreachable("Unexpected PrimitiveSize");
12740+
case 8:
12741+
IntID = Intrinsic::ppc_stbcx;
12742+
// Byte and halfword forms are only expected when the subtarget reports
// partword atomics.
assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12743+
break;
12744+
case 16:
12745+
IntID = Intrinsic::ppc_sthcx;
12746+
assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12747+
break;
12748+
case 32:
12749+
IntID = Intrinsic::ppc_stwcx;
12750+
break;
12751+
case 64:
12752+
IntID = Intrinsic::ppc_stdcx;
12753+
break;
12754+
}
12755+
12756+
// Widen sub-word values to i32 before the call -- presumably because the
// partword store intrinsics take an i32 value operand (int_ppc_sthcx does).
if (SZ == 8 || SZ == 16)
12757+
Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
12758+
12759+
Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
12760+
/*FMFSource=*/nullptr, "stcx");
12761+
// Flip the low bit of the intrinsic result so the returned status follows the
// contract above (0 on success). Presumably the intrinsic itself yields 1 on
// success -- confirm against its selection pattern.
return Builder.CreateXor(Call, Builder.getInt32(1));
12762+
}
12763+
1269312764
// The mappings for emitLeading/TrailingFence is taken from
1269412765
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
1269512766
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
@@ -19651,7 +19722,7 @@ PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
1965119722
unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
1965219723
if (shouldInlineQuadwordAtomics() && Size == 128)
1965319724
return AtomicExpansionKind::MaskedIntrinsic;
19654-
return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
19725+
return AtomicExpansionKind::LLSC;
1965519726
}
1965619727

1965719728
static Intrinsic::ID

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,12 @@ namespace llvm {
927927
return true;
928928
}
929929

930+
// Load-linked hook override. The out-of-line definition picks one of the
// ppc_lbarx/lharx/lwarx/ldarx intrinsics from the bit width of ValueTy and
// returns the call result converted to ValueTy.
Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
931+
AtomicOrdering Ord) const override;
932+
933+
// Store-conditional hook override. The out-of-line definition picks one of
// the ppc_stbcx/sthcx/stwcx/stdcx intrinsics from the bit width of Val and
// returns a status value documented there as 0 on success, non-zero otherwise.
Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
934+
AtomicOrdering Ord) const override;
935+
930936
Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
931937
AtomicOrdering Ord) const override;
932938
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,

llvm/lib/Target/PowerPC/PPCInstr64Bit.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2023,6 +2023,8 @@ def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>;
20232023

20242024
} // IsISA3_0
20252025

2026+
// Select the int_ppc_ldarx intrinsic directly to the LDARX instruction, using
// the same X-form address operand.
def : Pat<(int_ppc_ldarx ForceXForm:$ptr),
2027+
(LDARX ForceXForm:$ptr)>;
20262028
def : Pat<(int_ppc_stdcx ForceXForm:$dst, g8rc:$A),
20272029
(RLWINM (STDCX g8rc:$A, ForceXForm:$dst), 31, 31, 31)>;
20282030
def : Pat<(PPCStoreCond ForceXForm:$dst, g8rc:$A, 8),

llvm/lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5143,7 +5143,6 @@ def : Pat<(int_ppc_store2r gprc:$a, ForceXForm:$ptr),
51435143
def : Pat<(int_ppc_store4r gprc:$a, ForceXForm:$ptr),
51445144
(STWBRX gprc:$a, ForceXForm:$ptr)>;
51455145

5146-
51475146
// Fast 32-bit reverse bits algorithm:
51485147
// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
51495148
// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA);
@@ -5324,10 +5323,14 @@ def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>;
53245323
def : Pat<(i64 (bitreverse i64:$A)),
53255324
(OR8 (RLDICR DWBytes7654.DWord, 32, 31), DWBytes3210.DWord)>;
53265325

5326+
// Select the int_ppc_lwarx intrinsic directly to the LWARX instruction, using
// the same X-form address operand.
def : Pat<(int_ppc_lwarx ForceXForm:$ptr),
5327+
(LWARX ForceXForm:$ptr)>;
53275328
def : Pat<(int_ppc_stwcx ForceXForm:$dst, gprc:$A),
53285329
(RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
53295330
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 4),
53305331
(RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
5332+
// Select the int_ppc_lbarx intrinsic directly to the LBARX instruction.
// NOTE(review): the halfword counterpart sits under a
// `let Predicates = [IsISA2_07]` block; no predicate is visible around this
// byte pattern in this hunk -- confirm whether one is intended.
def : Pat<(int_ppc_lbarx ForceXForm:$ptr),
5333+
(LBARX ForceXForm:$ptr)>;
53315334
def : Pat<(int_ppc_stbcx ForceXForm:$dst, gprc:$A),
53325335
(RLWINM (STBCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
53335336
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 1),
@@ -5360,6 +5363,9 @@ def : Pat<(int_ppc_mtmsr gprc:$RS),
53605363
(MTMSR $RS, 0)>;
53615364

53625365
let Predicates = [IsISA2_07] in {
5366+
// Select the int_ppc_lharx intrinsic directly to the LHARX instruction. This
// pattern lives inside the enclosing `let Predicates = [IsISA2_07]` block.
def : Pat<(int_ppc_lharx ForceXForm:$ptr),
5367+
(LHARX ForceXForm:$ptr)>;
5368+
53635369
def : Pat<(int_ppc_sthcx ForceXForm:$dst, gprc:$A),
53645370
(RLWINM (STHCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
53655371
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 2),

0 commit comments

Comments
 (0)