Skip to content

[PowerPC] enable AtomicExpandImpl::expandAtomicCmpXchg for powerpc #142395

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,20 +253,20 @@ class TargetLoweringBase {
/// support for these atomic instructions, and also have different options
/// w.r.t. what they should expand to.
enum class AtomicExpansionKind {
None, // Don't expand the instruction.
CastToInteger, // Cast the atomic instruction to another type, e.g. from
// floating-point to integer type.
None, // Don't expand the instruction.
CastToInteger, // Cast the atomic instruction to another type, e.g. from
// floating-point to integer type.
LLSC, // Expand the instruction into loadlinked/storeconditional; used
// by ARM/AArch64.
// by ARM/AArch64/PowerPC.
LLOnly, // Expand the (load) instruction into just a load-linked, which has
// greater atomic guarantees than a normal load.
CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
BitTestIntrinsic, // Use a target-specific intrinsic for special bit
// operations; used by X86.
CmpArithIntrinsic,// Use a target-specific intrinsic for special compare
// operations; used by X86.
Expand, // Generic expansion in terms of other atomic operations.
MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
BitTestIntrinsic, // Use a target-specific intrinsic for special bit
// operations; used by X86.
CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
// operations; used by X86.
Expand, // Generic expansion in terms of other atomic operations.

// Rewrite to a non-atomic form for use in a known non-preemptible
// environment.
Expand Down
15 changes: 14 additions & 1 deletion llvm/include/llvm/IR/IntrinsicsPowerPC.td
Original file line number Diff line number Diff line change
Expand Up @@ -1835,6 +1835,19 @@ let TargetPrefix = "ppc" in {
Intrinsic<[],[],[]>;
def int_ppc_iospace_eieio : ClangBuiltin<"__builtin_ppc_iospace_eieio">,
Intrinsic<[],[],[]>;
def int_ppc_lbarx :
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
def int_ppc_lharx :
Intrinsic<[llvm_i32_ty],[llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
def int_ppc_lwarx :
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
def int_ppc_ldarx :
Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;

def int_ppc_stdcx :
ClangBuiltin<"__builtin_ppc_stdcx">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i64_ty],
Expand All @@ -1844,7 +1857,7 @@ let TargetPrefix = "ppc" in {
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_sthcx :
Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty ],
Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty],
[IntrWriteMem, IntrArgMemOnly, IntrNoDuplicate]>;
def int_ppc_stbcx :
ClangBuiltin<"__builtin_ppc_stbcx">,
Expand Down
73 changes: 72 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1475,6 +1475,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);

setMinFunctionAlignment(Align(4));
setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);

switch (Subtarget.getCPUDirective()) {
default: break;
Expand Down Expand Up @@ -12672,6 +12673,76 @@ static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
return Builder.CreateIntrinsic(Id, {});
}

// Emit a load-and-reserve (larx-family) instruction of the width matching
// ValueTy. The ppc_l{b,h,w,d}arx intrinsics all return a full GPR-sized
// integer (i32 or i64), so the result is narrowed back to ValueTy before
// being handed to the atomic-expansion pass.
Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
                                         Value *Addr,
                                         AtomicOrdering Ord) const {
  const unsigned BitWidth = ValueTy->getPrimitiveSizeInBits();

  assert((BitWidth == 8 || BitWidth == 16 || BitWidth == 32 ||
          BitWidth == 64) &&
         "Only 8/16/32/64-bit atomic loads supported");

  // Select the load-and-reserve intrinsic for this width. The sub-word
  // variants (lbarx/lharx) exist only on targets with partword atomics.
  Intrinsic::ID IntID;
  switch (BitWidth) {
  case 8:
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    IntID = Intrinsic::ppc_lbarx;
    break;
  case 16:
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    IntID = Intrinsic::ppc_lharx;
    break;
  case 32:
    IntID = Intrinsic::ppc_lwarx;
    break;
  case 64:
    IntID = Intrinsic::ppc_ldarx;
    break;
  default:
    llvm_unreachable("Unexpected PrimitiveSize");
  }

  Value *Loaded =
      Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");

  // Narrow the GPR-sized intrinsic result back to the requested type.
  return Builder.CreateTruncOrBitCast(Loaded, ValueTy);
}

// Perform a store-conditional operation to Addr. Return the status of the
// store. This should be 0 if the store succeeded, non-zero otherwise.
Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
                                               Value *Val, Value *Addr,
                                               AtomicOrdering Ord) const {
  Type *Ty = Val->getType();
  unsigned SZ = Ty->getPrimitiveSizeInBits();

  // NOTE: message previously said "atomic loads" — copy-paste from
  // emitLoadLinked; this function handles the store side.
  assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
         "Only 8/16/32/64-bit atomic stores supported");

  // Select the store-conditional (stcx-family) intrinsic for this width.
  // The sub-word variants (stbcx/sthcx) exist only on targets with
  // partword atomics.
  Intrinsic::ID IntID;
  switch (SZ) {
  default:
    llvm_unreachable("Unexpected PrimitiveSize");
  case 8:
    IntID = Intrinsic::ppc_stbcx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 16:
    IntID = Intrinsic::ppc_sthcx;
    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
    break;
  case 32:
    IntID = Intrinsic::ppc_stwcx;
    break;
  case 64:
    IntID = Intrinsic::ppc_stdcx;
    break;
  }

  // The sub-word intrinsics take an i32 value operand; widen first.
  if (SZ == 8 || SZ == 16)
    Val = Builder.CreateZExt(Val, Builder.getInt32Ty());

  Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
                                        /*FMFSource=*/nullptr, "stcx");
  // The ppc_st*cx intrinsics yield 1 on success; flip the low bit so the
  // returned status is 0 on success, matching this function's contract.
  return Builder.CreateXor(Call, Builder.getInt32(1));
}

// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
Expand Down Expand Up @@ -19633,7 +19704,7 @@ PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
return AtomicExpansionKind::LLSC;
}

static Intrinsic::ID
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -927,6 +927,12 @@ namespace llvm {
return true;
}

Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
AtomicOrdering Ord) const override;

Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
AtomicOrdering Ord) const override;

Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstr64Bit.td
Original file line number Diff line number Diff line change
Expand Up @@ -2023,6 +2023,8 @@ def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>;

} // IsISA3_0

def : Pat<(int_ppc_ldarx ForceXForm:$ptr),
(LDARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stdcx ForceXForm:$dst, g8rc:$A),
(RLWINM (STDCX g8rc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, g8rc:$A, 8),
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/PowerPC/PPCInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -5143,7 +5143,6 @@ def : Pat<(int_ppc_store2r gprc:$a, ForceXForm:$ptr),
def : Pat<(int_ppc_store4r gprc:$a, ForceXForm:$ptr),
(STWBRX gprc:$a, ForceXForm:$ptr)>;


// Fast 32-bit reverse bits algorithm:
// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA);
Expand Down Expand Up @@ -5324,10 +5323,14 @@ def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>;
def : Pat<(i64 (bitreverse i64:$A)),
(OR8 (RLDICR DWBytes7654.DWord, 32, 31), DWBytes3210.DWord)>;

def : Pat<(int_ppc_lwarx ForceXForm:$ptr),
(LWARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stwcx ForceXForm:$dst, gprc:$A),
(RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 4),
(RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(int_ppc_lbarx ForceXForm:$ptr),
(LBARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stbcx ForceXForm:$dst, gprc:$A),
(RLWINM (STBCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 1),
Expand Down Expand Up @@ -5360,6 +5363,9 @@ def : Pat<(int_ppc_mtmsr gprc:$RS),
(MTMSR $RS, 0)>;

let Predicates = [IsISA2_07] in {
def : Pat<(int_ppc_lharx ForceXForm:$ptr),
(LHARX ForceXForm:$ptr)>;

def : Pat<(int_ppc_sthcx ForceXForm:$dst, gprc:$A),
(RLWINM (STHCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 2),
Expand Down
Loading