Skip to content

[PowerPC] Use 'sync; ld; cmp; bc; isync' for atomic load seq-cst on 32-bit platform #75905

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits on Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10984,17 +10984,18 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
case Intrinsic::ppc_cfence: {
assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
SDValue Val = Op.getOperand(ArgStart + 1);
EVT Ty = Val.getValueType();
if (Ty == MVT::i128) {
// FIXME: Testing one of two paired registers is sufficient to guarantee
// ordering?
Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
}
unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
return SDValue(
DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
DAG.getMachineNode(Opcode, DL, MVT::Other,
DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
Op.getOperand(0)),
0);
}
Expand Down Expand Up @@ -11825,7 +11826,7 @@ Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
// and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
if (isa<LoadInst>(Inst))
return Builder.CreateCall(
Intrinsic::getDeclaration(
Builder.GetInsertBlock()->getParent()->getParent(),
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3177,9 +3177,11 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}

// FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
case PPC::CFENCE:
case PPC::CFENCE8: {
auto Val = MI.getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
.addImm(PPC::PRED_NE_MINUS)
.addReg(PPC::CR7)
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -5260,6 +5260,9 @@ def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs),
"hashchkp $RB, $addr", IIC_IntGeneral, []>;
}

// CFENCE: post-RA-expanded pseudo taking a single gprc register operand
// (presumably the 32-bit counterpart of CFENCE8 -- confirm against that
// definition). Despite its name, $cr is a GPR operand: the expansion in
// PPCInstrInfo::expandPostRAPseudo compares that register with itself into
// CR7 and then emits CTRL_DEP on CR7, which is why CR7 is listed in Defs.
let Defs = [CR7], Itinerary = IIC_LdStSync in
def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>;

// Now both high word and low word are reversed, next
// swap the high word and low word.
def : Pat<(i64 (bitreverse i64:$A)),
Expand Down
8 changes: 6 additions & 2 deletions llvm/test/CodeGen/PowerPC/atomics-indexed.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ define i8 @load_x_i8_seq_cst(ptr %mem) {
; PPC32-NEXT: sync
; PPC32-NEXT: ori r4, r4, 24464
; PPC32-NEXT: lbzx r3, r3, r4
; PPC32-NEXT: lwsync
; PPC32-NEXT: cmpw cr7, r3, r3
; PPC32-NEXT: bne- cr7, .+4
; PPC32-NEXT: isync
; PPC32-NEXT: blr
;
; PPC64-LABEL: load_x_i8_seq_cst:
Expand All @@ -38,7 +40,9 @@ define i16 @load_x_i16_acquire(ptr %mem) {
; PPC32-NEXT: lis r4, 2
; PPC32-NEXT: ori r4, r4, 48928
; PPC32-NEXT: lhzx r3, r3, r4
; PPC32-NEXT: lwsync
; PPC32-NEXT: cmpw cr7, r3, r3
; PPC32-NEXT: bne- cr7, .+4
; PPC32-NEXT: isync
; PPC32-NEXT: blr
;
; PPC64-LABEL: load_x_i16_acquire:
Expand Down
4 changes: 3 additions & 1 deletion llvm/test/CodeGen/PowerPC/atomics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ define i32 @load_i32_acquire(ptr %mem) {
; PPC32-LABEL: load_i32_acquire:
; PPC32: # %bb.0:
; PPC32-NEXT: lwz r3, 0(r3)
; PPC32-NEXT: lwsync
; PPC32-NEXT: cmpw cr7, r3, r3
; PPC32-NEXT: bne- cr7, .+4
; PPC32-NEXT: isync
; PPC32-NEXT: blr
;
; PPC64-LABEL: load_i32_acquire:
Expand Down