Skip to content

Commit 5641422

Browse files
author
Kai Luo
authored
[PowerPC] Use 'sync; ld; cmp; bc; isync' for atomic load seq-cst on 32-bit platform (#75905)
In theory, `cmp; bc; isync` is more performant than `lwsync`. The 64-bit platform already uses this sequence after atomic loads; this change implements it for the 32-bit platform as well.
1 parent 227bfa1 commit 5641422

File tree

5 files changed

+20
-8
lines changed

5 files changed

+20
-8
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10984,17 +10984,18 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1098410984
switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
1098510985
case Intrinsic::ppc_cfence: {
1098610986
assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10987-
assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
1098810987
SDValue Val = Op.getOperand(ArgStart + 1);
1098910988
EVT Ty = Val.getValueType();
1099010989
if (Ty == MVT::i128) {
1099110990
// FIXME: Testing one of two paired registers is sufficient to guarantee
1099210991
// ordering?
1099310992
Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
1099410993
}
10994+
unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
10995+
EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
1099510996
return SDValue(
10996-
DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10997-
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
10997+
DAG.getMachineNode(Opcode, DL, MVT::Other,
10998+
DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
1099810999
Op.getOperand(0)),
1099911000
0);
1100011001
}
@@ -11825,7 +11826,7 @@ Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1182511826
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
1182611827
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
1182711828
// and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11828-
if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11829+
if (isa<LoadInst>(Inst))
1182911830
return Builder.CreateCall(
1183011831
Intrinsic::getDeclaration(
1183111832
Builder.GetInsertBlock()->getParent()->getParent(),

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3177,9 +3177,11 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
31773177
}
31783178

31793179
// FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
3180+
case PPC::CFENCE:
31803181
case PPC::CFENCE8: {
31813182
auto Val = MI.getOperand(0).getReg();
3182-
BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
3183+
unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
3184+
BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
31833185
BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
31843186
.addImm(PPC::PRED_NE_MINUS)
31853187
.addReg(PPC::CR7)

llvm/lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5260,6 +5260,9 @@ def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs),
52605260
"hashchkp $RB, $addr", IIC_IntGeneral, []>;
52615261
}
52625262

5263+
let Defs = [CR7], Itinerary = IIC_LdStSync in
5264+
def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>;
5265+
52635266
// Now both high word and low word are reversed, next
52645267
// swap the high word and low word.
52655268
def : Pat<(i64 (bitreverse i64:$A)),

llvm/test/CodeGen/PowerPC/atomics-indexed.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ define i8 @load_x_i8_seq_cst(ptr %mem) {
1515
; PPC32-NEXT: sync
1616
; PPC32-NEXT: ori r4, r4, 24464
1717
; PPC32-NEXT: lbzx r3, r3, r4
18-
; PPC32-NEXT: lwsync
18+
; PPC32-NEXT: cmpw cr7, r3, r3
19+
; PPC32-NEXT: bne- cr7, .+4
20+
; PPC32-NEXT: isync
1921
; PPC32-NEXT: blr
2022
;
2123
; PPC64-LABEL: load_x_i8_seq_cst:
@@ -38,7 +40,9 @@ define i16 @load_x_i16_acquire(ptr %mem) {
3840
; PPC32-NEXT: lis r4, 2
3941
; PPC32-NEXT: ori r4, r4, 48928
4042
; PPC32-NEXT: lhzx r3, r3, r4
41-
; PPC32-NEXT: lwsync
43+
; PPC32-NEXT: cmpw cr7, r3, r3
44+
; PPC32-NEXT: bne- cr7, .+4
45+
; PPC32-NEXT: isync
4246
; PPC32-NEXT: blr
4347
;
4448
; PPC64-LABEL: load_x_i16_acquire:

llvm/test/CodeGen/PowerPC/atomics.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ define i32 @load_i32_acquire(ptr %mem) {
2929
; PPC32-LABEL: load_i32_acquire:
3030
; PPC32: # %bb.0:
3131
; PPC32-NEXT: lwz r3, 0(r3)
32-
; PPC32-NEXT: lwsync
32+
; PPC32-NEXT: cmpw cr7, r3, r3
33+
; PPC32-NEXT: bne- cr7, .+4
34+
; PPC32-NEXT: isync
3335
; PPC32-NEXT: blr
3436
;
3537
; PPC64-LABEL: load_i32_acquire:

0 commit comments

Comments
 (0)