
Commit 0604fa0

SixWeining authored and leecheechen committed
[LoongArch] Improve codegen for atomic cmpxchg ops (llvm#69339)
PR llvm#67391 improved atomic codegen by handling the memory ordering specified by the `cmpxchg` instruction: an acquire barrier needs to be generated when the memory ordering includes an acquire operation. This PR improves the codegen further by handling only the failure ordering.

(cherry picked from commit 78abc45)
Change-Id: I00391ad1aaf5c64ae95cc0f4f84a0b480a2fb5b3
1 parent 6722b1c commit 0604fa0
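The change is easiest to see on a `cmpxchg` whose success and failure orderings differ, such as the `acquire monotonic` cases in the updated test file below. Because only the failure ordering matters on the cmpxchg failure path (as explained in PR llvm#67391), the expansion can now pick a cheaper barrier hint there (dbar 1792 rather than dbar 20), as the updated test expectations show. A minimal IR example, reduced from the commit's own test file llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll:

; Success ordering is acquire, failure ordering is monotonic. After this
; patch, only the monotonic failure ordering is consulted when choosing the
; barrier hint on the failure path of the expanded LL/SC loop.
define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
  %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic
  ret void
}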

File tree: 4 files changed, +56 -18 lines

4 files changed

+56
-18
lines changed

llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp

Lines changed: 2 additions & 2 deletions
@@ -571,11 +571,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
   }
 
-  AtomicOrdering Ordering =
+  AtomicOrdering FailureOrdering =
       static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
   int hint;
 
-  switch (Ordering) {
+  switch (FailureOrdering) {
   case AtomicOrdering::Acquire:
   case AtomicOrdering::AcquireRelease:
   case AtomicOrdering::SequentiallyConsistent:

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 4 additions & 3 deletions
@@ -4492,8 +4492,9 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
-  Value *Ordering =
-      Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord));
+  AtomicOrdering FailOrd = CI->getFailureOrdering();
+  Value *FailureOrdering =
+      Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
 
   // TODO: Support cmpxchg on LA32.
   Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
@@ -4504,7 +4505,7 @@ Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
   Function *MaskedCmpXchg =
       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
   Value *Result = Builder.CreateCall(
-      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
   Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
   return Result;
 }

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 46 additions & 9 deletions
@@ -1753,7 +1753,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
 
 class PseudoCmpXchg
     : Pseudo<(outs GPR:$res, GPR:$scratch),
-             (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> {
+             (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$fail_order)> {
   let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
   let mayLoad = 1;
   let mayStore = 1;
@@ -1767,7 +1767,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg;
 def PseudoMaskedCmpXchg32
     : Pseudo<(outs GPR:$res, GPR:$scratch),
              (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
-                  grlenimm:$ordering)> {
+                  grlenimm:$fail_order)> {
   let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
   let mayLoad = 1;
   let mayStore = 1;
@@ -1785,6 +1785,43 @@ class AtomicPat<Intrinsic intrin, Pseudo AMInst>
   : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
         (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
 
+// These atomic cmpxchg PatFrags only care about the failure ordering.
+// The PatFrags defined by multiclass `ternary_atomic_op_ord` in
+// TargetSelectionDAG.td care about the merged memory ordering that is the
+// stronger one between success and failure. But for LoongArch LL-SC we only
+// need to care about the failure ordering as explained in PR #67391. So we
+// define these PatFrags that will be used to define cmpxchg pats below.
+multiclass ternary_atomic_op_failure_ord {
+  def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+    AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+    return Ordering == AtomicOrdering::Monotonic;
+  }]>;
+  def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+    AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+    return Ordering == AtomicOrdering::Acquire;
+  }]>;
+  def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+    AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+    return Ordering == AtomicOrdering::Release;
+  }]>;
+  def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+    AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+    return Ordering == AtomicOrdering::AcquireRelease;
+  }]>;
+  def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
+      (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
+    AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
+    return Ordering == AtomicOrdering::SequentiallyConsistent;
+  }]>;
+}
+
+defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord;
+defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord;
+
 let Predicates = [IsLA64] in {
 def : AtomicPat<int_loongarch_masked_atomicrmw_xchg_i64,
                 PseudoMaskedAtomicSwap32>;
@@ -1847,24 +1884,24 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
 // AtomicOrdering.h.
 multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
                             ValueType vt = GRLenVT> {
-  def : Pat<(vt (!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
             (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)),
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
 }
 
 defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
 defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
 def : Pat<(int_loongarch_masked_cmpxchg_i64
-              GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
+              GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
           (PseudoMaskedCmpXchg32
-              GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
+              GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>;
 
 def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64,
                               PseudoMaskedAtomicLoadMax32>;
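For illustration, a hedged IR sketch of what the new `_failure_*` PatFrags discriminate (the two functions below are hypothetical, not from the commit's tests): both `cmpxchg` operations have an acquire success ordering, so merged-ordering PatFrags would treat them identically, but the patterns above hand `PseudoCmpXchg32` different ordering immediates (4 for an acquire failure ordering, 2 for a monotonic one, following the AtomicOrdering encoding noted in the multiclass comment).

define i32 @cas_acquire_acquire(ptr %p, i32 %cmp, i32 %new) nounwind {
  ; Failure ordering acquire -> atomic_cmp_swap_32_failure_acquire
  ; -> PseudoCmpXchg32 with ordering immediate 4.
  %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

define i32 @cas_acquire_monotonic(ptr %p, i32 %cmp, i32 %new) nounwind {
  ; Failure ordering monotonic -> atomic_cmp_swap_32_failure_monotonic
  ; -> PseudoCmpXchg32 with ordering immediate 2.
  %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire monotonic
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}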

llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll

Lines changed: 4 additions & 4 deletions
@@ -132,7 +132,7 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; LA64-NEXT:    beqz $a5, .LBB4_1
 ; LA64-NEXT:    b .LBB4_4
 ; LA64-NEXT:  .LBB4_3:
-; LA64-NEXT:    dbar 20
+; LA64-NEXT:    dbar 1792
 ; LA64-NEXT:  .LBB4_4:
 ; LA64-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic
@@ -166,7 +166,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind {
 ; LA64-NEXT:    beqz $a5, .LBB5_1
 ; LA64-NEXT:    b .LBB5_4
 ; LA64-NEXT:  .LBB5_3:
-; LA64-NEXT:    dbar 20
+; LA64-NEXT:    dbar 1792
 ; LA64-NEXT:  .LBB5_4:
 ; LA64-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic
@@ -185,7 +185,7 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
 ; LA64-NEXT:    beqz $a4, .LBB6_1
 ; LA64-NEXT:    b .LBB6_4
 ; LA64-NEXT:  .LBB6_3:
-; LA64-NEXT:    dbar 20
+; LA64-NEXT:    dbar 1792
 ; LA64-NEXT:  .LBB6_4:
 ; LA64-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic
@@ -204,7 +204,7 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
 ; LA64-NEXT:    beqz $a4, .LBB7_1
 ; LA64-NEXT:    b .LBB7_4
 ; LA64-NEXT:  .LBB7_3:
-; LA64-NEXT:    dbar 20
+; LA64-NEXT:    dbar 1792
 ; LA64-NEXT:  .LBB7_4:
 ; LA64-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic
