Skip to content

Commit 78abc45

Browse files
authored
[LoongArch] Improve codegen for atomic cmpxchg ops (#69339)
PR #67391 improved atomic codegen by handling the memory ordering specified by the `cmpxchg` instruction: an acquire barrier needs to be generated when the memory ordering includes an acquire operation. This PR improves the codegen further by handling only the failure ordering.
1 parent 271087e commit 78abc45

File tree

4 files changed

+56
-18
lines changed

4 files changed

+56
-18
lines changed

llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -571,11 +571,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
571571
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
572572
}
573573

574-
AtomicOrdering Ordering =
574+
AtomicOrdering FailureOrdering =
575575
static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
576576
int hint;
577577

578-
switch (Ordering) {
578+
switch (FailureOrdering) {
579579
case AtomicOrdering::Acquire:
580580
case AtomicOrdering::AcquireRelease:
581581
case AtomicOrdering::SequentiallyConsistent:

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4184,8 +4184,9 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
41844184
Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
41854185
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
41864186
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4187-
Value *Ordering =
4188-
Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord));
4187+
AtomicOrdering FailOrd = CI->getFailureOrdering();
4188+
Value *FailureOrdering =
4189+
Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
41894190

41904191
// TODO: Support cmpxchg on LA32.
41914192
Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
@@ -4196,7 +4197,7 @@ Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
41964197
Function *MaskedCmpXchg =
41974198
Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
41984199
Value *Result = Builder.CreateCall(
4199-
MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
4200+
MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
42004201
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
42014202
return Result;
42024203
}

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 46 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,7 +1814,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
18141814

18151815
class PseudoCmpXchg
18161816
: Pseudo<(outs GPR:$res, GPR:$scratch),
1817-
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> {
1817+
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$fail_order)> {
18181818
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
18191819
let mayLoad = 1;
18201820
let mayStore = 1;
@@ -1828,7 +1828,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg;
18281828
def PseudoMaskedCmpXchg32
18291829
: Pseudo<(outs GPR:$res, GPR:$scratch),
18301830
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
1831-
grlenimm:$ordering)> {
1831+
grlenimm:$fail_order)> {
18321832
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
18331833
let mayLoad = 1;
18341834
let mayStore = 1;
@@ -1846,6 +1846,43 @@ class AtomicPat<Intrinsic intrin, Pseudo AMInst>
18461846
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
18471847
(AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
18481848

1849+
// These atomic cmpxchg PatFrags only care about the failure ordering.
1850+
// The PatFrags defined by multiclass `ternary_atomic_op_ord` in
1851+
// TargetSelectionDAG.td care about the merged memory ordering, i.e. the
1852+
// stronger of the success and failure orderings. But for LoongArch LL-SC we
1853+
// need to care about the failure ordering as explained in PR #67391. So we
1854+
// define these PatFrags that will be used to define cmpxchg pats below.
1855+
multiclass ternary_atomic_op_failure_ord {
1856+
def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
1857+
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
1858+
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
1859+
return Ordering == AtomicOrdering::Monotonic;
1860+
}]>;
1861+
def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
1862+
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
1863+
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
1864+
return Ordering == AtomicOrdering::Acquire;
1865+
}]>;
1866+
def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
1867+
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
1868+
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
1869+
return Ordering == AtomicOrdering::Release;
1870+
}]>;
1871+
def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
1872+
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
1873+
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
1874+
return Ordering == AtomicOrdering::AcquireRelease;
1875+
}]>;
1876+
def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
1877+
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val), [{
1878+
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getFailureOrdering();
1879+
return Ordering == AtomicOrdering::SequentiallyConsistent;
1880+
}]>;
1881+
}
1882+
1883+
defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord;
1884+
defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord;
1885+
18491886
let Predicates = [IsLA64] in {
18501887
def : AtomicPat<int_loongarch_masked_atomicrmw_xchg_i64,
18511888
PseudoMaskedAtomicSwap32>;
@@ -1908,24 +1945,24 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
19081945
// AtomicOrdering.h.
19091946
multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
19101947
ValueType vt = GRLenVT> {
1911-
def : Pat<(vt (!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
1948+
def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
19121949
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
1913-
def : Pat<(vt (!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
1950+
def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
19141951
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
1915-
def : Pat<(vt (!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)),
1952+
def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
19161953
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
1917-
def : Pat<(vt (!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
1954+
def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
19181955
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
1919-
def : Pat<(vt (!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
1956+
def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
19201957
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
19211958
}
19221959

19231960
defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
19241961
defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
19251962
def : Pat<(int_loongarch_masked_cmpxchg_i64
1926-
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
1963+
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
19271964
(PseudoMaskedCmpXchg32
1928-
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
1965+
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>;
19291966

19301967
def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64,
19311968
PseudoMaskedAtomicLoadMax32>;

llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ define void @cmpxchg_i8_acquire_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
129129
; LA64-NEXT: beqz $a5, .LBB4_1
130130
; LA64-NEXT: b .LBB4_4
131131
; LA64-NEXT: .LBB4_3:
132-
; LA64-NEXT: dbar 20
132+
; LA64-NEXT: dbar 1792
133133
; LA64-NEXT: .LBB4_4:
134134
; LA64-NEXT: ret
135135
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic
@@ -162,7 +162,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin
162162
; LA64-NEXT: beqz $a5, .LBB5_1
163163
; LA64-NEXT: b .LBB5_4
164164
; LA64-NEXT: .LBB5_3:
165-
; LA64-NEXT: dbar 20
165+
; LA64-NEXT: dbar 1792
166166
; LA64-NEXT: .LBB5_4:
167167
; LA64-NEXT: ret
168168
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic
@@ -181,7 +181,7 @@ define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwin
181181
; LA64-NEXT: beqz $a4, .LBB6_1
182182
; LA64-NEXT: b .LBB6_4
183183
; LA64-NEXT: .LBB6_3:
184-
; LA64-NEXT: dbar 20
184+
; LA64-NEXT: dbar 1792
185185
; LA64-NEXT: .LBB6_4:
186186
; LA64-NEXT: ret
187187
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic
@@ -200,7 +200,7 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
200200
; LA64-NEXT: beqz $a4, .LBB7_1
201201
; LA64-NEXT: b .LBB7_4
202202
; LA64-NEXT: .LBB7_3:
203-
; LA64-NEXT: dbar 20
203+
; LA64-NEXT: dbar 1792
204204
; LA64-NEXT: .LBB7_4:
205205
; LA64-NEXT: ret
206206
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic

0 commit comments

Comments
 (0)