[LoongArch] Improve codegen for atomic ops #67391

Merged · 2 commits · Oct 11, 2023
50 changes: 19 additions & 31 deletions llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -153,18 +153,12 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
   Register ScratchReg = MI.getOperand(1).getReg();
   Register AddrReg = MI.getOperand(2).getReg();
   Register IncrReg = MI.getOperand(3).getReg();
-  AtomicOrdering Ordering =
-      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
 
   // .loop:
-  //   if(Ordering != AtomicOrdering::Monotonic)
-  //     dbar 0
   //   ll.[w|d] dest, (addr)
   //   binop scratch, dest, val
   //   sc.[w|d] scratch, scratch, (addr)
   //   beqz scratch, loop
-  if (Ordering != AtomicOrdering::Monotonic)
-    BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopMBB, DL,
           TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
       .addReg(AddrReg)
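
As a reading aid: doAtomicBinOpExpansion turns a word-sized `atomicrmw` into an LL/SC retry loop, and after this change the loop no longer opens with a `dbar 0`. A minimal sketch of what that means for a fetch-add; the asm comments mirror the updated `.loop` comment above, with illustrative register names rather than verified llc output:

#include <atomic>

// Hedged sketch, not actual compiler output: the expected LA64 loop shape
// for a seq_cst fetch-add after this patch.
int fetch_add(std::atomic<int> &a, int v) {
  // .loop:
  //   ll.w  $dest, ($addr)               # no leading "dbar 0" anymore
  //   add.w $scratch, $dest, $incr
  //   sc.w  $scratch, $scratch, ($addr)
  //   beqz  $scratch, .loop
  return a.fetch_add(v, std::memory_order_seq_cst);
}
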
@@ -251,21 +245,15 @@ static void doMaskedAtomicBinOpExpansion(
   Register AddrReg = MI.getOperand(2).getReg();
   Register IncrReg = MI.getOperand(3).getReg();
   Register MaskReg = MI.getOperand(4).getReg();
-  AtomicOrdering Ordering =
-      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
 
   // .loop:
-  //   if(Ordering != AtomicOrdering::Monotonic)
-  //     dbar 0
   //   ll.w destreg, (alignedaddr)
   //   binop scratch, destreg, incr
   //   xor scratch, destreg, scratch
   //   and scratch, scratch, masktargetdata
   //   xor scratch, destreg, scratch
   //   sc.w scratch, scratch, (alignedaddr)
   //   beqz scratch, loop
-  if (Ordering != AtomicOrdering::Monotonic)
-    BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
       .addReg(AddrReg)
       .addImm(0);
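
The masked variant exists because `ll.w`/`sc.w` only operate on aligned words, so i8/i16 atomics work on the containing word and blend the result in under a mask. The xor/and/xor trio in the comment above is that blend; here is the same trick in plain C++ (function and parameter names are mine, for illustration):

#include <cstdint>

// Splice binopResult into word only where mask is set, leaving the
// neighboring bytes untouched -- the xor/and/xor idiom from the comment.
uint32_t maskedBlend(uint32_t word, uint32_t binopResult, uint32_t mask) {
  uint32_t scratch = word ^ binopResult; // bits that differ from the old word
  scratch &= mask;                       // restrict changes to the field
  return word ^ scratch;                 // apply exactly those changes
}
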
@@ -372,23 +360,20 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
   auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
 
   // Insert new MBBs.
   MF->insert(++MBB.getIterator(), LoopHeadMBB);
   MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
   MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
-  MF->insert(++LoopTailMBB->getIterator(), TailMBB);
-  MF->insert(++TailMBB->getIterator(), DoneMBB);
+  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
 
   // Set up successors and transfer remaining instructions to DoneMBB.
   LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
   LoopHeadMBB->addSuccessor(LoopTailMBB);
   LoopIfBodyMBB->addSuccessor(LoopTailMBB);
   LoopTailMBB->addSuccessor(LoopHeadMBB);
-  LoopTailMBB->addSuccessor(TailMBB);
-  TailMBB->addSuccessor(DoneMBB);
+  LoopTailMBB->addSuccessor(DoneMBB);
   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
   DoneMBB->transferSuccessors(&MBB);
   MBB.addSuccessor(LoopHeadMBB);
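
Net effect of the block surgery above: TailMBB, which existed only to hold the trailing `dbar 0x700`, is gone. The resulting CFG, read directly off the addSuccessor calls:

// LoopHeadMBB   -> LoopIfBodyMBB or LoopTailMBB
// LoopIfBodyMBB -> LoopTailMBB
// LoopTailMBB   -> LoopHeadMBB (sc.w failed) or DoneMBB (store succeeded)
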
@@ -402,11 +387,9 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
 
   //
   // .loophead:
-  //   dbar 0
   //   ll.w destreg, (alignedaddr)
   //   and scratch2, destreg, mask
   //   move scratch1, destreg
-  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
       .addReg(AddrReg)
       .addImm(0);
@@ -463,7 +446,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
   // .looptail:
   //   sc.w scratch1, scratch1, (addr)
   //   beqz scratch1, loop
-  //   dbar 0x700
   BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
       .addReg(Scratch1Reg)
       .addReg(AddrReg)
@@ -472,18 +454,13 @@
       .addReg(Scratch1Reg)
       .addMBB(LoopHeadMBB);
 
-  // .tail:
-  //   dbar 0x700
-  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
-
   NextMBBI = MBB.end();
   MI.eraseFromParent();
 
   LivePhysRegs LiveRegs;
   computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
   computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
   computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
-  computeAndAddLiveIns(LiveRegs, *TailMBB);
   computeAndAddLiveIns(LiveRegs, *DoneMBB);
 
   return true;
Expand Down Expand Up @@ -535,12 +512,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
.addReg(CmpValReg)
.addMBB(TailMBB);
// .looptail:
// dbar 0
// move scratch, newval
// sc.[w|d] scratch, scratch, (addr)
// beqz scratch, loophead
// b done
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
.addReg(NewValReg)
.addReg(LoongArch::R0);
@@ -573,13 +548,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
         .addMBB(TailMBB);
 
     // .looptail:
-    //   dbar 0
     //   andn scratch, dest, mask
     //   or scratch, scratch, newval
     //   sc.[w|d] scratch, scratch, (addr)
     //   beqz scratch, loophead
     //   b done
-    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
         .addReg(DestReg)
         .addReg(MaskReg);
@@ -598,9 +571,24 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
   }
 
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
+  int hint;
+
+  switch (Ordering) {
+  case AtomicOrdering::Acquire:
+  case AtomicOrdering::AcquireRelease:
+  case AtomicOrdering::SequentiallyConsistent:
+    // TODO: acquire
+    hint = 0;
+    break;
+  default:
+    hint = 0x700;
+  }
+
   // .tail:
-  //   dbar 0x700
-  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
+  //   dbar 0x700 | acquire
+  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);
 
   NextMBBI = MBB.end();
   MI.eraseFromParent();
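
The switch above is where the new ordering operand pays off: orderings with an acquire component keep a conservative full barrier (`dbar 0`, with a TODO to use a genuine acquire hint), everything else gets the weaker `0x700` hint on the failure path. A usage-level sketch of the distinction, assuming the usual std::atomic-to-cmpxchg lowering; the comments state the tail barrier I would expect per this switch and the tests below, not verified output:

#include <atomic>

bool cas_seq_cst(std::atomic<int> &a, int &expected, int desired) {
  // seq_cst ordering -> hint 0, i.e. "dbar 0" on the failure path.
  return a.compare_exchange_strong(expected, desired);
}

bool cas_relaxed(std::atomic<int> &a, int &expected, int desired) {
  // Monotonic (relaxed) ordering -> default case -> "dbar 1792" (0x700).
  return a.compare_exchange_strong(expected, desired,
                                   std::memory_order_relaxed,
                                   std::memory_order_relaxed);
}
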
24 changes: 19 additions & 5 deletions llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1792,7 +1792,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
 
 class PseudoCmpXchg
     : Pseudo<(outs GPR:$res, GPR:$scratch),
-             (ins GPR:$addr, GPR:$cmpval, GPR:$newval)> {
+             (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> {
   let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
   let mayLoad = 1;
   let mayStore = 1;
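
With `grlenimm:$ordering` appended to the ins list, the pseudo now carries the ordering through to the post-RA expander. MachineInstr operands are numbered outs-first, which is where the `IsMasked ? 6 : 5` index in expandAtomicCmpXchg above comes from:

// Operand layout of PseudoCmpXchg after this change:
//   0: $res   1: $scratch   2: $addr   3: $cmpval   4: $newval   5: $ordering
// PseudoMaskedCmpXchg32 additionally has $mask at 5, pushing $ordering to 6.
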
@@ -1882,14 +1882,28 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
 def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
                 PseudoMaskedAtomicLoadUMin32>;
 
-def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new),
-          (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
+                            ValueType vt = GRLenVT> {
+  def : Pat<(vt (!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
+}
+
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
 def : Pat<(int_loongarch_masked_cmpxchg_i64
             GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
           (PseudoMaskedCmpXchg32
             GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
-def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
-          (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
 
 def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64,
                               PseudoMaskedAtomicLoadMax32>;
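
The magic immediates 2/4/5/6/7 in the multiclass are the integer values of llvm::AtomicOrdering, which the in-tree comment says must be kept in sync. For reference, the enum from llvm/Support/AtomicOrdering.h (reproduced from memory; double-check against the tree):

enum class AtomicOrdering {
  NotAtomic = 0,
  Unordered = 1,
  Monotonic = 2,              // _monotonic pattern -> imm 2
  Consume = 3,                // not used by LLVM IR instructions
  Acquire = 4,                // _acquire -> imm 4
  Release = 5,                // _release -> imm 5
  AcquireRelease = 6,         // _acq_rel -> imm 6
  SequentiallyConsistent = 7  // _seq_cst -> imm 7
};
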
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -33,14 +33,13 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT: bne $a5, $a2, .LBB0_5
 ; LA64-NEXT: # %bb.4: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2
-; LA64-NEXT: dbar 0
 ; LA64-NEXT: move $a7, $a6
 ; LA64-NEXT: sc.w $a7, $a0, 0
 ; LA64-NEXT: beqz $a7, .LBB0_3
 ; LA64-NEXT: b .LBB0_6
 ; LA64-NEXT: .LBB0_5: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT: dbar 1792
+; LA64-NEXT: dbar 0
 ; LA64-NEXT: .LBB0_6: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
 ; LA64-NEXT: addi.w $a6, $a2, 0
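
One decoding note for these CHECK lines: the assembler prints the hint in decimal, so `dbar 1792` and `dbar 0x700` are the same instruction. Each test below changes in the same two ways as the C++ above: the `dbar 0` before the `sc` retry is dropped, and the failure-path barrier becomes a full `dbar 0`, since the ordering in these cmpxchg loops includes acquire:

// Same hint, different base -- trivially checkable:
static_assert(1792 == 0x700, "dbar 1792 is dbar 0x700");
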
@@ -86,14 +85,13 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT: bne $a5, $a2, .LBB1_5
 ; LA64-NEXT: # %bb.4: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2
-; LA64-NEXT: dbar 0
 ; LA64-NEXT: move $a7, $a6
 ; LA64-NEXT: sc.w $a7, $a0, 0
 ; LA64-NEXT: beqz $a7, .LBB1_3
 ; LA64-NEXT: b .LBB1_6
 ; LA64-NEXT: .LBB1_5: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
-; LA64-NEXT: dbar 1792
+; LA64-NEXT: dbar 0
 ; LA64-NEXT: .LBB1_6: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
 ; LA64-NEXT: addi.w $a6, $a2, 0
@@ -127,14 +125,13 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT: bne $a1, $a3, .LBB2_5
 ; LA64-NEXT: # %bb.4: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2
-; LA64-NEXT: dbar 0
 ; LA64-NEXT: move $a6, $a5
 ; LA64-NEXT: sc.w $a6, $a0, 0
 ; LA64-NEXT: beqz $a6, .LBB2_3
 ; LA64-NEXT: b .LBB2_6
 ; LA64-NEXT: .LBB2_5: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
-; LA64-NEXT: dbar 1792
+; LA64-NEXT: dbar 0
 ; LA64-NEXT: .LBB2_6: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
 ; LA64-NEXT: move $a3, $a1
@@ -166,14 +163,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 ; LA64-NEXT: bne $a2, $a3, .LBB3_5
 ; LA64-NEXT: # %bb.4: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB3_3 Depth=2
-; LA64-NEXT: dbar 0
 ; LA64-NEXT: move $a5, $a4
 ; LA64-NEXT: sc.d $a5, $a0, 0
 ; LA64-NEXT: beqz $a5, .LBB3_3
 ; LA64-NEXT: b .LBB3_6
 ; LA64-NEXT: .LBB3_5: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1
-; LA64-NEXT: dbar 1792
+; LA64-NEXT: dbar 0
 ; LA64-NEXT: .LBB3_6: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1
 ; LA64-NEXT: bne $a2, $a3, .LBB3_1
@@ -221,14 +217,13 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT: bne $a6, $a2, .LBB4_5
 ; LA64-NEXT: # %bb.4: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2
-; LA64-NEXT: dbar 0
 ; LA64-NEXT: move $t0, $a7
 ; LA64-NEXT: sc.w $t0, $a0, 0
 ; LA64-NEXT: beqz $t0, .LBB4_3
 ; LA64-NEXT: b .LBB4_6
 ; LA64-NEXT: .LBB4_5: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
-; LA64-NEXT: dbar 1792
+; LA64-NEXT: dbar 0
 ; LA64-NEXT: .LBB4_6: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
 ; LA64-NEXT: addi.w $a7, $a2, 0
@@ -279,14 +274,13 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT: bne $a6, $a2, .LBB5_5
 ; LA64-NEXT: # %bb.4: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2
-; LA64-NEXT: dbar 0
 ; LA64-NEXT: move $t0, $a7
 ; LA64-NEXT: sc.w $t0, $a0, 0
 ; LA64-NEXT: beqz $t0, .LBB5_3
 ; LA64-NEXT: b .LBB5_6
 ; LA64-NEXT: .LBB5_5: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
-; LA64-NEXT: dbar 1792
+; LA64-NEXT: dbar 0
 ; LA64-NEXT: .LBB5_6: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
 ; LA64-NEXT: addi.w $a7, $a2, 0
@@ -325,14 +319,13 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT: bne $a2, $a4, .LBB6_5
 ; LA64-NEXT: # %bb.4: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2
-; LA64-NEXT: dbar 0
 ; LA64-NEXT: move $a7, $a6
 ; LA64-NEXT: sc.w $a7, $a0, 0
 ; LA64-NEXT: beqz $a7, .LBB6_3
 ; LA64-NEXT: b .LBB6_6
 ; LA64-NEXT: .LBB6_5: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
-; LA64-NEXT: dbar 1792
+; LA64-NEXT: dbar 0
 ; LA64-NEXT: .LBB6_6: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
 ; LA64-NEXT: move $a4, $a2
@@ -369,14 +362,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
 ; LA64-NEXT: bne $a2, $a3, .LBB7_5
 ; LA64-NEXT: # %bb.4: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB7_3 Depth=2
-; LA64-NEXT: dbar 0
 ; LA64-NEXT: move $a5, $a4
 ; LA64-NEXT: sc.d $a5, $a0, 0
 ; LA64-NEXT: beqz $a5, .LBB7_3
 ; LA64-NEXT: b .LBB7_6
 ; LA64-NEXT: .LBB7_5: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1
-; LA64-NEXT: dbar 1792
+; LA64-NEXT: dbar 0
 ; LA64-NEXT: .LBB7_6: # %atomicrmw.start
 ; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1
 ; LA64-NEXT: bne $a2, $a3, .LBB7_1