Skip to content

Commit 17743f2

Browse files
committed
[LoongArch] Improve codegen for atomic ops
1 parent 25cd4c0 commit 17743f2

File tree

8 files changed

+67
-164
lines changed

8 files changed

+67
-164
lines changed

llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp

Lines changed: 19 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -153,18 +153,12 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
153153
Register ScratchReg = MI.getOperand(1).getReg();
154154
Register AddrReg = MI.getOperand(2).getReg();
155155
Register IncrReg = MI.getOperand(3).getReg();
156-
AtomicOrdering Ordering =
157-
static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
158156

159157
// .loop:
160-
// if(Ordering != AtomicOrdering::Monotonic)
161-
// dbar 0
162158
// ll.[w|d] dest, (addr)
163159
// binop scratch, dest, val
164160
// sc.[w|d] scratch, scratch, (addr)
165161
// beqz scratch, loop
166-
if (Ordering != AtomicOrdering::Monotonic)
167-
BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
168162
BuildMI(LoopMBB, DL,
169163
TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
170164
.addReg(AddrReg)
@@ -251,21 +245,15 @@ static void doMaskedAtomicBinOpExpansion(
251245
Register AddrReg = MI.getOperand(2).getReg();
252246
Register IncrReg = MI.getOperand(3).getReg();
253247
Register MaskReg = MI.getOperand(4).getReg();
254-
AtomicOrdering Ordering =
255-
static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
256248

257249
// .loop:
258-
// if(Ordering != AtomicOrdering::Monotonic)
259-
// dbar 0
260250
// ll.w destreg, (alignedaddr)
261251
// binop scratch, destreg, incr
262252
// xor scratch, destreg, scratch
263253
// and scratch, scratch, masktargetdata
264254
// xor scratch, destreg, scratch
265255
// sc.w scratch, scratch, (alignedaddr)
266256
// beqz scratch, loop
267-
if (Ordering != AtomicOrdering::Monotonic)
268-
BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
269257
BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
270258
.addReg(AddrReg)
271259
.addImm(0);
@@ -372,23 +360,20 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
372360
auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
373361
auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
374362
auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
375-
auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
376363
auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
377364

378365
// Insert new MBBs.
379366
MF->insert(++MBB.getIterator(), LoopHeadMBB);
380367
MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
381368
MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
382-
MF->insert(++LoopTailMBB->getIterator(), TailMBB);
383-
MF->insert(++TailMBB->getIterator(), DoneMBB);
369+
MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
384370

385371
// Set up successors and transfer remaining instructions to DoneMBB.
386372
LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
387373
LoopHeadMBB->addSuccessor(LoopTailMBB);
388374
LoopIfBodyMBB->addSuccessor(LoopTailMBB);
389375
LoopTailMBB->addSuccessor(LoopHeadMBB);
390-
LoopTailMBB->addSuccessor(TailMBB);
391-
TailMBB->addSuccessor(DoneMBB);
376+
LoopTailMBB->addSuccessor(DoneMBB);
392377
DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
393378
DoneMBB->transferSuccessors(&MBB);
394379
MBB.addSuccessor(LoopHeadMBB);
@@ -402,11 +387,9 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
402387

403388
//
404389
// .loophead:
405-
// dbar 0
406390
// ll.w destreg, (alignedaddr)
407391
// and scratch2, destreg, mask
408392
// move scratch1, destreg
409-
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
410393
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
411394
.addReg(AddrReg)
412395
.addImm(0);
@@ -463,7 +446,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
463446
// .looptail:
464447
// sc.w scratch1, scratch1, (addr)
465448
// beqz scratch1, loophead
466-
// dbar 0x700
467449
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
468450
.addReg(Scratch1Reg)
469451
.addReg(AddrReg)
@@ -472,18 +454,13 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
472454
.addReg(Scratch1Reg)
473455
.addMBB(LoopHeadMBB);
474456

475-
// .tail:
476-
// dbar 0x700
477-
BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
478-
479457
NextMBBI = MBB.end();
480458
MI.eraseFromParent();
481459

482460
LivePhysRegs LiveRegs;
483461
computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
484462
computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
485463
computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
486-
computeAndAddLiveIns(LiveRegs, *TailMBB);
487464
computeAndAddLiveIns(LiveRegs, *DoneMBB);
488465

489466
return true;
@@ -535,12 +512,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
535512
.addReg(CmpValReg)
536513
.addMBB(TailMBB);
537514
// .looptail:
538-
// dbar 0
539515
// move scratch, newval
540516
// sc.[w|d] scratch, scratch, (addr)
541517
// beqz scratch, loophead
542518
// b done
543-
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
544519
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
545520
.addReg(NewValReg)
546521
.addReg(LoongArch::R0);
@@ -573,13 +548,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
573548
.addMBB(TailMBB);
574549

575550
// .looptail:
576-
// dbar 0
577551
// andn scratch, dest, mask
578552
// or scratch, scratch, newval
579553
// sc.[w|d] scratch, scratch, (addr)
580554
// beqz scratch, loophead
581555
// b done
582-
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
583556
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
584557
.addReg(DestReg)
585558
.addReg(MaskReg);
@@ -598,9 +571,24 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
598571
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
599572
}
600573

574+
AtomicOrdering Ordering =
575+
static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
576+
int hint;
577+
578+
switch (Ordering) {
579+
case AtomicOrdering::Acquire:
580+
case AtomicOrdering::AcquireRelease:
581+
case AtomicOrdering::SequentiallyConsistent:
582+
// TODO: emit an acquire-specific barrier hint; for now use dbar 0.
583+
hint = 0;
584+
break;
585+
default:
586+
hint = 0x700;
587+
}
588+
601589
// .tail:
602-
// dbar 0x700
603-
BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
590+
// dbar <hint>  (0x700, or the acquire hint selected above)
591+
BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);
604592

605593
NextMBBI = MBB.end();
606594
MI.eraseFromParent();

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1792,7 +1792,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
17921792

17931793
class PseudoCmpXchg
17941794
: Pseudo<(outs GPR:$res, GPR:$scratch),
1795-
(ins GPR:$addr, GPR:$cmpval, GPR:$newval)> {
1795+
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> {
17961796
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
17971797
let mayLoad = 1;
17981798
let mayStore = 1;
@@ -1882,14 +1882,28 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
18821882
def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
18831883
PseudoMaskedAtomicLoadUMin32>;
18841884

1885-
def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new),
1886-
(PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>;
1885+
// Ordering constants must be kept in sync with the AtomicOrdering enum in
1886+
// AtomicOrdering.h.
1887+
multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
1888+
ValueType vt = GRLenVT> {
1889+
def : Pat<(vt (!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
1890+
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
1891+
def : Pat<(vt (!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
1892+
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
1893+
def : Pat<(vt (!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)),
1894+
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
1895+
def : Pat<(vt (!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
1896+
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
1897+
def : Pat<(vt (!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
1898+
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
1899+
}
1900+
1901+
defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
1902+
defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
18871903
def : Pat<(int_loongarch_masked_cmpxchg_i64
18881904
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
18891905
(PseudoMaskedCmpXchg32
18901906
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
1891-
def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
1892-
(PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
18931907

18941908
def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64,
18951909
PseudoMaskedAtomicLoadMax32>;

llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,13 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
3333
; LA64-NEXT: bne $a5, $a2, .LBB0_5
3434
; LA64-NEXT: # %bb.4: # %atomicrmw.start
3535
; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2
36-
; LA64-NEXT: dbar 0
3736
; LA64-NEXT: move $a7, $a6
3837
; LA64-NEXT: sc.w $a7, $a0, 0
3938
; LA64-NEXT: beqz $a7, .LBB0_3
4039
; LA64-NEXT: b .LBB0_6
4140
; LA64-NEXT: .LBB0_5: # %atomicrmw.start
4241
; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
43-
; LA64-NEXT: dbar 1792
42+
; LA64-NEXT: dbar 0
4443
; LA64-NEXT: .LBB0_6: # %atomicrmw.start
4544
; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1
4645
; LA64-NEXT: addi.w $a6, $a2, 0
@@ -86,14 +85,13 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
8685
; LA64-NEXT: bne $a5, $a2, .LBB1_5
8786
; LA64-NEXT: # %bb.4: # %atomicrmw.start
8887
; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2
89-
; LA64-NEXT: dbar 0
9088
; LA64-NEXT: move $a7, $a6
9189
; LA64-NEXT: sc.w $a7, $a0, 0
9290
; LA64-NEXT: beqz $a7, .LBB1_3
9391
; LA64-NEXT: b .LBB1_6
9492
; LA64-NEXT: .LBB1_5: # %atomicrmw.start
9593
; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
96-
; LA64-NEXT: dbar 1792
94+
; LA64-NEXT: dbar 0
9795
; LA64-NEXT: .LBB1_6: # %atomicrmw.start
9896
; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1
9997
; LA64-NEXT: addi.w $a6, $a2, 0
@@ -127,14 +125,13 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
127125
; LA64-NEXT: bne $a1, $a3, .LBB2_5
128126
; LA64-NEXT: # %bb.4: # %atomicrmw.start
129127
; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2
130-
; LA64-NEXT: dbar 0
131128
; LA64-NEXT: move $a6, $a5
132129
; LA64-NEXT: sc.w $a6, $a0, 0
133130
; LA64-NEXT: beqz $a6, .LBB2_3
134131
; LA64-NEXT: b .LBB2_6
135132
; LA64-NEXT: .LBB2_5: # %atomicrmw.start
136133
; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
137-
; LA64-NEXT: dbar 1792
134+
; LA64-NEXT: dbar 0
138135
; LA64-NEXT: .LBB2_6: # %atomicrmw.start
139136
; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1
140137
; LA64-NEXT: move $a3, $a1
@@ -166,14 +163,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
166163
; LA64-NEXT: bne $a2, $a3, .LBB3_5
167164
; LA64-NEXT: # %bb.4: # %atomicrmw.start
168165
; LA64-NEXT: # in Loop: Header=BB3_3 Depth=2
169-
; LA64-NEXT: dbar 0
170166
; LA64-NEXT: move $a5, $a4
171167
; LA64-NEXT: sc.d $a5, $a0, 0
172168
; LA64-NEXT: beqz $a5, .LBB3_3
173169
; LA64-NEXT: b .LBB3_6
174170
; LA64-NEXT: .LBB3_5: # %atomicrmw.start
175171
; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1
176-
; LA64-NEXT: dbar 1792
172+
; LA64-NEXT: dbar 0
177173
; LA64-NEXT: .LBB3_6: # %atomicrmw.start
178174
; LA64-NEXT: # in Loop: Header=BB3_1 Depth=1
179175
; LA64-NEXT: bne $a2, $a3, .LBB3_1
@@ -221,14 +217,13 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
221217
; LA64-NEXT: bne $a6, $a2, .LBB4_5
222218
; LA64-NEXT: # %bb.4: # %atomicrmw.start
223219
; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2
224-
; LA64-NEXT: dbar 0
225220
; LA64-NEXT: move $t0, $a7
226221
; LA64-NEXT: sc.w $t0, $a0, 0
227222
; LA64-NEXT: beqz $t0, .LBB4_3
228223
; LA64-NEXT: b .LBB4_6
229224
; LA64-NEXT: .LBB4_5: # %atomicrmw.start
230225
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
231-
; LA64-NEXT: dbar 1792
226+
; LA64-NEXT: dbar 0
232227
; LA64-NEXT: .LBB4_6: # %atomicrmw.start
233228
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
234229
; LA64-NEXT: addi.w $a7, $a2, 0
@@ -279,14 +274,13 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
279274
; LA64-NEXT: bne $a6, $a2, .LBB5_5
280275
; LA64-NEXT: # %bb.4: # %atomicrmw.start
281276
; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2
282-
; LA64-NEXT: dbar 0
283277
; LA64-NEXT: move $t0, $a7
284278
; LA64-NEXT: sc.w $t0, $a0, 0
285279
; LA64-NEXT: beqz $t0, .LBB5_3
286280
; LA64-NEXT: b .LBB5_6
287281
; LA64-NEXT: .LBB5_5: # %atomicrmw.start
288282
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
289-
; LA64-NEXT: dbar 1792
283+
; LA64-NEXT: dbar 0
290284
; LA64-NEXT: .LBB5_6: # %atomicrmw.start
291285
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
292286
; LA64-NEXT: addi.w $a7, $a2, 0
@@ -325,14 +319,13 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
325319
; LA64-NEXT: bne $a2, $a4, .LBB6_5
326320
; LA64-NEXT: # %bb.4: # %atomicrmw.start
327321
; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2
328-
; LA64-NEXT: dbar 0
329322
; LA64-NEXT: move $a7, $a6
330323
; LA64-NEXT: sc.w $a7, $a0, 0
331324
; LA64-NEXT: beqz $a7, .LBB6_3
332325
; LA64-NEXT: b .LBB6_6
333326
; LA64-NEXT: .LBB6_5: # %atomicrmw.start
334327
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
335-
; LA64-NEXT: dbar 1792
328+
; LA64-NEXT: dbar 0
336329
; LA64-NEXT: .LBB6_6: # %atomicrmw.start
337330
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
338331
; LA64-NEXT: move $a4, $a2
@@ -369,14 +362,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
369362
; LA64-NEXT: bne $a2, $a3, .LBB7_5
370363
; LA64-NEXT: # %bb.4: # %atomicrmw.start
371364
; LA64-NEXT: # in Loop: Header=BB7_3 Depth=2
372-
; LA64-NEXT: dbar 0
373365
; LA64-NEXT: move $a5, $a4
374366
; LA64-NEXT: sc.d $a5, $a0, 0
375367
; LA64-NEXT: beqz $a5, .LBB7_3
376368
; LA64-NEXT: b .LBB7_6
377369
; LA64-NEXT: .LBB7_5: # %atomicrmw.start
378370
; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1
379-
; LA64-NEXT: dbar 1792
371+
; LA64-NEXT: dbar 0
380372
; LA64-NEXT: .LBB7_6: # %atomicrmw.start
381373
; LA64-NEXT: # in Loop: Header=BB7_1 Depth=1
382374
; LA64-NEXT: bne $a2, $a3, .LBB7_1

0 commit comments

Comments
 (0)