Skip to content

Commit 3104681

Browse files
author
Kai Luo
authored
[PowerPC][Atomics] Remove redundant block to clear reservation (#68430)
This PR follows what https://reviews.llvm.org/D134783 does, for quadword CAS.
1 parent bf90ffb commit 3104681

File tree

2 files changed

+132
-58
lines changed

2 files changed

+132
-58
lines changed

llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -239,23 +239,18 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
239239
// loop:
240240
// old = lqarx ptr
241241
// <compare old, cmp>
242-
// bne 0, fail
242+
// bne 0, exit
243243
// succ:
244244
// stqcx new ptr
245245
// bne 0, loop
246-
// b exit
247-
// fail:
248-
// stqcx old ptr
249246
// exit:
250247
// ....
251248
MachineFunction::iterator MFI = ++MBB.getIterator();
252249
MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB);
253250
MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB);
254-
MachineBasicBlock *CmpFailMBB = MF->CreateMachineBasicBlock(BB);
255251
MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
256252
MF->insert(MFI, LoopCmpMBB);
257253
MF->insert(MFI, CmpSuccMBB);
258-
MF->insert(MFI, CmpFailMBB);
259254
MF->insert(MFI, ExitMBB);
260255
ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
261256
MBB.end());
@@ -276,9 +271,9 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
276271
BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
277272
.addImm(PPC::PRED_NE)
278273
.addReg(PPC::CR0)
279-
.addMBB(CmpFailMBB);
274+
.addMBB(ExitMBB);
280275
CurrentMBB->addSuccessor(CmpSuccMBB);
281-
CurrentMBB->addSuccessor(CmpFailMBB);
276+
CurrentMBB->addSuccessor(ExitMBB);
282277
// Build succ.
283278
CurrentMBB = CmpSuccMBB;
284279
PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
@@ -288,16 +283,11 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
288283
.addImm(PPC::PRED_NE)
289284
.addReg(PPC::CR0)
290285
.addMBB(LoopCmpMBB);
291-
BuildMI(CurrentMBB, DL, TII->get(PPC::B)).addMBB(ExitMBB);
292286
CurrentMBB->addSuccessor(LoopCmpMBB);
293287
CurrentMBB->addSuccessor(ExitMBB);
294-
CurrentMBB = CmpFailMBB;
295-
BuildMI(CurrentMBB, DL, SC).addReg(Old).addReg(RA).addReg(RB);
296-
CurrentMBB->addSuccessor(ExitMBB);
297288

298289
recomputeLiveIns(*LoopCmpMBB);
299290
recomputeLiveIns(*CmpSuccMBB);
300-
recomputeLiveIns(*CmpFailMBB);
301291
recomputeLiveIns(*ExitMBB);
302292
NMBBI = MBB.end();
303293
MI.eraseFromParent();

llvm/test/CodeGen/PowerPC/atomics-i128.ll

Lines changed: 129 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -986,10 +986,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
986986
; CHECK-NEXT: mr r10, r6
987987
; CHECK-NEXT: stqcx. r10, 0, r3
988988
; CHECK-NEXT: bne cr0, .LBB7_1
989-
; CHECK-NEXT: b .LBB7_4
990989
; CHECK-NEXT: .LBB7_3: # %entry
991-
; CHECK-NEXT: stqcx. r8, 0, r3
992-
; CHECK-NEXT: .LBB7_4: # %entry
993990
; CHECK-NEXT: lwsync
994991
; CHECK-NEXT: mr r3, r8
995992
; CHECK-NEXT: mr r4, r9
@@ -1033,10 +1030,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
10331030
; LE-PWR8-NEXT: mr r10, r7
10341031
; LE-PWR8-NEXT: stqcx. r10, 0, r3
10351032
; LE-PWR8-NEXT: bne cr0, .LBB7_1
1036-
; LE-PWR8-NEXT: b .LBB7_4
10371033
; LE-PWR8-NEXT: .LBB7_3: # %entry
1038-
; LE-PWR8-NEXT: stqcx. r8, 0, r3
1039-
; LE-PWR8-NEXT: .LBB7_4: # %entry
10401034
; LE-PWR8-NEXT: lwsync
10411035
; LE-PWR8-NEXT: mr r3, r9
10421036
; LE-PWR8-NEXT: mr r4, r8
@@ -1057,10 +1051,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
10571051
; AIX64-PWR8-NEXT: mr r10, r6
10581052
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
10591053
; AIX64-PWR8-NEXT: bne cr0, L..BB7_1
1060-
; AIX64-PWR8-NEXT: b L..BB7_4
10611054
; AIX64-PWR8-NEXT: L..BB7_3: # %entry
1062-
; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
1063-
; AIX64-PWR8-NEXT: L..BB7_4: # %entry
10641055
; AIX64-PWR8-NEXT: lwsync
10651056
; AIX64-PWR8-NEXT: mr r3, r8
10661057
; AIX64-PWR8-NEXT: mr r4, r9
@@ -1121,10 +1112,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
11211112
; CHECK-NEXT: mr r10, r6
11221113
; CHECK-NEXT: stqcx. r10, 0, r3
11231114
; CHECK-NEXT: bne cr0, .LBB8_1
1124-
; CHECK-NEXT: b .LBB8_4
11251115
; CHECK-NEXT: .LBB8_3: # %entry
1126-
; CHECK-NEXT: stqcx. r8, 0, r3
1127-
; CHECK-NEXT: .LBB8_4: # %entry
11281116
; CHECK-NEXT: mr r3, r8
11291117
; CHECK-NEXT: mr r4, r9
11301118
; CHECK-NEXT: blr
@@ -1168,10 +1156,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
11681156
; LE-PWR8-NEXT: mr r10, r7
11691157
; LE-PWR8-NEXT: stqcx. r10, 0, r3
11701158
; LE-PWR8-NEXT: bne cr0, .LBB8_1
1171-
; LE-PWR8-NEXT: b .LBB8_4
11721159
; LE-PWR8-NEXT: .LBB8_3: # %entry
1173-
; LE-PWR8-NEXT: stqcx. r8, 0, r3
1174-
; LE-PWR8-NEXT: .LBB8_4: # %entry
11751160
; LE-PWR8-NEXT: mr r3, r9
11761161
; LE-PWR8-NEXT: mr r4, r8
11771162
; LE-PWR8-NEXT: blr
@@ -1192,10 +1177,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
11921177
; AIX64-PWR8-NEXT: mr r10, r6
11931178
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
11941179
; AIX64-PWR8-NEXT: bne cr0, L..BB8_1
1195-
; AIX64-PWR8-NEXT: b L..BB8_4
11961180
; AIX64-PWR8-NEXT: L..BB8_3: # %entry
1197-
; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
1198-
; AIX64-PWR8-NEXT: L..BB8_4: # %entry
11991181
; AIX64-PWR8-NEXT: mr r3, r8
12001182
; AIX64-PWR8-NEXT: mr r4, r9
12011183
; AIX64-PWR8-NEXT: blr
@@ -1255,10 +1237,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
12551237
; CHECK-NEXT: mr r10, r6
12561238
; CHECK-NEXT: stqcx. r10, 0, r3
12571239
; CHECK-NEXT: bne cr0, .LBB9_1
1258-
; CHECK-NEXT: b .LBB9_4
12591240
; CHECK-NEXT: .LBB9_3: # %entry
1260-
; CHECK-NEXT: stqcx. r8, 0, r3
1261-
; CHECK-NEXT: .LBB9_4: # %entry
12621241
; CHECK-NEXT: lwsync
12631242
; CHECK-NEXT: mr r3, r8
12641243
; CHECK-NEXT: mr r4, r9
@@ -1303,10 +1282,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
13031282
; LE-PWR8-NEXT: mr r10, r7
13041283
; LE-PWR8-NEXT: stqcx. r10, 0, r3
13051284
; LE-PWR8-NEXT: bne cr0, .LBB9_1
1306-
; LE-PWR8-NEXT: b .LBB9_4
13071285
; LE-PWR8-NEXT: .LBB9_3: # %entry
1308-
; LE-PWR8-NEXT: stqcx. r8, 0, r3
1309-
; LE-PWR8-NEXT: .LBB9_4: # %entry
13101286
; LE-PWR8-NEXT: lwsync
13111287
; LE-PWR8-NEXT: mr r3, r9
13121288
; LE-PWR8-NEXT: mr r4, r8
@@ -1328,10 +1304,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
13281304
; AIX64-PWR8-NEXT: mr r10, r6
13291305
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
13301306
; AIX64-PWR8-NEXT: bne cr0, L..BB9_1
1331-
; AIX64-PWR8-NEXT: b L..BB9_4
13321307
; AIX64-PWR8-NEXT: L..BB9_3: # %entry
1333-
; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
1334-
; AIX64-PWR8-NEXT: L..BB9_4: # %entry
13351308
; AIX64-PWR8-NEXT: lwsync
13361309
; AIX64-PWR8-NEXT: mr r3, r8
13371310
; AIX64-PWR8-NEXT: mr r4, r9
@@ -1392,10 +1365,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
13921365
; CHECK-NEXT: mr r10, r6
13931366
; CHECK-NEXT: stqcx. r10, 0, r3
13941367
; CHECK-NEXT: bne cr0, .LBB10_1
1395-
; CHECK-NEXT: b .LBB10_4
13961368
; CHECK-NEXT: .LBB10_3: # %entry
1397-
; CHECK-NEXT: stqcx. r8, 0, r3
1398-
; CHECK-NEXT: .LBB10_4: # %entry
13991369
; CHECK-NEXT: lwsync
14001370
; CHECK-NEXT: mr r3, r8
14011371
; CHECK-NEXT: mr r4, r9
@@ -1440,10 +1410,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
14401410
; LE-PWR8-NEXT: mr r10, r7
14411411
; LE-PWR8-NEXT: stqcx. r10, 0, r3
14421412
; LE-PWR8-NEXT: bne cr0, .LBB10_1
1443-
; LE-PWR8-NEXT: b .LBB10_4
14441413
; LE-PWR8-NEXT: .LBB10_3: # %entry
1445-
; LE-PWR8-NEXT: stqcx. r8, 0, r3
1446-
; LE-PWR8-NEXT: .LBB10_4: # %entry
14471414
; LE-PWR8-NEXT: lwsync
14481415
; LE-PWR8-NEXT: mr r3, r9
14491416
; LE-PWR8-NEXT: mr r4, r8
@@ -1465,10 +1432,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
14651432
; AIX64-PWR8-NEXT: mr r10, r6
14661433
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
14671434
; AIX64-PWR8-NEXT: bne cr0, L..BB10_1
1468-
; AIX64-PWR8-NEXT: b L..BB10_4
14691435
; AIX64-PWR8-NEXT: L..BB10_3: # %entry
1470-
; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
1471-
; AIX64-PWR8-NEXT: L..BB10_4: # %entry
14721436
; AIX64-PWR8-NEXT: lwsync
14731437
; AIX64-PWR8-NEXT: mr r3, r8
14741438
; AIX64-PWR8-NEXT: mr r4, r9
@@ -1529,10 +1493,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
15291493
; CHECK-NEXT: mr r10, r6
15301494
; CHECK-NEXT: stqcx. r10, 0, r3
15311495
; CHECK-NEXT: bne cr0, .LBB11_1
1532-
; CHECK-NEXT: b .LBB11_4
15331496
; CHECK-NEXT: .LBB11_3: # %entry
1534-
; CHECK-NEXT: stqcx. r8, 0, r3
1535-
; CHECK-NEXT: .LBB11_4: # %entry
15361497
; CHECK-NEXT: lwsync
15371498
; CHECK-NEXT: xor r3, r4, r8
15381499
; CHECK-NEXT: xor r4, r5, r9
@@ -1578,10 +1539,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
15781539
; LE-PWR8-NEXT: mr r10, r7
15791540
; LE-PWR8-NEXT: stqcx. r10, 0, r3
15801541
; LE-PWR8-NEXT: bne cr0, .LBB11_1
1581-
; LE-PWR8-NEXT: b .LBB11_4
15821542
; LE-PWR8-NEXT: .LBB11_3: # %entry
1583-
; LE-PWR8-NEXT: stqcx. r8, 0, r3
1584-
; LE-PWR8-NEXT: .LBB11_4: # %entry
15851543
; LE-PWR8-NEXT: lwsync
15861544
; LE-PWR8-NEXT: xor r3, r5, r8
15871545
; LE-PWR8-NEXT: xor r4, r4, r9
@@ -1606,10 +1564,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
16061564
; AIX64-PWR8-NEXT: mr r10, r6
16071565
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
16081566
; AIX64-PWR8-NEXT: bne cr0, L..BB11_1
1609-
; AIX64-PWR8-NEXT: b L..BB11_4
16101567
; AIX64-PWR8-NEXT: L..BB11_3: # %entry
1611-
; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
1612-
; AIX64-PWR8-NEXT: L..BB11_4: # %entry
16131568
; AIX64-PWR8-NEXT: lwsync
16141569
; AIX64-PWR8-NEXT: xor r3, r4, r8
16151570
; AIX64-PWR8-NEXT: xor r4, r5, r9
@@ -1651,3 +1606,132 @@ entry:
16511606
%1 = extractvalue { i128, i1 } %0, 1
16521607
ret i1 %1
16531608
}
1609+
1610+
;; TODO: Optimize CAS at exit block when bool value is returned.
1611+
define i1 @bool_cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
1612+
; CHECK-LABEL: bool_cas_weak_acquire_acquire:
1613+
; CHECK: # %bb.0: # %entry
1614+
; CHECK-NEXT: .LBB12_1: # %entry
1615+
; CHECK-NEXT: #
1616+
; CHECK-NEXT: lqarx r8, 0, r3
1617+
; CHECK-NEXT: xor r11, r9, r5
1618+
; CHECK-NEXT: xor r10, r8, r4
1619+
; CHECK-NEXT: or. r11, r11, r10
1620+
; CHECK-NEXT: bne cr0, .LBB12_3
1621+
; CHECK-NEXT: # %bb.2: # %entry
1622+
; CHECK-NEXT: #
1623+
; CHECK-NEXT: mr r11, r7
1624+
; CHECK-NEXT: mr r10, r6
1625+
; CHECK-NEXT: stqcx. r10, 0, r3
1626+
; CHECK-NEXT: bne cr0, .LBB12_1
1627+
; CHECK-NEXT: .LBB12_3: # %entry
1628+
; CHECK-NEXT: lwsync
1629+
; CHECK-NEXT: xor r3, r4, r8
1630+
; CHECK-NEXT: xor r4, r5, r9
1631+
; CHECK-NEXT: or r3, r4, r3
1632+
; CHECK-NEXT: cntlzd r3, r3
1633+
; CHECK-NEXT: rldicl r3, r3, 58, 63
1634+
; CHECK-NEXT: blr
1635+
;
1636+
; PWR7-LABEL: bool_cas_weak_acquire_acquire:
1637+
; PWR7: # %bb.0: # %entry
1638+
; PWR7-NEXT: mflr r0
1639+
; PWR7-NEXT: stdu r1, -128(r1)
1640+
; PWR7-NEXT: std r0, 144(r1)
1641+
; PWR7-NEXT: .cfi_def_cfa_offset 128
1642+
; PWR7-NEXT: .cfi_offset lr, 16
1643+
; PWR7-NEXT: std r5, 120(r1)
1644+
; PWR7-NEXT: std r4, 112(r1)
1645+
; PWR7-NEXT: addi r4, r1, 112
1646+
; PWR7-NEXT: mr r5, r6
1647+
; PWR7-NEXT: mr r6, r7
1648+
; PWR7-NEXT: li r7, 2
1649+
; PWR7-NEXT: li r8, 2
1650+
; PWR7-NEXT: bl __atomic_compare_exchange_16
1651+
; PWR7-NEXT: nop
1652+
; PWR7-NEXT: addi r1, r1, 128
1653+
; PWR7-NEXT: ld r0, 16(r1)
1654+
; PWR7-NEXT: mtlr r0
1655+
; PWR7-NEXT: blr
1656+
;
1657+
; LE-PWR8-LABEL: bool_cas_weak_acquire_acquire:
1658+
; LE-PWR8: # %bb.0: # %entry
1659+
; LE-PWR8-NEXT: .LBB12_1: # %entry
1660+
; LE-PWR8-NEXT: #
1661+
; LE-PWR8-NEXT: lqarx r8, 0, r3
1662+
; LE-PWR8-NEXT: xor r11, r9, r4
1663+
; LE-PWR8-NEXT: xor r10, r8, r5
1664+
; LE-PWR8-NEXT: or. r11, r11, r10
1665+
; LE-PWR8-NEXT: bne cr0, .LBB12_3
1666+
; LE-PWR8-NEXT: # %bb.2: # %entry
1667+
; LE-PWR8-NEXT: #
1668+
; LE-PWR8-NEXT: mr r11, r6
1669+
; LE-PWR8-NEXT: mr r10, r7
1670+
; LE-PWR8-NEXT: stqcx. r10, 0, r3
1671+
; LE-PWR8-NEXT: bne cr0, .LBB12_1
1672+
; LE-PWR8-NEXT: .LBB12_3: # %entry
1673+
; LE-PWR8-NEXT: lwsync
1674+
; LE-PWR8-NEXT: xor r3, r5, r8
1675+
; LE-PWR8-NEXT: xor r4, r4, r9
1676+
; LE-PWR8-NEXT: or r3, r4, r3
1677+
; LE-PWR8-NEXT: cntlzd r3, r3
1678+
; LE-PWR8-NEXT: rldicl r3, r3, 58, 63
1679+
; LE-PWR8-NEXT: blr
1680+
;
1681+
; AIX64-PWR8-LABEL: bool_cas_weak_acquire_acquire:
1682+
; AIX64-PWR8: # %bb.0: # %entry
1683+
; AIX64-PWR8-NEXT: L..BB12_1: # %entry
1684+
; AIX64-PWR8-NEXT: #
1685+
; AIX64-PWR8-NEXT: lqarx r8, 0, r3
1686+
; AIX64-PWR8-NEXT: xor r11, r9, r5
1687+
; AIX64-PWR8-NEXT: xor r10, r8, r4
1688+
; AIX64-PWR8-NEXT: or. r11, r11, r10
1689+
; AIX64-PWR8-NEXT: bne cr0, L..BB12_3
1690+
; AIX64-PWR8-NEXT: # %bb.2: # %entry
1691+
; AIX64-PWR8-NEXT: #
1692+
; AIX64-PWR8-NEXT: mr r11, r7
1693+
; AIX64-PWR8-NEXT: mr r10, r6
1694+
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
1695+
; AIX64-PWR8-NEXT: bne cr0, L..BB12_1
1696+
; AIX64-PWR8-NEXT: L..BB12_3: # %entry
1697+
; AIX64-PWR8-NEXT: lwsync
1698+
; AIX64-PWR8-NEXT: xor r3, r4, r8
1699+
; AIX64-PWR8-NEXT: xor r4, r5, r9
1700+
; AIX64-PWR8-NEXT: or r3, r4, r3
1701+
; AIX64-PWR8-NEXT: cntlzd r3, r3
1702+
; AIX64-PWR8-NEXT: rldicl r3, r3, 58, 63
1703+
; AIX64-PWR8-NEXT: blr
1704+
;
1705+
; PPC-PWR8-LABEL: bool_cas_weak_acquire_acquire:
1706+
; PPC-PWR8: # %bb.0: # %entry
1707+
; PPC-PWR8-NEXT: mflr r0
1708+
; PPC-PWR8-NEXT: stwu r1, -48(r1)
1709+
; PPC-PWR8-NEXT: stw r0, 52(r1)
1710+
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
1711+
; PPC-PWR8-NEXT: .cfi_offset lr, 4
1712+
; PPC-PWR8-NEXT: mr r4, r3
1713+
; PPC-PWR8-NEXT: lwz r3, 60(r1)
1714+
; PPC-PWR8-NEXT: stw r8, 44(r1)
1715+
; PPC-PWR8-NEXT: stw r7, 40(r1)
1716+
; PPC-PWR8-NEXT: stw r6, 36(r1)
1717+
; PPC-PWR8-NEXT: stw r5, 32(r1)
1718+
; PPC-PWR8-NEXT: addi r5, r1, 32
1719+
; PPC-PWR8-NEXT: addi r6, r1, 16
1720+
; PPC-PWR8-NEXT: li r7, 2
1721+
; PPC-PWR8-NEXT: li r8, 2
1722+
; PPC-PWR8-NEXT: stw r10, 20(r1)
1723+
; PPC-PWR8-NEXT: stw r9, 16(r1)
1724+
; PPC-PWR8-NEXT: stw r3, 28(r1)
1725+
; PPC-PWR8-NEXT: lwz r3, 56(r1)
1726+
; PPC-PWR8-NEXT: stw r3, 24(r1)
1727+
; PPC-PWR8-NEXT: li r3, 16
1728+
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
1729+
; PPC-PWR8-NEXT: lwz r0, 52(r1)
1730+
; PPC-PWR8-NEXT: addi r1, r1, 48
1731+
; PPC-PWR8-NEXT: mtlr r0
1732+
; PPC-PWR8-NEXT: blr
1733+
entry:
1734+
%0 = cmpxchg weak ptr %a, i128 %cmp, i128 %new acquire acquire
1735+
%1 = extractvalue { i128, i1 } %0, 1
1736+
ret i1 %1
1737+
}

0 commit comments

Comments
 (0)