Skip to content

Commit 100d9b8

Browse files
committed
Reapply "AtomicExpand: Allow incrementally legalizing atomicrmw" (#107307)
This reverts commit 63da545. The instruction loop now uses reverse iteration to avoid sanitizer errors, which also has the side effect of avoiding the AArch64 codegen quality regressions. Closes #107309.
1 parent 383057e commit 100d9b8

File tree

6 files changed

+162
-149
lines changed

6 files changed

+162
-149
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -351,17 +351,30 @@ bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
351351

352352
bool MadeChange = false;
353353

354-
SmallVector<Instruction *, 1> AtomicInsts;
355-
356-
// Changing control-flow while iterating through it is a bad idea, so gather a
357-
// list of all atomic instructions before we start.
358-
for (Instruction &I : instructions(F))
359-
if (I.isAtomic() && !isa<FenceInst>(&I))
360-
AtomicInsts.push_back(&I);
361-
362-
for (auto *I : AtomicInsts) {
363-
if (processAtomicInstr(I))
364-
MadeChange = true;
354+
for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE;) {
355+
BasicBlock *BB = &*BBI;
356+
++BBI;
357+
358+
BasicBlock::reverse_iterator Next;
359+
360+
for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
361+
I = Next) {
362+
Instruction &Inst = *I;
363+
Next = std::next(I);
364+
365+
if (processAtomicInstr(&Inst)) {
366+
MadeChange = true;
367+
368+
// Detect control flow change and resume iteration from the original
369+
// block to inspect any newly inserted blocks. This allows incremental
370+
// legalization of atomicrmw and cmpxchg.
371+
if (Next != E && BB != Next->getParent()) {
372+
BBI = BB->getIterator();
373+
BBE = F.end();
374+
break;
375+
}
376+
}
377+
}
365378
}
366379

367380
return MadeChange;

llvm/test/CodeGen/NVPTX/atomics-sm70.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
6161
; CHECKPTX62-NEXT: shl.b32 %r27, %r26, %r2;
6262
; CHECKPTX62-NEXT: not.b32 %r3, %r27;
6363
; CHECKPTX62-NEXT: ld.u32 %r54, [%r1];
64-
; CHECKPTX62-NEXT: $L__BB0_1: // %atomicrmw.start
64+
; CHECKPTX62-NEXT: $L__BB0_1: // %atomicrmw.start45
6565
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
6666
; CHECKPTX62-NEXT: shr.u32 %r28, %r54, %r2;
6767
; CHECKPTX62-NEXT: cvt.u16.u32 %rs2, %r28;
@@ -74,9 +74,9 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
7474
; CHECKPTX62-NEXT: setp.ne.s32 %p1, %r6, %r54;
7575
; CHECKPTX62-NEXT: mov.u32 %r54, %r6;
7676
; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1;
77-
; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end
77+
; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end44
7878
; CHECKPTX62-NEXT: ld.u32 %r55, [%r1];
79-
; CHECKPTX62-NEXT: $L__BB0_3: // %atomicrmw.start9
79+
; CHECKPTX62-NEXT: $L__BB0_3: // %atomicrmw.start27
8080
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
8181
; CHECKPTX62-NEXT: shr.u32 %r33, %r55, %r2;
8282
; CHECKPTX62-NEXT: cvt.u16.u32 %rs6, %r33;
@@ -90,14 +90,14 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
9090
; CHECKPTX62-NEXT: setp.ne.s32 %p2, %r9, %r55;
9191
; CHECKPTX62-NEXT: mov.u32 %r55, %r9;
9292
; CHECKPTX62-NEXT: @%p2 bra $L__BB0_3;
93-
; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end8
93+
; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end26
9494
; CHECKPTX62-NEXT: and.b32 %r10, %r22, -4;
9595
; CHECKPTX62-NEXT: shl.b32 %r38, %r22, 3;
9696
; CHECKPTX62-NEXT: and.b32 %r11, %r38, 24;
9797
; CHECKPTX62-NEXT: shl.b32 %r40, %r26, %r11;
9898
; CHECKPTX62-NEXT: not.b32 %r12, %r40;
9999
; CHECKPTX62-NEXT: ld.global.u32 %r56, [%r10];
100-
; CHECKPTX62-NEXT: $L__BB0_5: // %atomicrmw.start27
100+
; CHECKPTX62-NEXT: $L__BB0_5: // %atomicrmw.start9
101101
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
102102
; CHECKPTX62-NEXT: shr.u32 %r41, %r56, %r11;
103103
; CHECKPTX62-NEXT: cvt.u16.u32 %rs11, %r41;
@@ -110,14 +110,14 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
110110
; CHECKPTX62-NEXT: setp.ne.s32 %p3, %r15, %r56;
111111
; CHECKPTX62-NEXT: mov.u32 %r56, %r15;
112112
; CHECKPTX62-NEXT: @%p3 bra $L__BB0_5;
113-
; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end26
113+
; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end8
114114
; CHECKPTX62-NEXT: and.b32 %r16, %r23, -4;
115115
; CHECKPTX62-NEXT: shl.b32 %r46, %r23, 3;
116116
; CHECKPTX62-NEXT: and.b32 %r17, %r46, 24;
117117
; CHECKPTX62-NEXT: shl.b32 %r48, %r26, %r17;
118118
; CHECKPTX62-NEXT: not.b32 %r18, %r48;
119119
; CHECKPTX62-NEXT: ld.shared.u32 %r57, [%r16];
120-
; CHECKPTX62-NEXT: $L__BB0_7: // %atomicrmw.start45
120+
; CHECKPTX62-NEXT: $L__BB0_7: // %atomicrmw.start
121121
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
122122
; CHECKPTX62-NEXT: shr.u32 %r49, %r57, %r17;
123123
; CHECKPTX62-NEXT: cvt.u16.u32 %rs15, %r49;
@@ -130,7 +130,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
130130
; CHECKPTX62-NEXT: setp.ne.s32 %p4, %r21, %r57;
131131
; CHECKPTX62-NEXT: mov.u32 %r57, %r21;
132132
; CHECKPTX62-NEXT: @%p4 bra $L__BB0_7;
133-
; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end44
133+
; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end
134134
; CHECKPTX62-NEXT: ret;
135135
%r1 = atomicrmw fadd ptr %dp0, half %val seq_cst
136136
%r2 = atomicrmw fadd ptr %dp0, half 1.0 seq_cst

llvm/test/CodeGen/NVPTX/atomics-sm90.ll

Lines changed: 50 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -45,62 +45,62 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
4545
;
4646
; CHECKPTX71-LABEL: test(
4747
; CHECKPTX71: {
48-
; CHECKPTX71-NEXT: .reg .pred %p<5>;
49-
; CHECKPTX71-NEXT: .reg .b16 %rs<34>;
50-
; CHECKPTX71-NEXT: .reg .b32 %r<4>;
51-
; CHECKPTX71-NEXT: .reg .f32 %f<12>;
48+
; CHECKPTX71-NEXT: .reg .pred %p<5>;
49+
; CHECKPTX71-NEXT: .reg .b16 %rs<34>;
50+
; CHECKPTX71-NEXT: .reg .b32 %r<4>;
51+
; CHECKPTX71-NEXT: .reg .f32 %f<12>;
5252
; CHECKPTX71-EMPTY:
5353
; CHECKPTX71-NEXT: // %bb.0:
54-
; CHECKPTX71-NEXT: ld.param.b16 %rs13, [test_param_3];
55-
; CHECKPTX71-NEXT: ld.param.u32 %r3, [test_param_2];
56-
; CHECKPTX71-NEXT: ld.param.u32 %r2, [test_param_1];
57-
; CHECKPTX71-NEXT: ld.param.u32 %r1, [test_param_0];
58-
; CHECKPTX71-NEXT: ld.b16 %rs30, [%r1];
59-
; CHECKPTX71-NEXT: cvt.f32.bf16 %f1, %rs13;
60-
; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start
54+
; CHECKPTX71-NEXT: ld.param.b16 %rs13, [test_param_3];
55+
; CHECKPTX71-NEXT: ld.param.u32 %r3, [test_param_2];
56+
; CHECKPTX71-NEXT: ld.param.u32 %r2, [test_param_1];
57+
; CHECKPTX71-NEXT: ld.param.u32 %r1, [test_param_0];
58+
; CHECKPTX71-NEXT: ld.b16 %rs30, [%r1];
59+
; CHECKPTX71-NEXT: cvt.f32.bf16 %f1, %rs13;
60+
; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start14
6161
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
62-
; CHECKPTX71-NEXT: cvt.f32.bf16 %f2, %rs30;
63-
; CHECKPTX71-NEXT: add.rn.f32 %f3, %f2, %f1;
64-
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs14, %f3;
65-
; CHECKPTX71-NEXT: atom.cas.b16 %rs17, [%r1], %rs30, %rs14;
66-
; CHECKPTX71-NEXT: setp.ne.s16 %p1, %rs17, %rs30;
67-
; CHECKPTX71-NEXT: mov.u16 %rs30, %rs17;
68-
; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
69-
; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end
70-
; CHECKPTX71-NEXT: ld.b16 %rs31, [%r1];
71-
; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start2
62+
; CHECKPTX71-NEXT: cvt.f32.bf16 %f2, %rs30;
63+
; CHECKPTX71-NEXT: add.rn.f32 %f3, %f2, %f1;
64+
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs14, %f3;
65+
; CHECKPTX71-NEXT: atom.cas.b16 %rs17, [%r1], %rs30, %rs14;
66+
; CHECKPTX71-NEXT: setp.ne.s16 %p1, %rs17, %rs30;
67+
; CHECKPTX71-NEXT: mov.u16 %rs30, %rs17;
68+
; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
69+
; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end13
70+
; CHECKPTX71-NEXT: ld.b16 %rs31, [%r1];
71+
; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start8
7272
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
73-
; CHECKPTX71-NEXT: cvt.f32.bf16 %f4, %rs31;
74-
; CHECKPTX71-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
75-
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs18, %f5;
76-
; CHECKPTX71-NEXT: atom.cas.b16 %rs21, [%r1], %rs31, %rs18;
77-
; CHECKPTX71-NEXT: setp.ne.s16 %p2, %rs21, %rs31;
78-
; CHECKPTX71-NEXT: mov.u16 %rs31, %rs21;
79-
; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
80-
; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end1
81-
; CHECKPTX71-NEXT: ld.global.b16 %rs32, [%r2];
82-
; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start8
73+
; CHECKPTX71-NEXT: cvt.f32.bf16 %f4, %rs31;
74+
; CHECKPTX71-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
75+
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs18, %f5;
76+
; CHECKPTX71-NEXT: atom.cas.b16 %rs21, [%r1], %rs31, %rs18;
77+
; CHECKPTX71-NEXT: setp.ne.s16 %p2, %rs21, %rs31;
78+
; CHECKPTX71-NEXT: mov.u16 %rs31, %rs21;
79+
; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
80+
; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end7
81+
; CHECKPTX71-NEXT: ld.global.b16 %rs32, [%r2];
82+
; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start2
8383
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
84-
; CHECKPTX71-NEXT: cvt.f32.bf16 %f7, %rs32;
85-
; CHECKPTX71-NEXT: add.rn.f32 %f8, %f7, %f1;
86-
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs22, %f8;
87-
; CHECKPTX71-NEXT: atom.global.cas.b16 %rs25, [%r2], %rs32, %rs22;
88-
; CHECKPTX71-NEXT: setp.ne.s16 %p3, %rs25, %rs32;
89-
; CHECKPTX71-NEXT: mov.u16 %rs32, %rs25;
90-
; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
91-
; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end7
92-
; CHECKPTX71-NEXT: ld.shared.b16 %rs33, [%r3];
93-
; CHECKPTX71-NEXT: $L__BB0_7: // %atomicrmw.start14
84+
; CHECKPTX71-NEXT: cvt.f32.bf16 %f7, %rs32;
85+
; CHECKPTX71-NEXT: add.rn.f32 %f8, %f7, %f1;
86+
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs22, %f8;
87+
; CHECKPTX71-NEXT: atom.global.cas.b16 %rs25, [%r2], %rs32, %rs22;
88+
; CHECKPTX71-NEXT: setp.ne.s16 %p3, %rs25, %rs32;
89+
; CHECKPTX71-NEXT: mov.u16 %rs32, %rs25;
90+
; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
91+
; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end1
92+
; CHECKPTX71-NEXT: ld.shared.b16 %rs33, [%r3];
93+
; CHECKPTX71-NEXT: $L__BB0_7: // %atomicrmw.start
9494
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
95-
; CHECKPTX71-NEXT: cvt.f32.bf16 %f10, %rs33;
96-
; CHECKPTX71-NEXT: add.rn.f32 %f11, %f10, %f1;
97-
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs26, %f11;
98-
; CHECKPTX71-NEXT: atom.shared.cas.b16 %rs29, [%r3], %rs33, %rs26;
99-
; CHECKPTX71-NEXT: setp.ne.s16 %p4, %rs29, %rs33;
100-
; CHECKPTX71-NEXT: mov.u16 %rs33, %rs29;
101-
; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
102-
; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end13
103-
; CHECKPTX71-NEXT: ret;
95+
; CHECKPTX71-NEXT: cvt.f32.bf16 %f10, %rs33;
96+
; CHECKPTX71-NEXT: add.rn.f32 %f11, %f10, %f1;
97+
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs26, %f11;
98+
; CHECKPTX71-NEXT: atom.shared.cas.b16 %rs29, [%r3], %rs33, %rs26;
99+
; CHECKPTX71-NEXT: setp.ne.s16 %p4, %rs29, %rs33;
100+
; CHECKPTX71-NEXT: mov.u16 %rs33, %rs29;
101+
; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
102+
; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end
103+
; CHECKPTX71-NEXT: ret;
104104
%r1 = atomicrmw fadd ptr %dp0, bfloat %val seq_cst
105105
%r2 = atomicrmw fadd ptr %dp0, bfloat 1.0 seq_cst
106106
%r3 = atomicrmw fadd ptr addrspace(1) %dp1, bfloat %val seq_cst

0 commit comments

Comments
 (0)