Skip to content

Commit 30b3aab

Browse files
committed
Copy syncscope when expanding atomicrmw into cmpxchg loop
Fixes: SWDEV-280070 Differential Revision: https://reviews.llvm.org/D99902
1 parent 3915144 commit 30b3aab

File tree

6 files changed

+249
-66
lines changed

6 files changed

+249
-66
lines changed

llvm/include/llvm/CodeGen/AtomicExpandUtils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class Value;
2323
/// /* OUT */ %success, /* OUT */ %new_loaded)
2424
using CreateCmpXchgInstFun =
2525
function_ref<void(IRBuilder<> &, Value *, Value *, Value *, Align,
26-
AtomicOrdering, Value *&, Value *&)>;
26+
AtomicOrdering, SyncScope::ID, Value *&, Value *&)>;
2727

2828
/// Expand an atomic RMW instruction into a loop utilizing
2929
/// cmpxchg. You'll want to make sure your target machine likes cmpxchg

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ namespace {
9797
AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
9898
static Value *insertRMWCmpXchgLoop(
9999
IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
100-
AtomicOrdering MemOpOrder,
100+
AtomicOrdering MemOpOrder, SyncScope::ID SSID,
101101
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
102102
CreateCmpXchgInstFun CreateCmpXchg);
103103
bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
@@ -467,8 +467,8 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
467467

468468
static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
469469
Value *Loaded, Value *NewVal, Align AddrAlign,
470-
AtomicOrdering MemOpOrder, Value *&Success,
471-
Value *&NewLoaded) {
470+
AtomicOrdering MemOpOrder, SyncScope::ID SSID,
471+
Value *&Success, Value *&NewLoaded) {
472472
Type *OrigTy = NewVal->getType();
473473

474474
// This code can go away when cmpxchg supports FP types.
@@ -483,7 +483,7 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
483483

484484
Value *Pair = Builder.CreateAtomicCmpXchg(
485485
Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
486-
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
486+
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
487487
Success = Builder.CreateExtractValue(Pair, 1, "success");
488488
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
489489

@@ -768,6 +768,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
768768
void AtomicExpand::expandPartwordAtomicRMW(
769769
AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
770770
AtomicOrdering MemOpOrder = AI->getOrdering();
771+
SyncScope::ID SSID = AI->getSyncScopeID();
771772

772773
IRBuilder<> Builder(AI);
773774

@@ -788,7 +789,8 @@ void AtomicExpand::expandPartwordAtomicRMW(
788789
if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
789790
OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
790791
PMV.AlignedAddrAlignment, MemOpOrder,
791-
PerformPartwordOp, createCmpXchgInstFun);
792+
SSID, PerformPartwordOp,
793+
createCmpXchgInstFun);
792794
} else {
793795
assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
794796
OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
@@ -1392,7 +1394,7 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
13921394

13931395
Value *AtomicExpand::insertRMWCmpXchgLoop(
13941396
IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1395-
AtomicOrdering MemOpOrder,
1397+
AtomicOrdering MemOpOrder, SyncScope::ID SSID,
13961398
function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
13971399
CreateCmpXchgInstFun CreateCmpXchg) {
13981400
LLVMContext &Ctx = Builder.getContext();
@@ -1440,7 +1442,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
14401442
MemOpOrder == AtomicOrdering::Unordered
14411443
? AtomicOrdering::Monotonic
14421444
: MemOpOrder,
1443-
Success, NewLoaded);
1445+
SSID, Success, NewLoaded);
14441446
assert(Success && NewLoaded);
14451447

14461448
Loaded->addIncoming(NewLoaded, LoopBB);
@@ -1477,7 +1479,7 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
14771479
IRBuilder<> Builder(AI);
14781480
Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
14791481
Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1480-
AI->getOrdering(),
1482+
AI->getOrdering(), AI->getSyncScopeID(),
14811483
[&](IRBuilder<> &Builder, Value *Loaded) {
14821484
return performAtomicOp(AI->getOperation(), Builder, Loaded,
14831485
AI->getValOperand());
@@ -1628,11 +1630,11 @@ void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
16281630
expandAtomicRMWToCmpXchg(
16291631
I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded,
16301632
Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1631-
Value *&Success, Value *&NewLoaded) {
1633+
SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
16321634
// Create the CAS instruction normally...
16331635
AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
16341636
Addr, Loaded, NewVal, Alignment, MemOpOrder,
1635-
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
1637+
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
16361638
Success = Builder.CreateExtractValue(Pair, 1, "success");
16371639
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
16381640

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2+
3+
; Check that syncscope is copied from atomicrmw to cmpxchg during expansion.
4+
; There should be no scc unless we have system scope.
5+
6+
; GCN-LABEL: {{^}}expand_atomicrmw_agent:
7+
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
8+
define void @expand_atomicrmw_agent(float addrspace(1)* nocapture %arg) {
9+
entry:
10+
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("agent") monotonic, align 4
11+
ret void
12+
}
13+
14+
; GCN-LABEL: {{^}}expand_atomicrmw_workgroup:
15+
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
16+
define void @expand_atomicrmw_workgroup(float addrspace(1)* nocapture %arg) {
17+
entry:
18+
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("workgroup") monotonic, align 4
19+
ret void
20+
}
21+
22+
; GCN-LABEL: {{^}}expand_atomicrmw_wavefront:
23+
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
24+
define void @expand_atomicrmw_wavefront(float addrspace(1)* nocapture %arg) {
25+
entry:
26+
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("wavefront") monotonic, align 4
27+
ret void
28+
}
29+
30+
; GCN-LABEL: {{^}}expand_atomicrmw_agent_one_as:
31+
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
32+
define void @expand_atomicrmw_agent_one_as(float addrspace(1)* nocapture %arg) {
33+
entry:
34+
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("agent-one-as") monotonic, align 4
35+
ret void
36+
}
37+
38+
; GCN-LABEL: {{^}}expand_atomicrmw_workgroup_one_as:
39+
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
40+
define void @expand_atomicrmw_workgroup_one_as(float addrspace(1)* nocapture %arg) {
41+
entry:
42+
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("workgroup-one-as") monotonic, align 4
43+
ret void
44+
}
45+
46+
; GCN-LABEL: {{^}}expand_atomicrmw_wavefront_one_as:
47+
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
48+
define void @expand_atomicrmw_wavefront_one_as(float addrspace(1)* nocapture %arg) {
49+
entry:
50+
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("wavefront-one-as") monotonic, align 4
51+
ret void
52+
}
53+
54+
; GCN-LABEL: {{^}}expand_atomicrmw_singlethread_one_as:
55+
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc{{$}}
56+
define void @expand_atomicrmw_singlethread_one_as(float addrspace(1)* nocapture %arg) {
57+
entry:
58+
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("singlethread-one-as") monotonic, align 4
59+
ret void
60+
}
61+
62+
; GCN-LABEL: {{^}}expand_atomicrmw_one_as:
63+
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc scc{{$}}
64+
define void @expand_atomicrmw_one_as(float addrspace(1)* nocapture %arg) {
65+
entry:
66+
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 syncscope("one-as") monotonic, align 4
67+
ret void
68+
}
69+
70+
; GCN-LABEL: {{^}}expand_atomicrmw_system:
71+
; GCN: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], off glc scc{{$}}
72+
define void @expand_atomicrmw_system(float addrspace(1)* nocapture %arg) {
73+
entry:
74+
%ret = atomicrmw fadd float addrspace(1)* %arg, float 1.000000e+00 monotonic, align 4
75+
ret void
76+
}

llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp
474474
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
475475
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
476476
; GFX90A-NEXT: buffer_wbl2
477-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
477+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
478478
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc scc
479479
; GFX90A-NEXT: s_waitcnt vmcnt(0)
480480
; GFX90A-NEXT: buffer_invl2
@@ -505,11 +505,8 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(double addrspa
505505
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
506506
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
507507
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
508-
; GFX90A-NEXT: buffer_wbl2
509508
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
510-
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc scc
511-
; GFX90A-NEXT: s_waitcnt vmcnt(0)
512-
; GFX90A-NEXT: buffer_invl2
509+
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc
513510
; GFX90A-NEXT: s_waitcnt vmcnt(0)
514511
; GFX90A-NEXT: buffer_wbinvl1_vol
515512
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -596,7 +593,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr,
596593
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
597594
; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
598595
; GFX90A-NEXT: buffer_wbl2
599-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
596+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
600597
; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc scc
601598
; GFX90A-NEXT: s_waitcnt vmcnt(0)
602599
; GFX90A-NEXT: buffer_invl2
@@ -704,12 +701,13 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #
704701
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
705702
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
706703
; GFX90A-NEXT: buffer_wbl2
707-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
704+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
708705
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc scc
709-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
706+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
710707
; GFX90A-NEXT: buffer_invl2
711708
; GFX90A-NEXT: s_waitcnt vmcnt(0)
712709
; GFX90A-NEXT: buffer_wbinvl1_vol
710+
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
713711
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
714712
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
715713
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
@@ -783,12 +781,13 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 {
783781
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
784782
; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0
785783
; GFX90A-NEXT: buffer_wbl2
786-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
784+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
787785
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc scc
788-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
786+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
789787
; GFX90A-NEXT: buffer_invl2
790788
; GFX90A-NEXT: s_waitcnt vmcnt(0)
791789
; GFX90A-NEXT: buffer_wbinvl1_vol
790+
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
792791
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
793792
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
794793
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]

llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -184,11 +184,8 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_ieee(float addrspace(1)* %
184184
; GFX90A-NEXT: v_mov_b32_e32 v1, v0
185185
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
186186
; GFX90A-NEXT: v_add_f32_e32 v0, 4.0, v1
187-
; GFX90A-NEXT: buffer_wbl2
188187
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
189-
; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc scc
190-
; GFX90A-NEXT: s_waitcnt vmcnt(0)
191-
; GFX90A-NEXT: buffer_invl2
188+
; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
192189
; GFX90A-NEXT: s_waitcnt vmcnt(0)
193190
; GFX90A-NEXT: buffer_wbinvl1_vol
194191
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
@@ -369,11 +366,8 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_ieee(float addrspace(1)*
369366
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
370367
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
371368
; GFX90A-NEXT: v_add_f32_e32 v0, 4.0, v1
372-
; GFX90A-NEXT: buffer_wbl2
373369
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
374-
; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc scc
375-
; GFX90A-NEXT: s_waitcnt vmcnt(0)
376-
; GFX90A-NEXT: buffer_invl2
370+
; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
377371
; GFX90A-NEXT: s_waitcnt vmcnt(0)
378372
; GFX90A-NEXT: buffer_wbinvl1_vol
379373
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
@@ -524,7 +518,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
524518
; GFX900-NEXT: v_mov_b32_e32 v1, v0
525519
; GFX900-NEXT: v_mov_b32_e32 v2, 0
526520
; GFX900-NEXT: v_add_f32_e32 v0, 4.0, v1
527-
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
521+
; GFX900-NEXT: s_waitcnt vmcnt(0)
528522
; GFX900-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
529523
; GFX900-NEXT: s_waitcnt vmcnt(0)
530524
; GFX900-NEXT: buffer_wbinvl1_vol
@@ -550,7 +544,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
550544
; GFX908-NEXT: v_mov_b32_e32 v1, v0
551545
; GFX908-NEXT: v_mov_b32_e32 v2, 0
552546
; GFX908-NEXT: v_add_f32_e32 v0, 4.0, v1
553-
; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
547+
; GFX908-NEXT: s_waitcnt vmcnt(0)
554548
; GFX908-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
555549
; GFX908-NEXT: s_waitcnt vmcnt(0)
556550
; GFX908-NEXT: buffer_wbinvl1_vol
@@ -577,7 +571,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
577571
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
578572
; GFX90A-NEXT: v_add_f32_e32 v0, 4.0, v1
579573
; GFX90A-NEXT: buffer_wbl2
580-
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
574+
; GFX90A-NEXT: s_waitcnt vmcnt(0)
581575
; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc scc
582576
; GFX90A-NEXT: s_waitcnt vmcnt(0)
583577
; GFX90A-NEXT: buffer_invl2
@@ -605,7 +599,7 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)*
605599
; GFX10-NEXT: v_mov_b32_e32 v1, v0
606600
; GFX10-NEXT: v_mov_b32_e32 v2, 0
607601
; GFX10-NEXT: v_add_f32_e32 v0, 4.0, v1
608-
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
602+
; GFX10-NEXT: s_waitcnt vmcnt(0)
609603
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
610604
; GFX10-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc
611605
; GFX10-NEXT: s_waitcnt vmcnt(0)

0 commit comments

Comments
 (0)