
Commit 9b8cb87

[Clang] Add support for scoped atomic thread fence
Summary: Previously we added support for all of the GNU atomic extensions with optional memory scopes, except for `__atomic_thread_fence`. This patch adds the missing `__scoped_atomic_thread_fence` builtin, which should ideally allow us to emit these LLVM sync scopes generically.
1 parent da78ac5 commit 9b8cb87
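
For illustration, a minimal usage sketch of the new builtin (mirroring the test cases added below; the function names here are made up, while the `__ATOMIC_*` and `__MEMORY_SCOPE_*` macros are the ones Clang already predefines):

/* Hypothetical helpers, not part of this commit. */
void flush_to_workgroup(void) {
  /* Constant order and scope: CodeGen folds this to a single scoped fence. */
  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
}

void flush_dynamic(int order, int scope) {
  /* Runtime order/scope: CodeGen emits switches over both arguments. */
  __scoped_atomic_thread_fence(order, scope);
}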


3 files changed: +314 additions, 0 deletions


clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
@@ -1995,6 +1995,12 @@ def AtomicThreadFence : Builtin {
   let Prototype = "void(int)";
 }
 
+def ScopedAtomicThreadFence : Builtin {
+  let Spellings = ["__scoped_atomic_thread_fence"];
+  let Attributes = [NoThrow];
+  let Prototype = "void(int, int)";
+}
+
 def AtomicSignalFence : Builtin {
   let Spellings = ["__atomic_signal_fence"];
   let Attributes = [NoThrow];
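
Read as a plain C declaration, the `void(int, int)` prototype above corresponds to the following signature (a restatement for readability, not an excerpt from a Clang header; the parameter names are illustrative):

void __scoped_atomic_thread_fence(int order, int scope);  /* memory order first, then memory scope */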

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 129 additions & 0 deletions
@@ -5162,6 +5162,135 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Builder.SetInsertPoint(ContBB);
     return RValue::get(nullptr);
   }
+  case Builtin::BI__scoped_atomic_thread_fence: {
+    auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
+
+    Value *Order = EmitScalarExpr(E->getArg(0));
+    Value *Scope = EmitScalarExpr(E->getArg(1));
+    if (isa<llvm::ConstantInt>(Order) && isa<llvm::ConstantInt>(Scope)) {
+      int Ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+      int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue();
+      SyncScope SS = ScopeModel->isValid(Scp)
+                         ? ScopeModel->map(Scp)
+                         : ScopeModel->map(ScopeModel->getFallBackValue());
+      switch (Ord) {
+      case 0:  // memory_order_relaxed
+      default: // invalid order
+        break;
+      case 1: // memory_order_consume
+      case 2: // memory_order_acquire
+        Builder.CreateFence(
+            llvm::AtomicOrdering::Acquire,
+            getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
+                                                llvm::AtomicOrdering::Acquire,
+                                                getLLVMContext()));
+        break;
+      case 3: // memory_order_release
+        Builder.CreateFence(
+            llvm::AtomicOrdering::Release,
+            getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
+                                                llvm::AtomicOrdering::Release,
+                                                getLLVMContext()));
+        break;
+      case 4: // memory_order_acq_rel
+        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
+                            getTargetHooks().getLLVMSyncScopeID(
+                                getLangOpts(), SS,
+                                llvm::AtomicOrdering::AcquireRelease,
+                                getLLVMContext()));
+        break;
+      case 5: // memory_order_seq_cst
+        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
+                            getTargetHooks().getLLVMSyncScopeID(
+                                getLangOpts(), SS,
+                                llvm::AtomicOrdering::SequentiallyConsistent,
+                                getLLVMContext()));
+        break;
+      }
+      return RValue::get(nullptr);
+    }
+
+    llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
+
+    llvm::DenseMap<llvm::BasicBlock *, llvm::AtomicOrdering> OrderBBs;
+    if (isa<llvm::ConstantInt>(Order)) {
+      int Ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+      switch (Ord) {
+      case 0:  // memory_order_relaxed
+      default: // invalid order
+        ContBB->eraseFromParent();
+        return RValue::get(nullptr);
+      case 1: // memory_order_consume
+      case 2: // memory_order_acquire
+        OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Acquire;
+        break;
+      case 3: // memory_order_release
+        OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Release;
+        break;
+      case 4: // memory_order_acq_rel
+        OrderBBs[Builder.GetInsertBlock()] =
+            llvm::AtomicOrdering::AcquireRelease;
+        break;
+      case 5: // memory_order_seq_cst
+        OrderBBs[Builder.GetInsertBlock()] =
+            llvm::AtomicOrdering::SequentiallyConsistent;
+        break;
+      }
+    } else {
+      llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
+      AcquireBB = createBasicBlock("acquire", CurFn);
+      ReleaseBB = createBasicBlock("release", CurFn);
+      AcqRelBB = createBasicBlock("acqrel", CurFn);
+      SeqCstBB = createBasicBlock("seqcst", CurFn);
+
+      Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
+      llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
+      SI->addCase(Builder.getInt32(1), AcquireBB);
+      SI->addCase(Builder.getInt32(2), AcquireBB);
+      SI->addCase(Builder.getInt32(3), ReleaseBB);
+      SI->addCase(Builder.getInt32(4), AcqRelBB);
+      SI->addCase(Builder.getInt32(5), SeqCstBB);
+
+      OrderBBs[AcquireBB] = llvm::AtomicOrdering::Acquire;
+      OrderBBs[ReleaseBB] = llvm::AtomicOrdering::Release;
+      OrderBBs[AcqRelBB] = llvm::AtomicOrdering::AcquireRelease;
+      OrderBBs[SeqCstBB] = llvm::AtomicOrdering::SequentiallyConsistent;
+    }
+
+    for (auto &[OrderBB, Ordering] : OrderBBs) {
+      Builder.SetInsertPoint(OrderBB);
+      if (isa<llvm::ConstantInt>(Scope)) {
+        int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue();
+        SyncScope SS = ScopeModel->isValid(Scp)
+                           ? ScopeModel->map(Scp)
+                           : ScopeModel->map(ScopeModel->getFallBackValue());
+        Builder.CreateFence(Ordering,
+                            getTargetHooks().getLLVMSyncScopeID(
+                                getLangOpts(), SS, Ordering, getLLVMContext()));
+        Builder.CreateBr(ContBB);
+      } else {
+        llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
+        for (unsigned Scp : ScopeModel->getRuntimeValues())
+          BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
+
+        auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
+        llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
+        for (unsigned Scp : ScopeModel->getRuntimeValues()) {
+          auto *B = BBs[Scp];
+          SI->addCase(Builder.getInt32(Scp), B);
+
+          Builder.SetInsertPoint(B);
+          Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
+                                            getLangOpts(), ScopeModel->map(Scp),
+                                            Ordering, getLLVMContext()));
+          Builder.CreateBr(ContBB);
+        }
+      }
+    }
+
+    Builder.SetInsertPoint(ContBB);
+    return RValue::get(nullptr);
+  }
 
   case Builtin::BI__builtin_signbit:
   case Builtin::BI__builtin_signbitf:

clang/test/CodeGen/scoped-fence-ops.c

Lines changed: 179 additions & 0 deletions
@@ -0,0 +1,179 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
+// RUN:   -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+//: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
+//:   -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1a(
+// SPIRV-SAME: ) #[[ATTR0:[0-9]+]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: fence syncscope("workgroup") release
+// SPIRV-NEXT: ret void
+// AMDGCN-LABEL: define hidden void @fe1a(
+// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT: ret void
+//
+void fe1a() {
+  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1b(
+// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4
+// SPIRV-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR]], align 4
+// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR]], align 4
+// SPIRV-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// SPIRV-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// SPIRV-NEXT: i32 2, label %[[ACQUIRE]]
+// SPIRV-NEXT: i32 3, label %[[RELEASE:.*]]
+// SPIRV-NEXT: i32 4, label %[[ACQREL:.*]]
+// SPIRV-NEXT: i32 5, label %[[SEQCST:.*]]
+// SPIRV-NEXT: ]
+// SPIRV: [[ATOMIC_SCOPE_CONTINUE]]:
+// SPIRV-NEXT: ret void
+// SPIRV: [[ACQUIRE]]:
+// SPIRV-NEXT: fence syncscope("workgroup") acquire
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[RELEASE]]:
+// SPIRV-NEXT: fence syncscope("workgroup") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[ACQREL]]:
+// SPIRV-NEXT: fence syncscope("workgroup") acq_rel
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[SEQCST]]:
+// SPIRV-NEXT: fence syncscope("workgroup") seq_cst
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-LABEL: define hidden void @fe1b(
+// AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
+// AMDGCN-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// AMDGCN-NEXT: i32 2, label %[[ACQUIRE]]
+// AMDGCN-NEXT: i32 3, label %[[RELEASE:.*]]
+// AMDGCN-NEXT: i32 4, label %[[ACQREL:.*]]
+// AMDGCN-NEXT: i32 5, label %[[SEQCST:.*]]
+// AMDGCN-NEXT: ]
+// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-NEXT: ret void
+// AMDGCN: [[ACQUIRE]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") acquire
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[RELEASE]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[ACQREL]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") acq_rel
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[SEQCST]]:
+// AMDGCN-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+void fe1b(int ord) {
+  __scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1c(
+// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4
+// SPIRV-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR]], align 4
+// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR]], align 4
+// SPIRV-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// SPIRV-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// SPIRV-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// SPIRV-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// SPIRV-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// SPIRV-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// SPIRV-NEXT: ]
+// SPIRV: [[ATOMIC_SCOPE_CONTINUE]]:
+// SPIRV-NEXT: ret void
+// SPIRV: [[DEVICE_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("device") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[SYSTEM_SCOPE]]:
+// SPIRV-NEXT: fence release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[WORKGROUP_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("workgroup") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[WAVEFRONT_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("subgroup") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[SINGLE_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("singlethread") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-LABEL: define hidden void @fe1c(
+// AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
+// AMDGCN-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// AMDGCN-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// AMDGCN-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// AMDGCN-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// AMDGCN-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// AMDGCN-NEXT: ]
+// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-NEXT: ret void
+// AMDGCN: [[DEVICE_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("agent-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[SYSTEM_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[WORKGROUP_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[WAVEFRONT_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("wavefront-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[SINGLE_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("singlethread-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+void fe1c(int scope) {
+  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, scope);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe2a(
+// SPIRV-SAME: ) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: ret void
+// AMDGCN-LABEL: define hidden void @fe2a(
+// AMDGCN-SAME: ) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: ret void
+//
+void fe2a() {
+  __scoped_atomic_thread_fence(999, __MEMORY_SCOPE_SYSTEM);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe2b(
+// SPIRV-SAME: ) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: fence release
+// SPIRV-NEXT: ret void
+// AMDGCN-LABEL: define hidden void @fe2b(
+// AMDGCN-SAME: ) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: fence syncscope("one-as") release
+// AMDGCN-NEXT: ret void
+//
+void fe2b() {
+  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, 999);
+}
