Commit d8105f5

[Clang] Add support for scoped atomic thread fence
Summary: Previously we added support for all of the atomic GNU extensions with an optional memory scope except for `__atomic_thread_fence`. This patch adds the missing `__scoped_atomic_thread_fence` builtin, which should allow us to emit these LLVM sync scopes generically.
1 parent a9cd941 commit d8105f5
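
For context, a minimal usage sketch (not part of the commit) of the new builtin, mirroring the test cases added below. The first argument is a GNU `__ATOMIC_*` memory order and the second a `__MEMORY_SCOPE_*` value from the existing scoped-atomics extension; function names here are illustrative.

/* Illustrative only; the macros are predefined by Clang. */
void release_to_workgroup(void) {
  /* Release fence whose effects only need to be visible within the
     caller's work-group. */
  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
}

/* Order and scope may also be runtime values; codegen then switches
   over them (see the CGBuiltin.cpp changes below). */
void release_dynamic(int order, int scope) {
  __scoped_atomic_thread_fence(order, scope);
}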

File tree

3 files changed: 314 additions & 0 deletions


clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
@@ -1995,6 +1995,12 @@ def AtomicThreadFence : Builtin {
   let Prototype = "void(int)";
 }
 
+def ScopedAtomicThreadFence : Builtin {
+  let Spellings = ["__scoped_atomic_thread_fence"];
+  let Attributes = [NoThrow];
+  let Prototype = "void(int, int)";
+}
+
 def AtomicSignalFence : Builtin {
   let Spellings = ["__atomic_signal_fence"];
   let Attributes = [NoThrow];
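
Viewed from the C side, the table entry above (Prototype "void(int, int)") makes the builtin behave roughly like the following declaration; the parameter names are illustrative, since the builtin is declared internally by Clang rather than in any header.

/* Illustrative prototype implied by Prototype = "void(int, int)". */
void __scoped_atomic_thread_fence(int memory_order, int memory_scope);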

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 129 additions & 0 deletions
@@ -5133,6 +5133,135 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Builder.SetInsertPoint(ContBB);
     return RValue::get(nullptr);
   }
+  case Builtin::BI__scoped_atomic_thread_fence: {
+    auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
+
+    Value *Order = EmitScalarExpr(E->getArg(0));
+    Value *Scope = EmitScalarExpr(E->getArg(1));
+    if (isa<llvm::ConstantInt>(Order) && isa<llvm::ConstantInt>(Scope)) {
+      int Ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+      int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue();
+      SyncScope SS = ScopeModel->isValid(Scp)
+                         ? ScopeModel->map(Scp)
+                         : ScopeModel->map(ScopeModel->getFallBackValue());
+      switch (Ord) {
+      case 0: // memory_order_relaxed
+      default: // invalid order
+        break;
+      case 1: // memory_order_consume
+      case 2: // memory_order_acquire
+        Builder.CreateFence(
+            llvm::AtomicOrdering::Acquire,
+            getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
+                                                llvm::AtomicOrdering::Acquire,
+                                                getLLVMContext()));
+        break;
+      case 3: // memory_order_release
+        Builder.CreateFence(
+            llvm::AtomicOrdering::Release,
+            getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
+                                                llvm::AtomicOrdering::Release,
+                                                getLLVMContext()));
+        break;
+      case 4: // memory_order_acq_rel
+        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
+                            getTargetHooks().getLLVMSyncScopeID(
+                                getLangOpts(), SS,
+                                llvm::AtomicOrdering::AcquireRelease,
+                                getLLVMContext()));
+        break;
+      case 5: // memory_order_seq_cst
+        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
+                            getTargetHooks().getLLVMSyncScopeID(
+                                getLangOpts(), SS,
+                                llvm::AtomicOrdering::SequentiallyConsistent,
+                                getLLVMContext()));
+        break;
+      }
+      return RValue::get(nullptr);
+    }
+
+    llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
+
+    llvm::DenseMap<llvm::BasicBlock *, llvm::AtomicOrdering> OrderBBs;
+    if (isa<llvm::ConstantInt>(Order)) {
+      int Ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+      switch (Ord) {
+      case 0: // memory_order_relaxed
+      default: // invalid order
+        ContBB->eraseFromParent();
+        return RValue::get(nullptr);
+      case 1: // memory_order_consume
+      case 2: // memory_order_acquire
+        OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Acquire;
+        break;
+      case 3: // memory_order_release
+        OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Release;
+        break;
+      case 4: // memory_order_acq_rel
+        OrderBBs[Builder.GetInsertBlock()] =
+            llvm::AtomicOrdering::AcquireRelease;
+        break;
+      case 5: // memory_order_seq_cst
+        OrderBBs[Builder.GetInsertBlock()] =
+            llvm::AtomicOrdering::SequentiallyConsistent;
+        break;
+      }
+    } else {
+      llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
+      AcquireBB = createBasicBlock("acquire", CurFn);
+      ReleaseBB = createBasicBlock("release", CurFn);
+      AcqRelBB = createBasicBlock("acqrel", CurFn);
+      SeqCstBB = createBasicBlock("seqcst", CurFn);
+
+      Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
+      llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
+      SI->addCase(Builder.getInt32(1), AcquireBB);
+      SI->addCase(Builder.getInt32(2), AcquireBB);
+      SI->addCase(Builder.getInt32(3), ReleaseBB);
+      SI->addCase(Builder.getInt32(4), AcqRelBB);
+      SI->addCase(Builder.getInt32(5), SeqCstBB);
+
+      OrderBBs[AcquireBB] = llvm::AtomicOrdering::Acquire;
+      OrderBBs[ReleaseBB] = llvm::AtomicOrdering::Release;
+      OrderBBs[AcqRelBB] = llvm::AtomicOrdering::AcquireRelease;
+      OrderBBs[SeqCstBB] = llvm::AtomicOrdering::SequentiallyConsistent;
+    }
+
+    for (auto &[OrderBB, Ordering] : OrderBBs) {
+      Builder.SetInsertPoint(OrderBB);
+      if (isa<llvm::ConstantInt>(Scope)) {
+        int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue();
+        SyncScope SS = ScopeModel->isValid(Scp)
+                           ? ScopeModel->map(Scp)
+                           : ScopeModel->map(ScopeModel->getFallBackValue());
+        Builder.CreateFence(Ordering,
+                            getTargetHooks().getLLVMSyncScopeID(
+                                getLangOpts(), SS, Ordering, getLLVMContext()));
+        Builder.CreateBr(ContBB);
+      } else {
+        llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
+        for (unsigned Scp : ScopeModel->getRuntimeValues())
+          BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
+
+        auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
+        llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
+        for (unsigned Scp : ScopeModel->getRuntimeValues()) {
+          auto *B = BBs[Scp];
+          SI->addCase(Builder.getInt32(Scp), B);
+
+          Builder.SetInsertPoint(B);
+          Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
+                                            getLangOpts(), ScopeModel->map(Scp),
+                                            Ordering, getLLVMContext()));
+          Builder.CreateBr(ContBB);
+        }
+      }
+    }
+
+    Builder.SetInsertPoint(ContBB);
+    return RValue::get(nullptr);
+  }
 
   case Builtin::BI__builtin_signbit:
   case Builtin::BI__builtin_signbitf:
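
Two consequences of the defaults above are worth calling out: an unrecognized memory order emits no fence at all (the continue block is erased), while an out-of-range scope falls back to ScopeModel->getFallBackValue(), i.e. system scope. A small sketch of both, matching the fe2a/fe2b cases in the test file below; function names are illustrative.

/* Sketch only: behavior follows the codegen above. */
void bad_order(void) {
  /* 999 is not a valid memory order: no fence is emitted. */
  __scoped_atomic_thread_fence(999, __MEMORY_SCOPE_SYSTEM);
}

void bad_scope(void) {
  /* 999 is not a valid scope: it falls back to system scope, e.g.
     fence syncscope("one-as") release on amdgcn. */
  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, 999);
}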

clang/test/CodeGen/scoped-fence-ops.c

Lines changed: 179 additions & 0 deletions
@@ -0,0 +1,179 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
+// RUN:   -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+//: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
+//:   -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1a(
+// SPIRV-SAME: ) #[[ATTR0:[0-9]+]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: fence syncscope("workgroup") release
+// SPIRV-NEXT: ret void
+// AMDGCN-LABEL: define hidden void @fe1a(
+// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT: ret void
+//
+void fe1a() {
+  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1b(
+// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4
+// SPIRV-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR]], align 4
+// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR]], align 4
+// SPIRV-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// SPIRV-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// SPIRV-NEXT: i32 2, label %[[ACQUIRE]]
+// SPIRV-NEXT: i32 3, label %[[RELEASE:.*]]
+// SPIRV-NEXT: i32 4, label %[[ACQREL:.*]]
+// SPIRV-NEXT: i32 5, label %[[SEQCST:.*]]
+// SPIRV-NEXT: ]
+// SPIRV: [[ATOMIC_SCOPE_CONTINUE]]:
+// SPIRV-NEXT: ret void
+// SPIRV: [[ACQUIRE]]:
+// SPIRV-NEXT: fence syncscope("workgroup") acquire
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[RELEASE]]:
+// SPIRV-NEXT: fence syncscope("workgroup") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[ACQREL]]:
+// SPIRV-NEXT: fence syncscope("workgroup") acq_rel
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[SEQCST]]:
+// SPIRV-NEXT: fence syncscope("workgroup") seq_cst
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-LABEL: define hidden void @fe1b(
+// AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
+// AMDGCN-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// AMDGCN-NEXT: i32 2, label %[[ACQUIRE]]
+// AMDGCN-NEXT: i32 3, label %[[RELEASE:.*]]
+// AMDGCN-NEXT: i32 4, label %[[ACQREL:.*]]
+// AMDGCN-NEXT: i32 5, label %[[SEQCST:.*]]
+// AMDGCN-NEXT: ]
+// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-NEXT: ret void
+// AMDGCN: [[ACQUIRE]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") acquire
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[RELEASE]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[ACQREL]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") acq_rel
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[SEQCST]]:
+// AMDGCN-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+void fe1b(int ord) {
+  __scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1c(
+// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4
+// SPIRV-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR]], align 4
+// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR]], align 4
+// SPIRV-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// SPIRV-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// SPIRV-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// SPIRV-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// SPIRV-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// SPIRV-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// SPIRV-NEXT: ]
+// SPIRV: [[ATOMIC_SCOPE_CONTINUE]]:
+// SPIRV-NEXT: ret void
+// SPIRV: [[DEVICE_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("device") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[SYSTEM_SCOPE]]:
+// SPIRV-NEXT: fence release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[WORKGROUP_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("workgroup") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[WAVEFRONT_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("subgroup") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[SINGLE_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("singlethread") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-LABEL: define hidden void @fe1c(
+// AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
+// AMDGCN-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// AMDGCN-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// AMDGCN-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// AMDGCN-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// AMDGCN-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// AMDGCN-NEXT: ]
+// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-NEXT: ret void
+// AMDGCN: [[DEVICE_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("agent-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[SYSTEM_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[WORKGROUP_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[WAVEFRONT_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("wavefront-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[SINGLE_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("singlethread-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+void fe1c(int scope) {
+  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, scope);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe2a(
+// SPIRV-SAME: ) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: ret void
+// AMDGCN-LABEL: define hidden void @fe2a(
+// AMDGCN-SAME: ) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: ret void
+//
+void fe2a() {
+  __scoped_atomic_thread_fence(999, __MEMORY_SCOPE_SYSTEM);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe2b(
+// SPIRV-SAME: ) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: fence release
+// SPIRV-NEXT: ret void
+// AMDGCN-LABEL: define hidden void @fe2b(
+// AMDGCN-SAME: ) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: fence syncscope("one-as") release
+// AMDGCN-NEXT: ret void
+//
+void fe2b() {
+  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, 999);
+}
