-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Clang] Add support for scoped atomic thread fence #115545
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) Changes Summary: Full diff: https://github.com/llvm/llvm-project/pull/115545.diff 3 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 87a798183d6e19..4c2f9f621915b1 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1995,6 +1995,12 @@ def AtomicThreadFence : Builtin {
let Prototype = "void(int)";
}
+def ScopedAtomicThreadFence : Builtin { // thread fence taking an extra scope operand
+ let Spellings = ["__scoped_atomic_thread_fence"];
+ let Attributes = [NoThrow];
+ let Prototype = "void(int, int)"; // operands: (memory order, memory scope)
+}
+
def AtomicSignalFence : Builtin {
let Spellings = ["__atomic_signal_fence"];
let Attributes = [NoThrow];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5c3df5124517d6..2ec3770ecd42ca 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5133,6 +5133,135 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.SetInsertPoint(ContBB);
return RValue::get(nullptr);
}
+  case Builtin::BI__scoped_atomic_thread_fence: {
+    // Lowers __scoped_atomic_thread_fence(order, scope) to LLVM `fence`
+    // instructions.
+    //
+    // Either operand may be a compile-time constant or a runtime value:
+    //  * both constant: at most one fence is emitted inline, with no control
+    //    flow;
+    //  * otherwise: a switch is emitted over each runtime operand, one fence
+    //    is emitted per reachable (ordering, scope) pair, and every path
+    //    rejoins at "atomic.scope.continue".
+    // Relaxed and unrecognized orders emit no fence. A constant scope that the
+    // scope model rejects uses the model's fallback value; a runtime scope
+    // that matches no case takes the switch default and emits no fence.
+    auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
+
+    Value *Order = EmitScalarExpr(E->getArg(0));
+    Value *Scope = EmitScalarExpr(E->getArg(1));
+    auto *ConstOrder = dyn_cast<llvm::ConstantInt>(Order);
+    auto *ConstScope = dyn_cast<llvm::ConstantInt>(Scope);
+
+    // Translates a constant memory-order operand into an LLVM ordering.
+    // Returns false when no fence is required (relaxed or invalid order).
+    auto MapConstOrder = [](int Ord, llvm::AtomicOrdering &Ordering) {
+      switch (Ord) {
+      case 0:  // memory_order_relaxed
+      default: // invalid order
+        return false;
+      case 1: // memory_order_consume
+      case 2: // memory_order_acquire
+        Ordering = llvm::AtomicOrdering::Acquire;
+        return true;
+      case 3: // memory_order_release
+        Ordering = llvm::AtomicOrdering::Release;
+        return true;
+      case 4: // memory_order_acq_rel
+        Ordering = llvm::AtomicOrdering::AcquireRelease;
+        return true;
+      case 5: // memory_order_seq_cst
+        Ordering = llvm::AtomicOrdering::SequentiallyConsistent;
+        return true;
+      }
+    };
+
+    // Translates a constant scope operand into a SyncScope, substituting the
+    // model's fallback value when the model does not accept the operand.
+    auto MapConstScope = [&](int Scp) {
+      return ScopeModel->isValid(Scp)
+                 ? ScopeModel->map(Scp)
+                 : ScopeModel->map(ScopeModel->getFallBackValue());
+    };
+
+    // Emits one fence at the current insertion point. The target hook picks
+    // the LLVM sync scope ID for the given scope and ordering.
+    auto EmitFence = [&](llvm::AtomicOrdering Ordering, SyncScope SS) {
+      Builder.CreateFence(Ordering,
+                          getTargetHooks().getLLVMSyncScopeID(
+                              getLangOpts(), SS, Ordering, getLLVMContext()));
+    };
+
+    // Fast path: both operands are constants, so no branching is needed.
+    if (ConstOrder && ConstScope) {
+      SyncScope SS = MapConstScope(ConstScope->getZExtValue());
+      llvm::AtomicOrdering Ordering = llvm::AtomicOrdering::NotAtomic;
+      if (MapConstOrder(ConstOrder->getZExtValue(), Ordering))
+        EmitFence(Ordering, SS);
+      return RValue::get(nullptr);
+    }
+
+    llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
+
+    // Fills OrderBB with the fence(s) for a single ordering and branches to
+    // ContBB. A constant scope yields one fence; a runtime scope yields a
+    // switch with one fence block per runtime scope value.
+    auto EmitFencesForOrder = [&](llvm::BasicBlock *OrderBB,
+                                  llvm::AtomicOrdering Ordering) {
+      Builder.SetInsertPoint(OrderBB);
+      if (ConstScope) {
+        EmitFence(Ordering, MapConstScope(ConstScope->getZExtValue()));
+        Builder.CreateBr(ContBB);
+        return;
+      }
+
+      llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
+      for (unsigned Scp : ScopeModel->getRuntimeValues())
+        BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
+
+      auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
+      llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
+      for (unsigned Scp : ScopeModel->getRuntimeValues()) {
+        auto *B = BBs[Scp];
+        SI->addCase(Builder.getInt32(Scp), B);
+
+        Builder.SetInsertPoint(B);
+        EmitFence(Ordering, ScopeModel->map(Scp));
+        Builder.CreateBr(ContBB);
+      }
+    };
+
+    if (ConstOrder) {
+      llvm::AtomicOrdering Ordering = llvm::AtomicOrdering::NotAtomic;
+      if (!MapConstOrder(ConstOrder->getZExtValue(), Ordering)) {
+        // No fence is needed, so the continuation block is never used.
+        ContBB->eraseFromParent();
+        return RValue::get(nullptr);
+      }
+      EmitFencesForOrder(Builder.GetInsertBlock(), Ordering);
+    } else {
+      llvm::BasicBlock *AcquireBB = createBasicBlock("acquire", CurFn);
+      llvm::BasicBlock *ReleaseBB = createBasicBlock("release", CurFn);
+      llvm::BasicBlock *AcqRelBB = createBasicBlock("acqrel", CurFn);
+      llvm::BasicBlock *SeqCstBB = createBasicBlock("seqcst", CurFn);
+
+      // Relaxed and invalid orders fall through to ContBB without a fence.
+      Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
+      llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
+      SI->addCase(Builder.getInt32(1), AcquireBB);
+      SI->addCase(Builder.getInt32(2), AcquireBB);
+      SI->addCase(Builder.getInt32(3), ReleaseBB);
+      SI->addCase(Builder.getInt32(4), AcqRelBB);
+      SI->addCase(Builder.getInt32(5), SeqCstBB);
+
+      // Visit the order blocks in a fixed sequence. Iterating a map keyed by
+      // BasicBlock pointers would create the per-scope blocks in an order that
+      // depends on pointer values, making the emitted IR layout vary from run
+      // to run.
+      EmitFencesForOrder(AcquireBB, llvm::AtomicOrdering::Acquire);
+      EmitFencesForOrder(ReleaseBB, llvm::AtomicOrdering::Release);
+      EmitFencesForOrder(AcqRelBB, llvm::AtomicOrdering::AcquireRelease);
+      EmitFencesForOrder(SeqCstBB,
+                         llvm::AtomicOrdering::SequentiallyConsistent);
+    }
+
+    Builder.SetInsertPoint(ContBB);
+    return RValue::get(nullptr);
+  }
case Builtin::BI__builtin_signbit:
case Builtin::BI__builtin_signbitf:
diff --git a/clang/test/CodeGen/scoped-fence-ops.c b/clang/test/CodeGen/scoped-fence-ops.c
new file mode 100644
index 00000000000000..cb48176d37c852
--- /dev/null
+++ b/clang/test/CodeGen/scoped-fence-ops.c
@@ -0,0 +1,179 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
+// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
+// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1a(
+// SPIRV-SAME: ) #[[ATTR0:[0-9]+]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: fence syncscope("workgroup") release
+// SPIRV-NEXT: ret void
+// AMDGCN-LABEL: define hidden void @fe1a(
+// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT: ret void
+//
+void fe1a() { // constant order, constant scope: a single fence, no branching
+ __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1b(
+// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4
+// SPIRV-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR]], align 4
+// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR]], align 4
+// SPIRV-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// SPIRV-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// SPIRV-NEXT: i32 2, label %[[ACQUIRE]]
+// SPIRV-NEXT: i32 3, label %[[RELEASE:.*]]
+// SPIRV-NEXT: i32 4, label %[[ACQREL:.*]]
+// SPIRV-NEXT: i32 5, label %[[SEQCST:.*]]
+// SPIRV-NEXT: ]
+// SPIRV: [[ATOMIC_SCOPE_CONTINUE]]:
+// SPIRV-NEXT: ret void
+// SPIRV: [[ACQUIRE]]:
+// SPIRV-NEXT: fence syncscope("workgroup") acquire
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[RELEASE]]:
+// SPIRV-NEXT: fence syncscope("workgroup") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[ACQREL]]:
+// SPIRV-NEXT: fence syncscope("workgroup") acq_rel
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[SEQCST]]:
+// SPIRV-NEXT: fence syncscope("workgroup") seq_cst
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-LABEL: define hidden void @fe1b(
+// AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
+// AMDGCN-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// AMDGCN-NEXT: i32 2, label %[[ACQUIRE]]
+// AMDGCN-NEXT: i32 3, label %[[RELEASE:.*]]
+// AMDGCN-NEXT: i32 4, label %[[ACQREL:.*]]
+// AMDGCN-NEXT: i32 5, label %[[SEQCST:.*]]
+// AMDGCN-NEXT: ]
+// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-NEXT: ret void
+// AMDGCN: [[ACQUIRE]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") acquire
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[RELEASE]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[ACQREL]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") acq_rel
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[SEQCST]]:
+// AMDGCN-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+void fe1b(int ord) { // runtime order, constant scope: switch over the order
+ __scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1c(
+// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4
+// SPIRV-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR]], align 4
+// SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR]], align 4
+// SPIRV-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// SPIRV-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// SPIRV-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// SPIRV-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// SPIRV-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// SPIRV-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// SPIRV-NEXT: ]
+// SPIRV: [[ATOMIC_SCOPE_CONTINUE]]:
+// SPIRV-NEXT: ret void
+// SPIRV: [[DEVICE_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("device") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[SYSTEM_SCOPE]]:
+// SPIRV-NEXT: fence release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[WORKGROUP_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("workgroup") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[WAVEFRONT_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("subgroup") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV: [[SINGLE_SCOPE]]:
+// SPIRV-NEXT: fence syncscope("singlethread") release
+// SPIRV-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-LABEL: define hidden void @fe1c(
+// AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
+// AMDGCN-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// AMDGCN-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// AMDGCN-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// AMDGCN-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// AMDGCN-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// AMDGCN-NEXT: ]
+// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-NEXT: ret void
+// AMDGCN: [[DEVICE_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("agent-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[SYSTEM_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[WORKGROUP_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[WAVEFRONT_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("wavefront-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN: [[SINGLE_SCOPE]]:
+// AMDGCN-NEXT: fence syncscope("singlethread-one-as") release
+// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+void fe1c(int scope) { // constant order, runtime scope: switch over the scope
+ __scoped_atomic_thread_fence(__ATOMIC_RELEASE, scope);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe2a(
+// SPIRV-SAME: ) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: ret void
+// AMDGCN-LABEL: define hidden void @fe2a(
+// AMDGCN-SAME: ) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: ret void
+//
+void fe2a() { // out-of-range constant order (999): no fence is emitted
+ __scoped_atomic_thread_fence(999, __MEMORY_SCOPE_SYSTEM);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe2b(
+// SPIRV-SAME: ) #[[ATTR0]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: fence release
+// SPIRV-NEXT: ret void
+// AMDGCN-LABEL: define hidden void @fe2b(
+// AMDGCN-SAME: ) #[[ATTR0]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: fence syncscope("one-as") release
+// AMDGCN-NEXT: ret void
+//
+void fe2b() { // out-of-range constant scope (999): same fence as system scope
+ __scoped_atomic_thread_fence(__ATOMIC_RELEASE, 999);
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
LGTM |
5b14ae0
to
a6e71cb
Compare
Summary: Previously we added support for all of the atomic GNU extensions with optional memory scopes except for `__atomic_thread_fence`. This patch adds support for that. This should ideally allow us to generically emit these LLVM scopes.
Ping |
Summary:
Previously we added support for all of the atomic GNU extensions with
optional memory scopes except for `__atomic_thread_fence`. This patch
adds support for that. This should ideally allow us to generically emit
these LLVM scopes.