Skip to content

Commit eade534

Browse files
committed
Refactor
1 parent 8a490d0 commit eade534

File tree

11 files changed

+240
-238
lines changed

11 files changed

+240
-238
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4419,47 +4419,35 @@ AMDGPU Language Extensions
44194419
__builtin_amdgcn_fence
44204420
^^^^^^^^^^^^^^^^^^^^^^
44214421
4422-
``__builtin_amdgcn_fence`` emits a fence for all address spaces
4423-
and takes the following arguments:
4422+
``__builtin_amdgcn_fence`` emits a fence.
44244423
44254424
* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE``
44264425
* ``const char *`` synchronization scope, e.g. ``workgroup``
4427-
4428-
.. code-block:: c++
4429-
4430-
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
4431-
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
4432-
4433-
__builtin_amdgcn_masked_fence
4434-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4435-
4436-
``__builtin_amdgcn_masked_fence`` emits a fence for one or more address
4437-
spaces and takes the following arguments:
4438-
4439-
* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE``
4440-
* ``const char *`` synchronization scope, e.g. ``workgroup``
4441-
* Zero or more ``const char *`` address spaces.
4426+
* Zero or more ``const char *`` address space names.
44424427
44434428
The address space arguments must be string literals with known values, such as:
44444429
44454430
* ``"local"``
44464431
* ``"global"``
44474432
* ``"image"``
44484433
4449-
If there are no address spaces specified, this fence behaves like
4450-
``__builtin_amdgcn_fence``.
4434+
If one or more address space names are provided, the code generator will attempt
4435+
to emit potentially faster instructions that only fence those address spaces.
4436+
Emitting such instructions may not always be possible and the compiler is free
4437+
to fence more aggressively.
44514438
4452-
Examples:
4439+
If no address space names are provided, all address spaces are fenced.
44534440
44544441
.. code-block:: c++
44554442
4456-
__builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "workgroup", "local")
4457-
__builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global")
4443+
// Fence all address spaces.
4444+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
4445+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
4446+
4447+
// Fence only requested address spaces.
4448+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local");
4449+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global");
44584450
4459-
Note that this fence may affect more than just the address spaces
4460-
specified; in some cases, the address space mask may
4461-
be lost during optimization and a normal fence for all address
4462-
spaces (``__builtin_amdgcn_fence``) will be emitted instead.
44634451
44644452
ARM/AArch64 Language Extensions
44654453
-------------------------------

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,7 @@ BUILTIN(__builtin_amdgcn_sched_group_barrier, "vIiIiIi", "n")
6868
BUILTIN(__builtin_amdgcn_iglp_opt, "vIi", "n")
6969
BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
7070
BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
71-
BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
72-
BUILTIN(__builtin_amdgcn_masked_fence, "vUicC*.", "n")
71+
BUILTIN(__builtin_amdgcn_fence, "vUicC*.", "n")
7372
BUILTIN(__builtin_amdgcn_groupstaticsize, "Ui", "n")
7473
BUILTIN(__builtin_amdgcn_wavefrontsize, "Ui", "nc")
7574

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18328,14 +18328,13 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1832818328
return nullptr;
1832918329
}
1833018330

18331-
void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
18332-
const CallExpr *E,
18333-
unsigned FirstASNameIdx) {
18334-
constexpr const char *Tag = "opencl-fence-mem";
18331+
void CodeGenFunction::AddAMDGCNFenceAddressSpaceMMRA(llvm::Instruction *Inst,
18332+
const CallExpr *E) {
18333+
constexpr const char *Tag = "amdgpu-as";
1833518334

1833618335
LLVMContext &Ctx = Inst->getContext();
1833718336
SmallVector<MMRAMetadata::TagT, 3> MMRAs;
18338-
for (unsigned K = FirstASNameIdx; K < E->getNumArgs(); ++K) {
18337+
for (unsigned K = 2; K < E->getNumArgs(); ++K) {
1833918338
llvm::Value *V = EmitScalarExpr(E->getArg(K));
1834018339
StringRef AS;
1834118340
if (llvm::getConstantStringInfo(V, AS) &&
@@ -19019,14 +19018,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1901919018
Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
1902019019
return Builder.CreateCall(F, { Src0, Src1, Src2 });
1902119020
}
19022-
case AMDGPU::BI__builtin_amdgcn_fence:
19023-
case AMDGPU::BI__builtin_amdgcn_masked_fence: {
19021+
case AMDGPU::BI__builtin_amdgcn_fence: {
1902419022
ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
1902519023
EmitScalarExpr(E->getArg(1)), AO, SSID);
1902619024
FenceInst *Fence = Builder.CreateFence(AO, SSID);
19027-
if (BuiltinID == AMDGPU::BI__builtin_amdgcn_masked_fence &&
19028-
E->getNumArgs() > 2)
19029-
AddAMDGCNAddressSpaceMMRA(Fence, E, 2);
19025+
if (E->getNumArgs() > 2)
19026+
AddAMDGCNFenceAddressSpaceMMRA(Fence, E);
1903019027
return Fence;
1903119028
}
1903219029
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4636,8 +4636,8 @@ class CodeGenFunction : public CodeGenTypeCache {
46364636
llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
46374637
ReturnValueSlot ReturnValue);
46384638

4639-
void AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E,
4640-
unsigned FirstASNameIdx);
4639+
void AddAMDGCNFenceAddressSpaceMMRA(llvm::Instruction *Inst,
4640+
const CallExpr *E);
46414641
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
46424642
llvm::AtomicOrdering &AO,
46434643
llvm::SyncScope::ID &SSID);

clang/lib/Sema/SemaChecking.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5729,7 +5729,6 @@ bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
57295729
ScopeIndex = 3;
57305730
break;
57315731
case AMDGPU::BI__builtin_amdgcn_fence:
5732-
case AMDGPU::BI__builtin_amdgcn_masked_fence:
57335732
OrderIndex = 0;
57345733
ScopeIndex = 1;
57355734
break;
@@ -5755,8 +5754,7 @@ bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
57555754
switch (static_cast<llvm::AtomicOrderingCABI>(Ord)) {
57565755
case llvm::AtomicOrderingCABI::relaxed:
57575756
case llvm::AtomicOrderingCABI::consume:
5758-
if (BuiltinID == AMDGPU::BI__builtin_amdgcn_fence ||
5759-
BuiltinID == AMDGPU::BI__builtin_amdgcn_masked_fence)
5757+
if (BuiltinID == AMDGPU::BI__builtin_amdgcn_fence)
57605758
return Diag(ArgExpr->getBeginLoc(),
57615759
diag::warn_atomic_op_has_invalid_memory_order)
57625760
<< 0 << ArgExpr->getSourceRange();

clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp

Lines changed: 0 additions & 100 deletions
This file was deleted.
Lines changed: 98 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,113 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
12
// REQUIRES: amdgpu-registered-target
23
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
3-
// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
4+
// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
45

6+
// CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv(
7+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
8+
// CHECK-NEXT: entry:
9+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst
10+
// CHECK-NEXT: fence syncscope("agent") acquire
11+
// CHECK-NEXT: fence seq_cst
12+
// CHECK-NEXT: fence syncscope("agent") acq_rel
13+
// CHECK-NEXT: fence syncscope("workgroup") release
14+
// CHECK-NEXT: ret void
15+
//
516
void test_memory_fence_success() {
6-
// CHECK-LABEL: test_memory_fence_success
717

8-
// CHECK: fence syncscope("workgroup") seq_cst
918
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
1019

11-
// CHECK: fence syncscope("agent") acquire
1220
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
1321

14-
// CHECK: fence seq_cst
1522
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
1623

17-
// CHECK: fence syncscope("agent") acq_rel
1824
__builtin_amdgcn_fence(4, "agent");
1925

20-
// CHECK: fence syncscope("workgroup") release
2126
__builtin_amdgcn_fence(3, "workgroup");
2227
}
28+
29+
// CHECK-LABEL: define dso_local void @_Z10test_localv(
30+
// CHECK-SAME: ) #[[ATTR0]] {
31+
// CHECK-NEXT: entry:
32+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
33+
// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
34+
// CHECK-NEXT: fence seq_cst, !mmra [[META3]]
35+
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
36+
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
37+
// CHECK-NEXT: ret void
38+
//
39+
void test_local() {
40+
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
41+
42+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "local");
43+
44+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "local");
45+
46+
__builtin_amdgcn_fence(4, "agent", "local");
47+
48+
__builtin_amdgcn_fence(3, "workgroup", "local");
49+
}
50+
51+
52+
// CHECK-LABEL: define dso_local void @_Z11test_globalv(
53+
// CHECK-SAME: ) #[[ATTR0]] {
54+
// CHECK-NEXT: entry:
55+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
56+
// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META4]]
57+
// CHECK-NEXT: fence seq_cst, !mmra [[META4]]
58+
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]]
59+
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
60+
// CHECK-NEXT: ret void
61+
//
62+
void test_global() {
63+
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global");
64+
65+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "global");
66+
67+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "global");
68+
69+
__builtin_amdgcn_fence(4, "agent", "global");
70+
71+
__builtin_amdgcn_fence(3, "workgroup", "global");
72+
}
73+
74+
// CHECK-LABEL: define dso_local void @_Z10test_imagev(
75+
// CHECK-SAME: ) #[[ATTR0]] {
76+
// CHECK-NEXT: entry:
77+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
78+
// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META5]]
79+
// CHECK-NEXT: fence seq_cst, !mmra [[META5]]
80+
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META5]]
81+
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META5]]
82+
// CHECK-NEXT: ret void
83+
//
84+
void test_image() {
85+
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image");
86+
87+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "image");
88+
89+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "image");
90+
91+
__builtin_amdgcn_fence(4, "agent", "image");
92+
93+
__builtin_amdgcn_fence(3, "workgroup", "image");
94+
}
95+
96+
// CHECK-LABEL: define dso_local void @_Z10test_mixedv(
97+
// CHECK-SAME: ) #[[ATTR0]] {
98+
// CHECK-NEXT: entry:
99+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META6:![0-9]+]]
100+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META7:![0-9]+]]
101+
// CHECK-NEXT: ret void
102+
//
103+
void test_mixed() {
104+
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "global");
105+
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "local", "global");
106+
}
107+
//.
108+
// CHECK: [[META3]] = !{!"amdgpu-as", !"local"}
109+
// CHECK: [[META4]] = !{!"amdgpu-as", !"global"}
110+
// CHECK: [[META5]] = !{!"amdgpu-as", !"image"}
111+
// CHECK: [[META6]] = !{[[META5]], [[META4]]}
112+
// CHECK: [[META7]] = !{[[META5]], [[META3]], [[META4]]}
113+
//.

0 commit comments

Comments
 (0)