Skip to content

Commit 8c75290

Browse files
authored
[AMDGPU] modify named barrier builtins and intrinsics (#114550)
Use a local pointer type to represent the named barrier in builtin and intrinsic. This makes the definitions more user friendly because they do not need to worry about the hardware ID assignment. Also this approach is more like other popular GPU programming languages. Named barriers should be represented as global variables of addrspace(3) in LLVM-IR. Compiler assigns the special LDS offsets for those variables during AMDGPULowerModuleLDS pass. Those addresses are converted to hw barrier ID during instruction selection. The rest of the instruction-selection changes are primarily due to the intrinsic-definition changes.
1 parent ffc2233 commit 8c75290

21 files changed

+857
-1609
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -439,15 +439,15 @@ TARGET_BUILTIN(__builtin_amdgcn_s_sleep_var, "vUi", "n", "gfx12-insts")
439439
TARGET_BUILTIN(__builtin_amdgcn_permlane16_var, "UiUiUiUiIbIb", "nc", "gfx12-insts")
440440
TARGET_BUILTIN(__builtin_amdgcn_permlanex16_var, "UiUiUiUiIbIb", "nc", "gfx12-insts")
441441
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal, "vIi", "n", "gfx12-insts")
442-
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vi", "n", "gfx12-insts")
442+
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vv*i", "n", "gfx12-insts")
443443
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_wait, "vIs", "n", "gfx12-insts")
444444
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst, "bIi", "n", "gfx12-insts")
445-
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst_var, "bi", "n", "gfx12-insts")
446-
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vii", "n", "gfx12-insts")
447-
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vi", "n", "gfx12-insts")
448-
TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vi", "n", "gfx12-insts")
449-
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "b", "n", "gfx12-insts")
445+
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vv*i", "n", "gfx12-insts")
446+
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vv*", "n", "gfx12-insts")
447+
TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "gfx12-insts")
448+
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "vIs", "n", "gfx12-insts")
450449
TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts")
450+
TARGET_BUILTIN(__builtin_amdgcn_s_get_named_barrier_state, "Uiv*", "n", "gfx12-insts")
451451
TARGET_BUILTIN(__builtin_amdgcn_s_prefetch_data, "vvC*Ui", "nc", "gfx12-insts")
452452
TARGET_BUILTIN(__builtin_amdgcn_s_buffer_prefetch_data, "vQbIiUi", "nc", "gfx12-insts")
453453

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ kernel void builtins_amdgcn_s_barrier_signal_isfirst_err(global int* in, global
2323
*out = *in;
2424
}
2525

26+
kernel void builtins_amdgcn_s_barrier_leave_err(global int* in, global int* out, int barrier) {
27+
28+
__builtin_amdgcn_s_barrier_signal(-1);
29+
__builtin_amdgcn_s_barrier_leave(barrier); // expected-error {{'__builtin_amdgcn_s_barrier_leave' must be a constant integer}}
30+
*out = *in;
31+
}
32+
2633
void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int off)
2734
{
2835
__builtin_amdgcn_s_buffer_prefetch_data(rsrc, off, 31); // expected-error {{'__builtin_amdgcn_s_buffer_prefetch_data' must be a constant integer}}

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl

Lines changed: 59 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,21 @@ void test_s_barrier_signal()
8787

8888
// CHECK-LABEL: @test_s_barrier_signal_var(
8989
// CHECK-NEXT: entry:
90+
// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
9091
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
92+
// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
9193
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
94+
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
9295
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
93-
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
94-
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal.var(i32 [[TMP0]])
96+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
97+
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
98+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
99+
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
95100
// CHECK-NEXT: ret void
96101
//
97-
void test_s_barrier_signal_var(int a)
102+
void test_s_barrier_signal_var(void *bar, int a)
98103
{
99-
__builtin_amdgcn_s_barrier_signal_var(a);
104+
__builtin_amdgcn_s_barrier_signal_var(bar, a);
100105
}
101106

102107
// CHECK-LABEL: @test_s_barrier_signal_isfirst(
@@ -134,110 +139,63 @@ void test_s_barrier_signal_isfirst(int* a, int* b, int *c)
134139
__builtin_amdgcn_s_barrier_wait(1);
135140
}
136141

137-
// CHECK-LABEL: @test_s_barrier_isfirst_var(
138-
// CHECK-NEXT: entry:
139-
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
140-
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
141-
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
142-
// CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
143-
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
144-
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
145-
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
146-
// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
147-
// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
148-
// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8
149-
// CHECK-NEXT: store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8
150-
// CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
151-
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
152-
// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst.var(i32 [[TMP0]])
153-
// CHECK-NEXT: br i1 [[TMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
154-
// CHECK: if.then:
155-
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
156-
// CHECK-NEXT: store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8
157-
// CHECK-NEXT: br label [[IF_END:%.*]]
158-
// CHECK: if.else:
159-
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
160-
// CHECK-NEXT: store ptr [[TMP3]], ptr [[A_ADDR_ASCAST]], align 8
161-
// CHECK-NEXT: br label [[IF_END]]
162-
// CHECK: if.end:
163-
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 1)
164-
// CHECK-NEXT: ret void
165-
//
166-
void test_s_barrier_isfirst_var(int* a, int* b, int *c, int d)
167-
{
168-
if ( __builtin_amdgcn_s_barrier_signal_isfirst_var(d))
169-
a = b;
170-
else
171-
a = c;
172-
173-
__builtin_amdgcn_s_barrier_wait(1);
174-
175-
}
176-
177142
// CHECK-LABEL: @test_s_barrier_init(
178143
// CHECK-NEXT: entry:
144+
// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
179145
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
146+
// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
180147
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
148+
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
181149
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
182-
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
183-
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.init(i32 1, i32 [[TMP0]])
150+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
151+
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
152+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
153+
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
184154
// CHECK-NEXT: ret void
185155
//
186-
void test_s_barrier_init(int a)
156+
void test_s_barrier_init(void *bar, int a)
187157
{
188-
__builtin_amdgcn_s_barrier_init(1, a);
158+
__builtin_amdgcn_s_barrier_init(bar, a);
189159
}
190160

191161
// CHECK-LABEL: @test_s_barrier_join(
192162
// CHECK-NEXT: entry:
193-
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(i32 1)
163+
// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
164+
// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
165+
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
166+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
167+
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
168+
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) [[TMP1]])
194169
// CHECK-NEXT: ret void
195170
//
196-
void test_s_barrier_join()
171+
void test_s_barrier_join(void *bar)
197172
{
198-
__builtin_amdgcn_s_barrier_join(1);
173+
__builtin_amdgcn_s_barrier_join(bar);
199174
}
200175

201176
// CHECK-LABEL: @test_s_wakeup_barrier(
202177
// CHECK-NEXT: entry:
203-
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(i32 1)
178+
// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
179+
// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
180+
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
181+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
182+
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
183+
// CHECK-NEXT: call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) [[TMP1]])
204184
// CHECK-NEXT: ret void
205185
//
206-
void test_s_wakeup_barrier()
186+
void test_s_wakeup_barrier(void *bar)
207187
{
208-
__builtin_amdgcn_s_barrier_join(1);
188+
__builtin_amdgcn_s_wakeup_barrier(bar);
209189
}
210190

211191
// CHECK-LABEL: @test_s_barrier_leave(
212192
// CHECK-NEXT: entry:
213-
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
214-
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
215-
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
216-
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
217-
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
218-
// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
219-
// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
220-
// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8
221-
// CHECK-NEXT: store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8
222-
// CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.s.barrier.leave()
223-
// CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
224-
// CHECK: if.then:
225-
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
226-
// CHECK-NEXT: store ptr [[TMP1]], ptr [[A_ADDR_ASCAST]], align 8
227-
// CHECK-NEXT: br label [[IF_END:%.*]]
228-
// CHECK: if.else:
229-
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
230-
// CHECK-NEXT: store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8
231-
// CHECK-NEXT: br label [[IF_END]]
232-
// CHECK: if.end:
193+
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.leave(i16 1)
233194
// CHECK-NEXT: ret void
234195
//
235-
void test_s_barrier_leave(int* a, int* b, int *c)
196+
void test_s_barrier_leave()
236197
{
237-
if (__builtin_amdgcn_s_barrier_leave())
238-
a = b;
239-
else
240-
a = c;
198+
__builtin_amdgcn_s_barrier_leave(1);
241199
}
242200

243201
// CHECK-LABEL: @test_s_get_barrier_state(
@@ -261,6 +219,28 @@ unsigned test_s_get_barrier_state(int a)
261219
return State;
262220
}
263221

222+
// CHECK-LABEL: @test_s_get_named_barrier_state(
223+
// CHECK-NEXT: entry:
224+
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
225+
// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
226+
// CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
227+
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
228+
// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
229+
// CHECK-NEXT: [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr
230+
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
231+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
232+
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
233+
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) [[TMP1]])
234+
// CHECK-NEXT: store i32 [[TMP2]], ptr [[STATE_ASCAST]], align 4
235+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4
236+
// CHECK-NEXT: ret i32 [[TMP3]]
237+
//
238+
unsigned test_s_get_named_barrier_state(void *bar)
239+
{
240+
unsigned State = __builtin_amdgcn_s_get_named_barrier_state(bar);
241+
return State;
242+
}
243+
264244
// CHECK-LABEL: @test_s_ttracedata(
265245
// CHECK-NEXT: entry:
266246
// CHECK-NEXT: call void @llvm.amdgcn.s.ttracedata(i32 1)

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
def global_ptr_ty : LLVMQualPointerType<1>;
14+
def local_ptr_ty : LLVMQualPointerType<3>;
1415

1516
// The amdgpu-no-* attributes (ex amdgpu-no-workitem-id-z) typically inferred
1617
// by the backend cause whole-program undefined behavior when violated, such as
@@ -247,48 +248,70 @@ def int_amdgcn_s_sendmsghalt : ClangBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
247248
def int_amdgcn_s_sendmsg_rtn : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty],
248249
[ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
249250

251+
// Vanilla workgroup sync-barrier
250252
def int_amdgcn_s_barrier : ClangBuiltin<"__builtin_amdgcn_s_barrier">,
251253
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
252254

255+
// Lower-level split-barrier intrinsics
256+
257+
// void @llvm.amdgcn.s.barrier.signal(i32 %barrierType)
258+
// only for non-named barrier
253259
def int_amdgcn_s_barrier_signal : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal">,
254260
Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
255261
IntrNoCallback, IntrNoFree]>;
256262

263+
// void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %barrier, i32 %memberCnt)
264+
// The %barrier and %memberCnt arguments must be uniform, otherwise behavior is undefined.
257265
def int_amdgcn_s_barrier_signal_var : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_var">,
258-
Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
266+
Intrinsic<[], [local_ptr_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
259267
IntrNoCallback, IntrNoFree]>;
260268

269+
// bool @llvm.amdgcn.s.barrier.signal.isfirst(i32 %barrierType)
270+
// only for non-named barrier
261271
def int_amdgcn_s_barrier_signal_isfirst : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_isfirst">,
262272
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
263273
IntrWillReturn, IntrNoCallback, IntrNoFree]>;
264274

265-
def int_amdgcn_s_barrier_signal_isfirst_var : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_isfirst_var">,
266-
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
267-
IntrNoCallback, IntrNoFree]>;
268-
275+
// void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) %barrier, i32 %memberCnt)
276+
// The %barrier and %memberCnt arguments must be uniform, otherwise behavior is undefined.
269277
def int_amdgcn_s_barrier_init : ClangBuiltin<"__builtin_amdgcn_s_barrier_init">,
270-
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent,
278+
Intrinsic<[], [local_ptr_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent,
271279
IntrWillReturn, IntrNoCallback, IntrNoFree]>;
272280

281+
// void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %barrier)
282+
// The %barrier argument must be uniform, otherwise behavior is undefined.
273283
def int_amdgcn_s_barrier_join : ClangBuiltin<"__builtin_amdgcn_s_barrier_join">,
274-
Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
284+
Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
275285
IntrNoCallback, IntrNoFree]>;
276286

287+
// void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %barrier)
288+
// The %barrier argument must be uniform, otherwise behavior is undefined.
277289
def int_amdgcn_s_wakeup_barrier : ClangBuiltin<"__builtin_amdgcn_s_wakeup_barrier">,
278-
Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
290+
Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
279291
IntrNoCallback, IntrNoFree]>;
280292

293+
// void @llvm.amdgcn.s.barrier.wait(i16 %barrierType)
281294
def int_amdgcn_s_barrier_wait : ClangBuiltin<"__builtin_amdgcn_s_barrier_wait">,
282295
Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
283296
IntrWillReturn, IntrNoCallback, IntrNoFree]>;
284297

298+
// void @llvm.amdgcn.s.barrier.leave(i16 %barrierType)
285299
def int_amdgcn_s_barrier_leave : ClangBuiltin<"__builtin_amdgcn_s_barrier_leave">,
286-
Intrinsic<[llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
300+
Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
301+
IntrWillReturn, IntrNoCallback, IntrNoFree]>;
287302

303+
// uint32_t @llvm.amdgcn.s.get.barrier.state(i32 %barrierId)
304+
// The %barrierId argument must be uniform, otherwise behavior is undefined.
288305
def int_amdgcn_s_get_barrier_state : ClangBuiltin<"__builtin_amdgcn_s_get_barrier_state">,
289306
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
290307
IntrNoCallback, IntrNoFree]>;
291308

309+
// uint32_t @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %barrier)
310+
// The %barrier argument must be uniform, otherwise behavior is undefined.
311+
def int_amdgcn_s_get_named_barrier_state : ClangBuiltin<"__builtin_amdgcn_s_get_named_barrier_state">,
312+
Intrinsic<[llvm_i32_ty], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
313+
IntrNoCallback, IntrNoFree]>;
314+
292315
def int_amdgcn_wave_barrier : ClangBuiltin<"__builtin_amdgcn_wave_barrier">,
293316
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
294317

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "AMDGPU.h"
1717
#include "AMDGPUInstrInfo.h"
1818
#include "AMDGPUMachineFunction.h"
19+
#include "AMDGPUMemoryUtils.h"
1920
#include "SIMachineFunctionInfo.h"
2021
#include "llvm/CodeGen/Analysis.h"
2122
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -1508,7 +1509,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
15081509
if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
15091510
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
15101511
if (!MFI->isModuleEntryFunction() &&
1511-
GV->getName() != "llvm.amdgcn.module.lds") {
1512+
GV->getName() != "llvm.amdgcn.module.lds" &&
1513+
!AMDGPU::isNamedBarrier(*cast<GlobalVariable>(GV))) {
15121514
SDLoc DL(Op);
15131515
const Function &Fn = DAG.getMachineFunction().getFunction();
15141516
DiagnosticInfoUnsupported BadLDSDecl(

0 commit comments

Comments
 (0)