Skip to content

Commit 2fa4186

Browse files
[libomptarget][amdgcn] Fix language linkage post D95300, drop use of assert
1 parent 64b448b commit 2fa4186

File tree

1 file changed

+27
-27
lines changed

1 file changed

+27
-27
lines changed

openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
// Initialized with a 64-bit mask with bits set in positions less than the
1919
// thread's lane number in the warp
20-
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
20+
EXTERN __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
2121
uint32_t lane = GetLaneId();
2222
int64_t ballot = __kmpc_impl_activemask();
2323
uint64_t mask = ((uint64_t)1 << lane) - (uint64_t)1;
@@ -26,7 +26,7 @@ DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
2626

2727
// Initialized with a 64-bit mask with bits set in positions greater than the
2828
// thread's lane number in the warp
29-
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
29+
EXTERN __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
3030
uint32_t lane = GetLaneId();
3131
if (lane == (WARPSIZE - 1))
3232
return 0;
@@ -35,9 +35,9 @@ DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
3535
return mask & ballot;
3636
}
3737

38-
DEVICE double __kmpc_impl_get_wtick() { return ((double)1E-9); }
38+
EXTERN double __kmpc_impl_get_wtick() { return ((double)1E-9); }
3939

40-
DEVICE double __kmpc_impl_get_wtime() {
40+
EXTERN double __kmpc_impl_get_wtime() {
4141
// The intrinsics for measuring time have undocumented frequency
4242
// This will probably need to be found by measurement on a number of
4343
// architectures. Until then, return 0, which is very inaccurate as a
@@ -46,19 +46,19 @@ DEVICE double __kmpc_impl_get_wtime() {
4646
}
4747

4848
// Warp vote function
49-
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
49+
EXTERN __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
5050
return __builtin_amdgcn_read_exec();
5151
}
5252

53-
DEVICE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t var,
53+
EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t var,
5454
int32_t srcLane) {
5555
int width = WARPSIZE;
5656
int self = GetLaneId();
5757
int index = srcLane + (self & ~(width - 1));
5858
return __builtin_amdgcn_ds_bpermute(index << 2, var);
5959
}
6060

61-
DEVICE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t var,
61+
EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t var,
6262
uint32_t laneDelta, int32_t width) {
6363
int self = GetLaneId();
6464
int index = self + laneDelta;
@@ -68,12 +68,12 @@ DEVICE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t var,
6868

6969
static DEVICE SHARED uint32_t L1_Barrier;
7070

71-
DEVICE void __kmpc_impl_target_init() {
71+
EXTERN void __kmpc_impl_target_init() {
7272
// Don't have global ctors, and shared memory is not zero init
7373
__atomic_store_n(&L1_Barrier, 0u, __ATOMIC_RELEASE);
7474
}
7575

76-
DEVICE void __kmpc_impl_named_sync(uint32_t num_threads) {
76+
EXTERN void __kmpc_impl_named_sync(uint32_t num_threads) {
7777
__atomic_thread_fence(__ATOMIC_ACQUIRE);
7878

7979
uint32_t num_waves = num_threads / WARPSIZE;
@@ -85,9 +85,9 @@ DEVICE void __kmpc_impl_named_sync(uint32_t num_threads) {
8585
// Low bits for the number of waves, assumed zero before this call.
8686
// High bits to count the number of times the barrier has been passed.
8787

88-
assert(num_waves != 0);
89-
assert(num_waves * WARPSIZE == num_threads);
90-
assert(num_waves < 0xffffu);
88+
// precondition: num_waves != 0;
89+
// invariant: num_waves * WARPSIZE == num_threads;
90+
// precondition: num_waves < 0xffffu;
9191

9292
// Increment the low 16 bits once, using the lowest active thread.
9393
uint64_t lowestActiveThread = __kmpc_impl_ffs(__kmpc_impl_activemask()) - 1;
@@ -131,19 +131,19 @@ DEVICE uint32_t get_workgroup_dim(uint32_t group_id, uint32_t grid_size,
131131
}
132132
} // namespace
133133

134-
DEVICE int GetNumberOfBlocksInKernel() {
134+
EXTERN int GetNumberOfBlocksInKernel() {
135135
return get_grid_dim(__builtin_amdgcn_grid_size_x(),
136136
__builtin_amdgcn_workgroup_size_x());
137137
}
138138

139-
DEVICE int GetNumberOfThreadsInBlock() {
139+
EXTERN int GetNumberOfThreadsInBlock() {
140140
return get_workgroup_dim(__builtin_amdgcn_workgroup_id_x(),
141141
__builtin_amdgcn_grid_size_x(),
142142
__builtin_amdgcn_workgroup_size_x());
143143
}
144144

145-
DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
146-
DEVICE unsigned GetLaneId() {
145+
EXTERN unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
146+
EXTERN unsigned GetLaneId() {
147147
return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
148148
}
149149

@@ -186,38 +186,38 @@ DEVICE unsigned long long __kmpc_atomic_add(unsigned long long *Address,
186186
}
187187

188188
// Stub implementations
189-
DEVICE void *__kmpc_impl_malloc(size_t) { return nullptr; }
190-
DEVICE void __kmpc_impl_free(void *) {}
189+
EXTERN void *__kmpc_impl_malloc(size_t) { return nullptr; }
190+
EXTERN void __kmpc_impl_free(void *) {}
191191

192-
DEVICE void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) {
192+
EXTERN void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) {
193193
lo = (uint32_t)(val & UINT64_C(0x00000000FFFFFFFF));
194194
hi = (uint32_t)((val & UINT64_C(0xFFFFFFFF00000000)) >> 32);
195195
}
196196

197-
DEVICE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) {
197+
EXTERN uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) {
198198
return (((uint64_t)hi) << 32) | (uint64_t)lo;
199199
}
200200

201-
DEVICE void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); }
201+
EXTERN void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); }
202202

203-
DEVICE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t) {
203+
EXTERN void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t) {
204204
// AMDGCN doesn't need to sync threads in a warp
205205
}
206206

207-
DEVICE void __kmpc_impl_threadfence() {
207+
EXTERN void __kmpc_impl_threadfence() {
208208
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
209209
}
210210

211-
DEVICE void __kmpc_impl_threadfence_block() {
211+
EXTERN void __kmpc_impl_threadfence_block() {
212212
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
213213
}
214214

215-
DEVICE void __kmpc_impl_threadfence_system() {
215+
EXTERN void __kmpc_impl_threadfence_system() {
216216
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
217217
}
218218

219219
// Calls to the AMDGCN layer (assuming 1D layout)
220-
DEVICE int GetThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
221-
DEVICE int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); }
220+
EXTERN int GetThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
221+
EXTERN int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); }
222222

223223
#pragma omp end declare target

0 commit comments

Comments
 (0)