Skip to content

Commit e5e6fd1

Browse files
mhalkronlieb
authored and committed
[OpenMP][DeviceRTL][NFC] Move diff into extra_allocators.h
While keeping the structure of the upstream Interface.h, we move the additional declarations into their own ASO-exclusive header.

Change-Id: I4a563ac61f925acc7d923769e7dbf16a0e20ebf7
1 parent d0f3c7e commit e5e6fd1

File tree

2 files changed

+112
-79
lines changed

2 files changed

+112
-79
lines changed

openmp/libomptarget/DeviceRTL/include/Interface.h

Lines changed: 2 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
#define OMPTARGET_DEVICERTL_INTERFACE_H
1414

1515
#include "Types.h"
16-
#include "Xteamr.h"
16+
#include "extra_allocators.h"
1717

1818
/// External API
1919
///
@@ -154,13 +154,9 @@ int omp_test_lock(omp_lock_t *Lock);
154154
/// Tasking
155155
///
156156
///{
157-
extern "C" {
158157
int omp_in_final(void);
159158

160159
int omp_get_max_task_priority(void);
161-
162-
void omp_fulfill_event(uint64_t);
163-
}
164160
///}
165161

166162
/// Misc
@@ -170,50 +166,6 @@ double omp_get_wtick(void);
170166

171167
double omp_get_wtime(void);
172168
///}
173-
174-
/// OpenMP 5.1 Memory Management routines (from libomp)
175-
/// OpenMP allocator API is currently unimplemented, including traits.
176-
/// All allocation routines will directly call the global memory allocation
177-
/// routine and, consequently, omp_free will call device memory deallocation.
178-
///
179-
/// {
180-
omp_allocator_handle_t omp_init_allocator(omp_memspace_handle_t m, int ntraits,
181-
omp_alloctrait_t traits[]);
182-
183-
void omp_destroy_allocator(omp_allocator_handle_t allocator);
184-
185-
void omp_set_default_allocator(omp_allocator_handle_t a);
186-
187-
omp_allocator_handle_t omp_get_default_allocator(void);
188-
189-
void *omp_alloc(uint64_t size,
190-
omp_allocator_handle_t allocator = omp_null_allocator);
191-
192-
void *omp_aligned_alloc(uint64_t align, uint64_t size,
193-
omp_allocator_handle_t allocator = omp_null_allocator);
194-
195-
void *omp_calloc(uint64_t nmemb, uint64_t size,
196-
omp_allocator_handle_t allocator = omp_null_allocator);
197-
198-
void *omp_aligned_calloc(uint64_t align, uint64_t nmemb, uint64_t size,
199-
omp_allocator_handle_t allocator = omp_null_allocator);
200-
201-
void *omp_realloc(void *ptr, uint64_t size,
202-
omp_allocator_handle_t allocator = omp_null_allocator,
203-
omp_allocator_handle_t free_allocator = omp_null_allocator);
204-
205-
void omp_free(void *ptr, omp_allocator_handle_t allocator = omp_null_allocator);
206-
/// }
207-
208-
/// CUDA exposes a native malloc/free API, while ROCm does not.
209-
//// Any re-definitions of malloc/free delete the native CUDA
210-
//// but they are necessary
211-
#ifdef __AMDGCN__
212-
void *malloc(uint64_t Size);
213-
void free(void *Ptr);
214-
size_t external_get_local_size(uint32_t dim);
215-
size_t external_get_num_groups(uint32_t dim);
216-
#endif
217169
}
218170

219171
extern "C" {
@@ -260,9 +212,6 @@ uint32_t __kmpc_get_hardware_num_threads_in_block();
260212
/// External interface to get the warp size.
261213
uint32_t __kmpc_get_warp_size();
262214

263-
/// External interface to get the block size
264-
uint32_t __kmpc_get_hardware_num_blocks();
265-
266215
/// Kernel
267216
///
268217
///{
@@ -306,8 +255,6 @@ int32_t __kmpc_cancel_barrier(IdentTy *Loc_ref, int32_t TId);
306255

307256
void __kmpc_barrier(IdentTy *Loc_ref, int32_t TId);
308257

309-
void __kmpc_impl_syncthreads();
310-
311258
void __kmpc_barrier_simple_spmd(IdentTy *Loc_ref, int32_t TId);
312259

313260
void __kmpc_barrier_simple_generic(IdentTy *Loc_ref, int32_t TId);
@@ -326,12 +273,6 @@ void __kmpc_end_single(IdentTy *Loc, int32_t TId);
326273

327274
void __kmpc_flush(IdentTy *Loc);
328275

329-
void __kmpc_flush_acquire(IdentTy *Loc);
330-
331-
void __kmpc_flush_release(IdentTy *Loc);
332-
333-
void __kmpc_flush_acqrel(IdentTy *Loc);
334-
335276
uint64_t __kmpc_warp_active_thread_mask(void);
336277

337278
void __kmpc_syncwarp(uint64_t Mask);
@@ -401,10 +342,7 @@ void __kmpc_taskloop(IdentTy *Loc, uint32_t TId,
401342
TaskDescriptorTy *TaskDescriptor, int,
402343
uint64_t *LowerBound, uint64_t *UpperBound, int64_t, int,
403344
int32_t, uint64_t, void *);
404-
405-
void *__kmpc_task_allow_completion_event(IdentTy *loc_ref,
406-
uint32_t gtid,
407-
TaskDescriptorTy *task);
345+
///}
408346

409347
/// Misc
410348
///
@@ -420,21 +358,6 @@ int32_t __kmpc_cancel(IdentTy *Loc, int32_t TId, int32_t CancelVal);
420358
int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size);
421359
int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size);
422360
///}
423-
424-
/// __init_ThreadDSTPtrPtr is defined in Workshare.cpp to initialize
425-
/// the static LDS global variable ThreadDSTPtrPtr to 0.
426-
/// It is called in Kernel.cpp at the end of initializeRuntime().
427-
void __init_ThreadDSTPtrPtr();
428361
}
429362

430-
/// Extra API exposed by ROCm
431-
extern "C" {
432-
int omp_ext_get_warp_id(void);
433-
int omp_ext_get_lane_id(void);
434-
int omp_ext_get_master_thread_id(void);
435-
int omp_ext_get_smid(void);
436-
int omp_ext_is_spmd_mode(void);
437-
unsigned long long omp_ext_get_active_threads_mask(void);
438-
} // extern "C"
439-
440363
#endif
Lines changed: 110 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,110 @@
1+
//===---------- extra_allocators.h - OpenMP interface -----------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Additional OpenMP interface definitions, in conjunction with Interface.h.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef OPENMP_LIBOMPTARGET_DEVICERTL_INCLUDE_EXTRA_ALLOCATORS_H
14+
#define OPENMP_LIBOMPTARGET_DEVICERTL_INCLUDE_EXTRA_ALLOCATORS_H
15+
16+
#include "Types.h"
17+
#include "Xteamr.h"
18+
19+
extern "C" {
20+
/// Tasking
21+
///
22+
///{
23+
void omp_fulfill_event(uint64_t);
24+
///}
25+
26+
/// OpenMP 5.1 Memory Management routines (from libomp)
27+
/// OpenMP allocator API is currently unimplemented, including traits.
28+
/// All allocation routines will directly call the global memory allocation
29+
/// routine and, consequently, omp_free will call device memory deallocation.
30+
///
31+
/// {
32+
omp_allocator_handle_t omp_init_allocator(omp_memspace_handle_t m, int ntraits,
33+
omp_alloctrait_t traits[]);
34+
35+
void omp_destroy_allocator(omp_allocator_handle_t allocator);
36+
37+
void omp_set_default_allocator(omp_allocator_handle_t a);
38+
39+
omp_allocator_handle_t omp_get_default_allocator(void);
40+
41+
void *omp_alloc(uint64_t size,
42+
omp_allocator_handle_t allocator = omp_null_allocator);
43+
44+
void *omp_aligned_alloc(uint64_t align, uint64_t size,
45+
omp_allocator_handle_t allocator = omp_null_allocator);
46+
47+
void *omp_calloc(uint64_t nmemb, uint64_t size,
48+
omp_allocator_handle_t allocator = omp_null_allocator);
49+
50+
void *omp_aligned_calloc(uint64_t align, uint64_t nmemb, uint64_t size,
51+
omp_allocator_handle_t allocator = omp_null_allocator);
52+
53+
void *omp_realloc(void *ptr, uint64_t size,
54+
omp_allocator_handle_t allocator = omp_null_allocator,
55+
omp_allocator_handle_t free_allocator = omp_null_allocator);
56+
57+
void omp_free(void *ptr, omp_allocator_handle_t allocator = omp_null_allocator);
58+
/// }
59+
60+
/// CUDA exposes a native malloc/free API, while ROCm does not.
61+
//// Any re-definitions of malloc/free delete the native CUDA versions,
62+
//// but they are necessary on AMDGCN, hence the __AMDGCN__ guard below.
63+
#ifdef __AMDGCN__
64+
void *malloc(uint64_t Size);
65+
void free(void *Ptr);
66+
size_t external_get_local_size(uint32_t dim);
67+
size_t external_get_num_groups(uint32_t dim);
68+
#endif
69+
} // extern "C"
70+
71+
extern "C" {
72+
/// External interface to get the number of blocks
73+
uint32_t __kmpc_get_hardware_num_blocks();
74+
75+
/// Synchronization
76+
///
77+
///{
78+
void __kmpc_impl_syncthreads();
79+
80+
void __kmpc_flush_acquire(IdentTy *Loc);
81+
82+
void __kmpc_flush_release(IdentTy *Loc);
83+
84+
void __kmpc_flush_acqrel(IdentTy *Loc);
85+
///}
86+
87+
/// Tasking
88+
///
89+
///{
90+
void *__kmpc_task_allow_completion_event(IdentTy *loc_ref, uint32_t gtid,
91+
TaskDescriptorTy *task);
92+
///}
93+
94+
/// __init_ThreadDSTPtrPtr is defined in Workshare.cpp to initialize
95+
/// the static LDS global variable ThreadDSTPtrPtr to 0.
96+
/// It is called in Kernel.cpp at the end of initializeRuntime().
97+
void __init_ThreadDSTPtrPtr();
98+
} // extern "C"
99+
100+
/// Extra API exposed by ROCm
101+
extern "C" {
102+
int omp_ext_get_warp_id(void);
103+
int omp_ext_get_lane_id(void);
104+
int omp_ext_get_master_thread_id(void);
105+
int omp_ext_get_smid(void);
106+
int omp_ext_is_spmd_mode(void);
107+
unsigned long long omp_ext_get_active_threads_mask(void);
108+
} // extern "C"
109+
110+
#endif // OPENMP_LIBOMPTARGET_DEVICERTL_INCLUDE_EXTRA_ALLOCATORS_H

0 commit comments

Comments
 (0)