Skip to content

Commit b6c2f53

Browse files
committed
[OpenMP] Add allocator support for target memory
This is a preview of allocator support for target memory. It depends on the offload runtime API, which allocates memory as described below.

llvm_omp_target_alloc_host(size_t size, int device_num);
-- Returns non-migratable memory owned by host.
-- Memory is accessible by host and device(s).

llvm_omp_target_alloc_shared(size_t size, int device_num);
-- Returns migratable memory owned by host and device.
-- Memory is accessible by host and device.

llvm_omp_target_alloc_device(size_t size, int device_num);
-- Returns memory owned by device.
-- Memory is only accessible by device.

New memory space and predefined allocator names are
-- llvm_omp_target_host_mem_space
-- llvm_omp_target_shared_mem_space
-- llvm_omp_target_device_mem_space
-- llvm_omp_target_host_mem_alloc
-- llvm_omp_target_shared_mem_alloc
-- llvm_omp_target_device_mem_alloc

Differential Revision: https://reviews.llvm.org/D96669
1 parent eadece3 commit b6c2f53

File tree

9 files changed

+155
-2
lines changed

9 files changed

+155
-2
lines changed

openmp/runtime/src/dllexports

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,12 +555,20 @@ kmp_set_disp_num_buffers 890
555555
omp_cgroup_mem_alloc DATA
556556
omp_pteam_mem_alloc DATA
557557
omp_thread_mem_alloc DATA
558+
# Preview of target memory support
559+
llvm_omp_target_host_mem_alloc DATA
560+
llvm_omp_target_shared_mem_alloc DATA
561+
llvm_omp_target_device_mem_alloc DATA
558562

559563
omp_default_mem_space DATA
560564
omp_large_cap_mem_space DATA
561565
omp_const_mem_space DATA
562566
omp_high_bw_mem_space DATA
563567
omp_low_lat_mem_space DATA
568+
# Preview of target memory support
569+
llvm_omp_target_host_mem_space DATA
570+
llvm_omp_target_shared_mem_space DATA
571+
llvm_omp_target_device_mem_space DATA
564572

565573
%ifndef stub
566574
# Ordinals between 900 and 999 are reserved

openmp/runtime/src/include/omp.h.var

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,12 +357,21 @@
357357
extern __KMP_IMP omp_allocator_handle_t const omp_cgroup_mem_alloc;
358358
extern __KMP_IMP omp_allocator_handle_t const omp_pteam_mem_alloc;
359359
extern __KMP_IMP omp_allocator_handle_t const omp_thread_mem_alloc;
360+
/* Preview of target memory support */
361+
extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
362+
extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
363+
extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
364+
360365
typedef omp_uintptr_t omp_memspace_handle_t;
361366
extern __KMP_IMP omp_memspace_handle_t const omp_default_mem_space;
362367
extern __KMP_IMP omp_memspace_handle_t const omp_large_cap_mem_space;
363368
extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space;
364369
extern __KMP_IMP omp_memspace_handle_t const omp_high_bw_mem_space;
365370
extern __KMP_IMP omp_memspace_handle_t const omp_low_lat_mem_space;
371+
/* Preview of target memory support */
372+
extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_host_mem_space;
373+
extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
374+
extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_device_mem_space;
366375
# else
367376
# if __cplusplus >= 201103
368377
typedef enum omp_allocator_handle_t : omp_uintptr_t
@@ -379,6 +388,10 @@
379388
omp_cgroup_mem_alloc = 6,
380389
omp_pteam_mem_alloc = 7,
381390
omp_thread_mem_alloc = 8,
391+
/* Preview of target memory support */
392+
llvm_omp_target_host_mem_alloc = 100,
393+
llvm_omp_target_shared_mem_alloc = 101,
394+
llvm_omp_target_device_mem_alloc = 102,
382395
KMP_ALLOCATOR_MAX_HANDLE = UINTPTR_MAX
383396
} omp_allocator_handle_t;
384397
# if __cplusplus >= 201103
@@ -392,6 +405,10 @@
392405
omp_const_mem_space = 2,
393406
omp_high_bw_mem_space = 3,
394407
omp_low_lat_mem_space = 4,
408+
/* Preview of target memory support */
409+
llvm_omp_target_host_mem_space = 100,
410+
llvm_omp_target_shared_mem_space = 101,
411+
llvm_omp_target_device_mem_space = 102,
395412
KMP_MEMSPACE_MAX_HANDLE = UINTPTR_MAX
396413
} omp_memspace_handle_t;
397414
# endif

openmp/runtime/src/include/omp_lib.f90.var

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,20 @@
137137
integer (kind=omp_allocator_handle_kind), parameter :: omp_cgroup_mem_alloc = 6
138138
integer (kind=omp_allocator_handle_kind), parameter :: omp_pteam_mem_alloc = 7
139139
integer (kind=omp_allocator_handle_kind), parameter :: omp_thread_mem_alloc = 8
140+
! Preview of target memory support
141+
integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_host_mem_alloc = 100
142+
integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_shared_mem_alloc = 101
143+
integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_device_mem_alloc = 102
140144

141145
integer (kind=omp_memspace_handle_kind), parameter :: omp_default_mem_space = 0
142146
integer (kind=omp_memspace_handle_kind), parameter :: omp_large_cap_mem_space = 1
143147
integer (kind=omp_memspace_handle_kind), parameter :: omp_const_mem_space = 2
144148
integer (kind=omp_memspace_handle_kind), parameter :: omp_high_bw_mem_space = 3
145149
integer (kind=omp_memspace_handle_kind), parameter :: omp_low_lat_mem_space = 4
150+
! Preview of target memory support
151+
integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_host_mem_space = 100
152+
integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_shared_mem_space = 101
153+
integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_device_mem_space = 102
146154

147155
integer (kind=omp_pause_resource_kind), parameter :: omp_pause_resume = 0
148156
integer (kind=omp_pause_resource_kind), parameter :: omp_pause_soft = 1

openmp/runtime/src/include/omp_lib.h.var

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,13 @@
214214
parameter(omp_pteam_mem_alloc=7)
215215
integer(kind=omp_allocator_handle_kind)omp_thread_mem_alloc
216216
parameter(omp_thread_mem_alloc=8)
217+
! Preview of target memory support
218+
integer(kind=omp_allocator_handle_kind)llvm_omp_target_host_mem_alloc
219+
parameter(llvm_omp_target_host_mem_alloc=100)
220+
integer(kind=omp_allocator_handle_kind)llvm_omp_target_shared_mem_alloc
221+
parameter(llvm_omp_target_shared_mem_alloc=101)
222+
integer(kind=omp_allocator_handle_kind)llvm_omp_target_device_mem_alloc
223+
parameter(llvm_omp_target_device_mem_alloc=102)
217224

218225
integer(kind=omp_memspace_handle_kind)omp_default_mem_space
219226
parameter(omp_default_mem_space=0)
@@ -225,6 +232,13 @@
225232
parameter(omp_high_bw_mem_space=3)
226233
integer(kind=omp_memspace_handle_kind)omp_low_lat_mem_space
227234
parameter(omp_low_lat_mem_space=4)
235+
! Preview of target memory support
236+
integer(kind=omp_memspace_handle_kind)llvm_omp_target_host_mem_space
237+
parameter(llvm_omp_target_host_mem_space=100)
238+
integer(kind=omp_memspace_handle_kind)llvm_omp_target_shared_mem_space
239+
parameter(llvm_omp_target_shared_mem_space=101)
240+
integer(kind=omp_memspace_handle_kind)llvm_omp_target_device_mem_space
241+
parameter(llvm_omp_target_device_mem_space=102)
228242

229243
integer(kind=omp_pause_resource_kind)omp_pause_resume
230244
parameter(omp_pause_resume=0)

openmp/runtime/src/kmp.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -958,6 +958,10 @@ extern omp_memspace_handle_t const omp_large_cap_mem_space;
958958
extern omp_memspace_handle_t const omp_const_mem_space;
959959
extern omp_memspace_handle_t const omp_high_bw_mem_space;
960960
extern omp_memspace_handle_t const omp_low_lat_mem_space;
961+
// Preview of target memory support
962+
extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
963+
extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
964+
extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;
961965

962966
typedef struct {
963967
omp_alloctrait_key_t key;
@@ -974,6 +978,10 @@ extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
974978
extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
975979
extern omp_allocator_handle_t const omp_pteam_mem_alloc;
976980
extern omp_allocator_handle_t const omp_thread_mem_alloc;
981+
// Preview of target memory support
982+
extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
983+
extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
984+
extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
977985
extern omp_allocator_handle_t const kmp_max_mem_alloc;
978986
extern omp_allocator_handle_t __kmp_def_allocator;
979987

@@ -1011,6 +1019,7 @@ extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
10111019

10121020
extern void __kmp_init_memkind();
10131021
extern void __kmp_fini_memkind();
1022+
extern void __kmp_init_target_mem();
10141023

10151024
/* ------------------------------------------------------------------------ */
10161025

openmp/runtime/src/kmp_alloc.cpp

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1242,6 +1242,20 @@ static void **mk_hbw_preferred_hugetlb;
12421242
static void **mk_dax_kmem;
12431243
static void **mk_dax_kmem_all;
12441244
static void **mk_dax_kmem_preferred;
1245+
// Preview of target memory support
1246+
static void *(*kmp_target_alloc_host)(size_t size, int device);
1247+
static void *(*kmp_target_alloc_shared)(size_t size, int device);
1248+
static void *(*kmp_target_alloc_device)(size_t size, int device);
1249+
static void *(*kmp_target_free)(void *ptr, int device);
1250+
static bool __kmp_target_mem_available;
1251+
// Evaluates to true when memspace handle MS is one of the three preview
// target memory spaces (host, shared, device).
// MS is parenthesized at each use so that an argument containing a
// lower-precedence operator still compares as a whole. It is expanded three
// times, so pass an expression without side effects.
#define KMP_IS_TARGET_MEM_SPACE(MS) \
1252+
((MS) == llvm_omp_target_host_mem_space || \
1253+
(MS) == llvm_omp_target_shared_mem_space || \
1254+
(MS) == llvm_omp_target_device_mem_space)
1255+
// Evaluates to true when allocator handle MA is one of the three preview
// predefined target memory allocators (host, shared, device).
// MA is parenthesized at each use so that an argument containing a
// lower-precedence operator still compares as a whole. It is expanded three
// times, so pass an expression without side effects.
#define KMP_IS_TARGET_MEM_ALLOC(MA) \
1256+
((MA) == llvm_omp_target_host_mem_alloc || \
1257+
(MA) == llvm_omp_target_shared_mem_alloc || \
1258+
(MA) == llvm_omp_target_device_mem_alloc)
12451259

12461260
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
12471261
static inline void chk_kind(void ***pkind) {
@@ -1338,14 +1352,26 @@ void __kmp_fini_memkind() {
13381352
mk_dax_kmem_preferred = NULL;
13391353
#endif
13401354
}
1355+
// Preview of target memory support
// Looks up the four offload entry points by name through KMP_DLSYM
// ("llvm_omp_target_alloc_host", "llvm_omp_target_alloc_shared",
// "llvm_omp_target_alloc_device" and "omp_target_free") and stores the
// results in the file-static function pointers kmp_target_alloc_host,
// kmp_target_alloc_shared, kmp_target_alloc_device and kmp_target_free.
// Each result is written through a `*(void **)(&fn)` cast so the value
// returned by KMP_DLSYM lands directly in the function-pointer object.
// __kmp_target_mem_available ends up true only when all four pointers are
// non-null; callers test that flag before using the allocation pointers.
// NOTE(review): KMP_DLSYM is defined outside this hunk; presumably it is a
// dynamic symbol lookup that yields NULL when the symbol is absent -- confirm.
1356+
void __kmp_init_target_mem() {
1357+
*(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
1358+
*(void **)(&kmp_target_alloc_shared) =
1359+
KMP_DLSYM("llvm_omp_target_alloc_shared");
1360+
*(void **)(&kmp_target_alloc_device) =
1361+
KMP_DLSYM("llvm_omp_target_alloc_device");
1362+
*(void **)(&kmp_target_free) = KMP_DLSYM("omp_target_free");
1363+
__kmp_target_mem_available = kmp_target_alloc_host &&
1364+
kmp_target_alloc_shared &&
1365+
kmp_target_alloc_device && kmp_target_free;
1366+
}
13411367

13421368
omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
13431369
int ntraits,
13441370
omp_alloctrait_t traits[]) {
13451371
// OpenMP 5.0 only allows predefined memspaces
13461372
KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
13471373
ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
1348-
ms == omp_high_bw_mem_space);
1374+
ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));
13491375
kmp_allocator_t *al;
13501376
int i;
13511377
al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
@@ -1423,6 +1449,9 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
14231449
al->memkind = mk_default;
14241450
}
14251451
}
1452+
} else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
1453+
__kmp_free(al);
1454+
return omp_null_allocator;
14261455
} else {
14271456
if (ms == omp_high_bw_mem_space) {
14281457
// cannot detect HBW memory presence without memkind library
@@ -1543,6 +1572,22 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
15431572
}
15441573
}
15451574
} else if (allocator < kmp_max_mem_alloc) {
1575+
if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
1576+
// Use size input directly as the memory may not be accessible on host.
1577+
// Use default device for now.
1578+
if (__kmp_target_mem_available) {
1579+
kmp_int32 device =
1580+
__kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1581+
if (allocator == llvm_omp_target_host_mem_alloc)
1582+
ptr = kmp_target_alloc_host(size, device);
1583+
else if (allocator == llvm_omp_target_shared_mem_alloc)
1584+
ptr = kmp_target_alloc_shared(size, device);
1585+
else // allocator == llvm_omp_target_device_mem_alloc
1586+
ptr = kmp_target_alloc_device(size, device);
1587+
}
1588+
return ptr;
1589+
}
1590+
15461591
// pre-defined allocator
15471592
if (allocator == omp_high_bw_mem_alloc) {
15481593
// ptr = NULL;
@@ -1551,6 +1596,18 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
15511596
} else {
15521597
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
15531598
}
1599+
} else if (KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
1600+
if (__kmp_target_mem_available) {
1601+
kmp_int32 device =
1602+
__kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1603+
if (al->memspace == llvm_omp_target_host_mem_space)
1604+
ptr = kmp_target_alloc_host(size, device);
1605+
else if (al->memspace == llvm_omp_target_shared_mem_space)
1606+
ptr = kmp_target_alloc_shared(size, device);
1607+
else // al->memspace == llvm_omp_target_device_mem_space
1608+
ptr = kmp_target_alloc_device(size, device);
1609+
}
1610+
return ptr;
15541611
} else if (al->pool_size > 0) {
15551612
// custom allocator with pool size requested
15561613
kmp_uint64 used =
@@ -1685,6 +1742,15 @@ void __kmpc_free(int gtid, void *ptr, const omp_allocator_handle_t allocator) {
16851742
kmp_mem_desc_t desc;
16861743
kmp_uintptr_t addr_align; // address to return to caller
16871744
kmp_uintptr_t addr_descr; // address of memory block descriptor
1745+
if (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
1746+
(allocator > kmp_max_mem_alloc &&
1747+
KMP_IS_TARGET_MEM_SPACE(al->memspace))) {
1748+
KMP_DEBUG_ASSERT(kmp_target_free);
1749+
kmp_int32 device =
1750+
__kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1751+
kmp_target_free(ptr, device);
1752+
return;
1753+
}
16881754

16891755
addr_align = (kmp_uintptr_t)ptr;
16901756
addr_descr = addr_align - sizeof(kmp_mem_desc_t);

openmp/runtime/src/kmp_global.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,13 @@ omp_allocator_handle_t const omp_pteam_mem_alloc =
319319
(omp_allocator_handle_t const)7;
320320
omp_allocator_handle_t const omp_thread_mem_alloc =
321321
(omp_allocator_handle_t const)8;
322+
// Preview of target memory support
// Predefined allocator handles for target host / shared / device memory.
// The values 100..102 are the same ones given to these names in the omp.h.var
// enum and in the Fortran parameter lists. They are below kmp_max_mem_alloc
// (1024, defined right after), so __kmpc_alloc reaches them in its
// `allocator < kmp_max_mem_alloc` branch.
323+
omp_allocator_handle_t const llvm_omp_target_host_mem_alloc =
324+
(omp_allocator_handle_t const)100;
325+
omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc =
326+
(omp_allocator_handle_t const)101;
327+
omp_allocator_handle_t const llvm_omp_target_device_mem_alloc =
328+
(omp_allocator_handle_t const)102;
322329
omp_allocator_handle_t const kmp_max_mem_alloc =
323330
(omp_allocator_handle_t const)1024;
324331
omp_allocator_handle_t __kmp_def_allocator = omp_default_mem_alloc;
@@ -333,6 +340,13 @@ omp_memspace_handle_t const omp_high_bw_mem_space =
333340
(omp_memspace_handle_t const)3;
334341
omp_memspace_handle_t const omp_low_lat_mem_space =
335342
(omp_memspace_handle_t const)4;
343+
// Preview of target memory support
// Memory space handles for target host / shared / device memory.
// The values 100..102 are the same ones given to these names in the omp.h.var
// enum and in the Fortran parameter lists.
344+
omp_memspace_handle_t const llvm_omp_target_host_mem_space =
345+
(omp_memspace_handle_t const)100;
346+
omp_memspace_handle_t const llvm_omp_target_shared_mem_space =
347+
(omp_memspace_handle_t const)101;
348+
omp_memspace_handle_t const llvm_omp_target_device_mem_space =
349+
(omp_memspace_handle_t const)102;
336350

337351
/* This check ensures that the compiler is passing the correct data type for the
338352
flags formal parameter of the function kmpc_omp_task_alloc(). If the type is

openmp/runtime/src/kmp_runtime.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,10 @@ static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
540540
"%s_%d.t_disp_buffer", header, team_id);
541541
}
542542

543-
static void __kmp_init_allocator() { __kmp_init_memkind(); }
543+
// Allocator start-up hook: calls __kmp_init_memkind() and then
// __kmp_init_target_mem(), in that order. The second call is the addition
// made by this change; it is declared in kmp.h.
static void __kmp_init_allocator() {
544+
__kmp_init_memkind();
545+
__kmp_init_target_mem();
546+
}
544547
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
545548

546549
/* ------------------------------------------------------------------------ */

openmp/runtime/src/kmp_stub.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,13 @@ omp_allocator_handle_t const omp_pteam_mem_alloc =
350350
(omp_allocator_handle_t const)7;
351351
omp_allocator_handle_t const omp_thread_mem_alloc =
352352
(omp_allocator_handle_t const)8;
353+
// Preview of target memory support
// Stub-library definitions of the predefined target memory allocator handles.
// The values 100..102 are the same ones given to these names in the omp.h.var
// enum.
354+
omp_allocator_handle_t const llvm_omp_target_host_mem_alloc =
355+
(omp_allocator_handle_t const)100;
356+
omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc =
357+
(omp_allocator_handle_t const)101;
358+
omp_allocator_handle_t const llvm_omp_target_device_mem_alloc =
359+
(omp_allocator_handle_t const)102;
353360

354361
omp_memspace_handle_t const omp_default_mem_space =
355362
(omp_memspace_handle_t const)0;
@@ -361,6 +368,13 @@ omp_memspace_handle_t const omp_high_bw_mem_space =
361368
(omp_memspace_handle_t const)3;
362369
omp_memspace_handle_t const omp_low_lat_mem_space =
363370
(omp_memspace_handle_t const)4;
371+
// Preview of target memory support
// Stub-library definitions of the target memory space handles.
// The values 100..102 are the same ones given to these names in the omp.h.var
// enum.
372+
omp_memspace_handle_t const llvm_omp_target_host_mem_space =
373+
(omp_memspace_handle_t const)100;
374+
omp_memspace_handle_t const llvm_omp_target_shared_mem_space =
375+
(omp_memspace_handle_t const)101;
376+
omp_memspace_handle_t const llvm_omp_target_device_mem_space =
377+
(omp_memspace_handle_t const)102;
364378
#endif /* KMP_OS_WINDOWS */
365379
void *omp_alloc(size_t size, const omp_allocator_handle_t allocator) {
366380
i;

0 commit comments

Comments
 (0)