Skip to content

Commit 2468fdd

Browse files
author
George Rokos
committed
[libomptarget] Add allocator support for target memory
This patch adds the infrastructure for allocator support for target memory. Three allocators are introduced for device, host and shared memory. The corresponding API functions have the llvm_ prefix temporarily, until they become part of the OpenMP standard. Differential Revision: https://reviews.llvm.org/D97883
1 parent 2902bde commit 2468fdd

File tree

14 files changed

+119
-36
lines changed

14 files changed

+119
-36
lines changed

openmp/libomptarget/include/omptarget.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,13 @@ enum OpenMPOffloadingRequiresDirFlags {
8686
OMP_REQ_DYNAMIC_ALLOCATORS = 0x010
8787
};
8888

89+
/// Allocation kinds that can be requested from a target memory allocator:
/// device, host, shared, or the default allocator. Stored as int32_t.
enum TargetAllocTy : int32_t {
  TARGET_ALLOC_DEVICE = 0,
  TARGET_ALLOC_HOST = 1,
  TARGET_ALLOC_SHARED = 2,
  TARGET_ALLOC_DEFAULT = 3
};
95+
8996
/// This struct is a record of an entry point or global. For a function
9097
/// entry point the size is expected to be zero
9198
struct __tgt_offload_entry {
@@ -190,6 +197,12 @@ int omp_target_associate_ptr(void *host_ptr, void *device_ptr, size_t size,
190197
size_t device_offset, int device_num);
191198
int omp_target_disassociate_ptr(void *host_ptr, int device_num);
192199

200+
/// Explicit target memory allocators
201+
/// Using the llvm_ prefix until they become part of the OpenMP standard.
202+
void *llvm_omp_target_alloc_device(size_t size, int device_num);
203+
void *llvm_omp_target_alloc_host(size_t size, int device_num);
204+
void *llvm_omp_target_alloc_shared(size_t size, int device_num);
205+
193206
/// add the clauses of the requires directives in a given file
194207
void __tgt_register_requires(int64_t flags);
195208

openmp/libomptarget/include/omptargetplugin.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,10 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
6565
// initialize the target data mapping structures. These addresses are
6666
// used to generate a table of target variables to pass to
6767
// __tgt_rtl_run_region(). The __tgt_rtl_data_alloc() returns NULL in
68-
// case an error occurred on the target device.
69-
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr);
68+
// case an error occurred on the target device. Kind dictates what allocator
69+
// to use (e.g. shared, host, device).
70+
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr,
71+
int32_t Kind);
7072

7173
// Pass the data content to the target device using the target address. In case
7274
// of success, return zero. Otherwise, return an error code.

openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1488,9 +1488,16 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
14881488
return DeviceInfo.getOffloadEntriesTable(device_id);
14891489
}
14901490

1491-
void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *) {
1491+
void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *, int32_t kind) {
14921492
void *ptr = NULL;
14931493
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
1494+
1495+
if (kind != TARGET_ALLOC_DEFAULT) {
1496+
REPORT("Invalid target data allocation kind or requested allocator not "
1497+
"implemented yet\n");
1498+
return NULL;
1499+
}
1500+
14941501
atmi_status_t err = atmi_malloc(&ptr, size, get_gpu_mem_place(device_id));
14951502
DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size,
14961503
(long long unsigned)(Elf64_Addr)ptr);

openmp/libomptarget/plugins/cuda/src/rtl.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1095,9 +1095,16 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
10951095
return DeviceRTL.loadBinary(device_id, image);
10961096
}
10971097

1098-
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *) {
1098+
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *,
1099+
int32_t kind) {
10991100
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
11001101

1102+
if (kind != TARGET_ALLOC_DEFAULT) {
1103+
REPORT("Invalid target data allocation kind or requested allocator not "
1104+
"implemented yet\n");
1105+
return NULL;
1106+
}
1107+
11011108
return DeviceRTL.dataAlloc(device_id, size);
11021109
}
11031110

openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,23 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
250250
return DeviceInfo.getOffloadEntriesTable(device_id);
251251
}
252252

253-
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr) {
254-
void *ptr = malloc(size);
253+
// Sample implementation of explicit memory allocator. For this plugin all kinds
254+
// are equivalent to each other.
255+
void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr,
256+
int32_t kind) {
257+
void *ptr = NULL;
258+
259+
switch (kind) {
260+
case TARGET_ALLOC_DEVICE:
261+
case TARGET_ALLOC_HOST:
262+
case TARGET_ALLOC_SHARED:
263+
case TARGET_ALLOC_DEFAULT:
264+
ptr = malloc(size);
265+
break;
266+
default:
267+
REPORT("Invalid target data allocation kind");
268+
}
269+
255270
return ptr;
256271
}
257272

openmp/libomptarget/plugins/remote/src/rtl.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,14 @@ int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId) {
8484
return Manager->isDataExchangeable(SrcDevId, DstDevId);
8585
}
8686

87-
void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr) {
87+
void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr,
88+
int32_t kind) {
89+
if (kind != TARGET_ALLOC_DEFAULT) {
90+
REPORT("Invalid target data allocation kind or requested allocator not "
91+
"implemented yet\n");
92+
return NULL;
93+
}
94+
8895
return Manager->dataAlloc(DeviceId, Size, HstPtr);
8996
}
9097

openmp/libomptarget/plugins/ve/src/rtl.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,10 +330,17 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
330330
// used to generate a table of target variables to pass to
331331
// __tgt_rtl_run_region(). The __tgt_rtl_data_alloc() returns NULL in
332332
// case an error occurred on the target device.
333-
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr) {
333+
void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr,
334+
int32_t kind) {
334335
int ret;
335336
uint64_t addr;
336337

338+
if (kind != TARGET_ALLOC_DEFAULT) {
339+
REPORT("Invalid target data allocation kind or requested allocator not "
340+
"implemented yet\n");
341+
return NULL;
342+
}
343+
337344
if (DeviceInfo.ProcHandles[ID] == NULL) {
338345
struct veo_proc_handle *proc_handle;
339346
proc_handle = veo_proc_create(DeviceInfo.NodeIds[ID]);

openmp/libomptarget/src/api.cpp

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -38,31 +38,19 @@ EXTERN int omp_get_initial_device(void) {
3838
}
3939

4040
EXTERN void *omp_target_alloc(size_t size, int device_num) {
41-
TIMESCOPE();
42-
DP("Call to omp_target_alloc for device %d requesting %zu bytes\n",
43-
device_num, size);
44-
45-
if (size <= 0) {
46-
DP("Call to omp_target_alloc with non-positive length\n");
47-
return NULL;
48-
}
49-
50-
void *rc = NULL;
41+
return targetAllocExplicit(size, device_num, TARGET_ALLOC_DEFAULT, __func__);
42+
}
5143

52-
if (device_num == omp_get_initial_device()) {
53-
rc = malloc(size);
54-
DP("omp_target_alloc returns host ptr " DPxMOD "\n", DPxPTR(rc));
55-
return rc;
56-
}
44+
/// Explicit allocator entry point that requests TARGET_ALLOC_DEVICE memory.
/// Forwards to targetAllocExplicit together with this function's own name
/// (__func__) and returns whatever that call returns.
EXTERN void *llvm_omp_target_alloc_device(size_t size, int device_num) {
  void *Ptr =
      targetAllocExplicit(size, device_num, TARGET_ALLOC_DEVICE, __func__);
  return Ptr;
}
5747

58-
if (!device_is_ready(device_num)) {
59-
DP("omp_target_alloc returns NULL ptr\n");
60-
return NULL;
61-
}
48+
/// Explicit allocator entry point that requests TARGET_ALLOC_HOST memory.
/// Forwards to targetAllocExplicit together with this function's own name
/// (__func__) and returns whatever that call returns.
EXTERN void *llvm_omp_target_alloc_host(size_t size, int device_num) {
  void *Ptr =
      targetAllocExplicit(size, device_num, TARGET_ALLOC_HOST, __func__);
  return Ptr;
}
6251

63-
rc = PM->Devices[device_num].allocData(size);
64-
DP("omp_target_alloc returns device ptr " DPxMOD "\n", DPxPTR(rc));
65-
return rc;
52+
/// Explicit allocator entry point that requests TARGET_ALLOC_SHARED memory.
/// Forwards to targetAllocExplicit together with this function's own name
/// (__func__) and returns whatever that call returns.
EXTERN void *llvm_omp_target_alloc_shared(size_t size, int device_num) {
  void *Ptr =
      targetAllocExplicit(size, device_num, TARGET_ALLOC_SHARED, __func__);
  return Ptr;
}
6755

6856
EXTERN void omp_target_free(void *device_ptr, int device_num) {

openmp/libomptarget/src/device.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,8 @@ __tgt_target_table *DeviceTy::load_binary(void *Img) {
405405
return rc;
406406
}
407407

408-
void *DeviceTy::allocData(int64_t Size, void *HstPtr) {
409-
return RTL->data_alloc(RTLDeviceID, Size, HstPtr);
408+
void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
409+
return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
410410
}
411411

412412
int32_t DeviceTy::deleteData(void *TgtPtrBegin) {

openmp/libomptarget/src/device.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,13 +185,16 @@ struct DeviceTy {
185185
__tgt_target_table *load_binary(void *Img);
186186

187187
// device memory allocation/deallocation routines
188-
/// Allocates \p Size bytes on the device and returns the address/nullptr when
188+
/// Allocates \p Size bytes on the device, host or shared memory space
189+
/// (depending on \p Kind) and returns the address/nullptr when
189190
/// succeeds/fails. \p HstPtr is an address of the host data which the
190191
/// allocated target data will be associated with. If it is unknown, the
191192
/// default value of \p HstPtr is nullptr. Note: this function doesn't do
192193
/// pointer association. Actually, all the __tgt_rtl_data_alloc
193-
/// implementations ignore \p HstPtr.
194-
void *allocData(int64_t Size, void *HstPtr = nullptr);
194+
/// implementations ignore \p HstPtr. \p Kind dictates what allocator should
195+
/// be used (host, shared, device).
196+
void *allocData(int64_t Size, void *HstPtr = nullptr,
197+
int32_t Kind = TARGET_ALLOC_DEFAULT);
195198
/// Deallocates memory which \p TgtPtrBegin points at and returns
196199
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
197200
int32_t deleteData(void *TgtPtrBegin);

openmp/libomptarget/src/exports

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ VERS1.0 {
3636
omp_target_memcpy_rect;
3737
omp_target_associate_ptr;
3838
omp_target_disassociate_ptr;
39+
llvm_omp_target_alloc_host;
40+
llvm_omp_target_alloc_shared;
41+
llvm_omp_target_alloc_device;
3942
local:
4043
*;
4144
};

openmp/libomptarget/src/omptarget.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,35 @@ static int32_t getParentIndex(int64_t type) {
328328
return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
329329
}
330330

331+
void *targetAllocExplicit(size_t size, int device_num, int kind,
332+
const char *name) {
333+
TIMESCOPE();
334+
DP("Call to %s for device %d requesting %zu bytes\n", name, device_num, size);
335+
336+
if (size <= 0) {
337+
DP("Call to %s with non-positive length\n", name);
338+
return NULL;
339+
}
340+
341+
void *rc = NULL;
342+
343+
if (device_num == omp_get_initial_device()) {
344+
rc = malloc(size);
345+
DP("%s returns host ptr " DPxMOD "\n", name, DPxPTR(rc));
346+
return rc;
347+
}
348+
349+
if (!device_is_ready(device_num)) {
350+
DP("%s returns NULL ptr\n", name);
351+
return NULL;
352+
}
353+
354+
DeviceTy &Device = PM->Devices[device_num];
355+
rc = Device.allocData(size, nullptr, kind);
356+
DP("%s returns device ptr " DPxMOD "\n", name, DPxPTR(rc));
357+
return rc;
358+
}
359+
331360
/// Call the user-defined mapper function followed by the appropriate
332361
// targetData* function (targetData{Begin,End,Update}).
333362
int targetDataMapper(ident_t *loc, DeviceTy &Device, void *arg_base, void *arg,

openmp/libomptarget/src/private.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ extern int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
4646

4747
extern void handleTargetOutcome(bool Success, ident_t *Loc);
4848
extern int checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc);
49+
extern void *targetAllocExplicit(size_t size, int device_num, int kind,
50+
const char *name);
4951

5052
// This structure stores information of a mapped memory region.
5153
struct MapComponentInfoTy {

openmp/libomptarget/src/rtl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ struct RTLInfoTy {
3030
typedef int32_t(number_of_devices_ty)();
3131
typedef int32_t(init_device_ty)(int32_t);
3232
typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
33-
typedef void *(data_alloc_ty)(int32_t, int64_t, void *);
33+
typedef void *(data_alloc_ty)(int32_t, int64_t, void *, int32_t);
3434
typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t);
3535
typedef int32_t(data_submit_async_ty)(int32_t, void *, void *, int64_t,
3636
__tgt_async_info *);

0 commit comments

Comments
 (0)