-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[libomptarget][OpenMP] Initial implementation of omp_target_memset() and omp_target_memset_async() #68706
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
[libomptarget][OpenMP] Initial implementation of omp_target_memset() and omp_target_memset_async() #68706
Changes from 4 commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
06d5eef
Add stub functions and exports for omp_target_memset()
mjklemm c1791d6
Implement a slow-path version of omp_target_memset*()
mjklemm 97dba33
Refactor code a bit to reduce code duplication
mjklemm 33e7eb6
Apply formatting rules
mjklemm 7933bfa
(Partly) Address feedback by shiltian
mjklemm 5e87a57
Slightly reduce code complexity, merge task helpers for memset/memcpy
mjklemm 012952a
Add a safety net against catastrophic failures of omp_target_memset
mjklemm 33475f4
Improve code formatting
mjklemm f2fd280
Fix style
mjklemm d63e703
Fix last style issue with the new code. Hopefully :-)
mjklemm File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -241,10 +241,125 @@ static int libomp_target_memcpy_async_helper(kmp_int32 Gtid, kmp_task_t *Task) { | |
return Rc; | ||
} | ||
|
||
// Task entry point for the asynchronous memset. Unpacks the argument bundle
// stored in the task's `shareds` slot, forwards it to the synchronous
// omp_target_memset(), and then releases the bundle with `delete`.
//
// Returns OFFLOAD_FAIL when either the task or its argument bundle is null,
// and OFFLOAD_SUCCESS otherwise. Gtid is not used by this function.
static int libomp_target_memset_async_helper(kmp_int32 Gtid, kmp_task_t *Task) {
  if (Task == nullptr)
    return OFFLOAD_FAIL;

  auto *MemsetArgs = reinterpret_cast<TargetMemsetArgsTy *>(Task->shareds);
  if (MemsetArgs == nullptr)
    return OFFLOAD_FAIL;

  // omp_target_memset() only hands back the pointer it was given, so there is
  // no status to inspect here.
  omp_target_memset(MemsetArgs->Ptr, MemsetArgs->C, MemsetArgs->N,
                    MemsetArgs->DeviceNum);

  // The bundle is no longer needed once the fill has been issued.
  delete MemsetArgs;
  return OFFLOAD_SUCCESS;
}
|
||
static inline void | ||
ConvertDepObjVector(llvm::SmallVector<kmp_depend_info_t> &Vec, int DepObjCount, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't this function name be lower case? |
||
omp_depend_t *DepObjList) { | ||
for (int i = 0; i < DepObjCount; ++i) { | ||
omp_depend_t DepObj = DepObjList[i]; | ||
Vec.push_back(*((kmp_depend_info_t *)DepObj)); | ||
} | ||
} | ||
|
||
// Allocates a hidden-helper task that runs
// libomp_target_memset_async_helper() on Args and launches it with the given
// dependences.
//
// Args is attached to the task's `shareds` slot on success. If the task
// cannot be allocated, Args is deleted here and OFFLOAD_FAIL is returned.
// Otherwise the result of __kmpc_omp_task_with_deps() is returned.
static int libomp_helper_memset_task_creation(TargetMemsetArgsTy *Args,
                                              int DepObjCount,
                                              omp_depend_t *DepObjList) {
  // Global thread id of the calling thread and the routine the task will run.
  const int ThreadId = __kmpc_global_thread_num(nullptr);
  int (*TaskEntry)(kmp_int32, kmp_task_t *) =
      &libomp_target_memset_async_helper;

  // Mark the task as a hidden helper by setting that bit through the
  // tasking-flags view of the raw flag word.
  kmp_int32 TaskFlags = 0;
  reinterpret_cast<kmp_tasking_flags_t *>(&TaskFlags)->hidden_helper = 1;

  kmp_task_t *HelperTask = __kmpc_omp_target_task_alloc(
      nullptr, ThreadId, TaskFlags, sizeof(kmp_task_t), 0, TaskEntry, -1);
  if (HelperTask == nullptr) {
    // No task will ever consume the arguments, so release them now.
    delete Args;
    return OFFLOAD_FAIL;
  }

  // The helper routine reads its arguments back from this slot.
  HelperTask->shareds = Args;

  // Translate the user-facing depend objects into runtime descriptors.
  llvm::SmallVector<kmp_depend_info_t> Dependences;
  ConvertDepObjVector(Dependences, DepObjCount, DepObjList);

  return __kmpc_omp_task_with_deps(nullptr, ThreadId, HelperTask, DepObjCount,
                                   Dependences.data(), 0, nullptr);
}
|
||
// Fills the first N bytes at Ptr, which lives on device DeviceNum, with the
// byte value C.
//
// Ptr is returned in every case. The routine has no way to report an error,
// so failures on the device path are only logged through DP().
//
// A null Ptr or N == 0 is treated as a no-op (a useful implementation of
// behavior the OpenMP spec leaves unspecified).
EXTERN void *omp_target_memset(void *Ptr, int C, size_t N, int DeviceNum) {
  TIMESCOPE();
  DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n",
     DeviceNum, Ptr, N);

  if (!Ptr || N == 0)
    return Ptr;

  const int InitialDevice = omp_get_initial_device();
  if (DeviceNum == InitialDevice) {
    DP("filling memory on host via memset\n");
    memset(Ptr, C, N); // ignore return value, memset() cannot fail
  } else {
    // TODO: replace the omp_target_memset() slow path with the fast path.
    // That will require the ability to execute a kernel from within
    // libomptarget.so (which we do not have at the moment).

    // This is a very slow path: create a filled array on the host and upload
    // it to the device.
    void *Shadow = omp_target_alloc(N, InitialDevice);
    if (Shadow) {
      (void)memset(Shadow, C, N);
      // Per the OpenMP spec omp_target_memcpy() returns zero on success.
      int Rc = omp_target_memcpy(Ptr, Shadow, N, 0, 0, DeviceNum, InitialDevice);
      if (Rc != 0) {
        DP("omp_target_memset failed to copy the fill pattern to device %d\n",
           DeviceNum);
      }
      (void)omp_target_free(Shadow, InitialDevice);
    } else {
      // Without a staging buffer there is nothing to upload. Skip the fill
      // rather than dereferencing a null pointer and taking the process down.
      DP("omp_target_memset failed to fetch memory on host\n");
    }
  }

  DP("omp_target_memset returns %p\n", Ptr);
  return Ptr;
}
|
||
// Asynchronous variant of omp_target_memset(): packages the fill request into
// a heap-allocated TargetMemsetArgsTy and hands it to
// libomp_helper_memset_task_creation(), together with the DepObjCount depend
// objects in DepObjList.
//
// Ptr is returned in every case. A null Ptr or N == 0 is treated as a no-op
// (a useful implementation of behavior the OpenMP spec leaves unspecified),
// and no task is created for it.
EXTERN void *omp_target_memset_async(void *Ptr, int C, size_t N, int DeviceNum,
                                     int DepObjCount,
                                     omp_depend_t *DepObjList) {
  DP("Call to omp_target_memset_async, device %d, device pointer %p, size "
     "%zu\n",
     DeviceNum, Ptr, N);

  if (!Ptr || N == 0)
    return Ptr;

  // Bundle the arguments for the helper task. The task-creation helper
  // deletes the bundle itself if it cannot allocate a task.
  auto *Args = new TargetMemsetArgsTy{Ptr, C, N, DeviceNum};

  // omp_target_memset_async() cannot fail via a return code, so ignore the
  // return code of the helper function.
  (void)libomp_helper_memset_task_creation(Args, DepObjCount, DepObjList);

  return Ptr;
}
|
||
// Allocate and launch helper task | ||
static int libomp_helper_task_creation(TargetMemcpyArgsTy *Args, | ||
int DepObjCount, | ||
omp_depend_t *DepObjList) { | ||
static int libomp_helper_memcpy_task_creation(TargetMemcpyArgsTy *Args, | ||
mjklemm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
int DepObjCount, | ||
omp_depend_t *DepObjList) { | ||
// Create global thread ID | ||
int Gtid = __kmpc_global_thread_num(nullptr); | ||
int (*Fn)(kmp_int32, kmp_task_t *) = &libomp_target_memcpy_async_helper; | ||
|
@@ -270,10 +385,7 @@ static int libomp_helper_task_creation(TargetMemcpyArgsTy *Args, | |
|
||
// Convert the type of depend objects | ||
llvm::SmallVector<kmp_depend_info_t> DepObjs; | ||
for (int i = 0; i < DepObjCount; i++) { | ||
omp_depend_t DepObj = DepObjList[i]; | ||
DepObjs.push_back(*((kmp_depend_info_t *)DepObj)); | ||
} | ||
ConvertDepObjVector(DepObjs, DepObjCount, DepObjList); | ||
|
||
// Launch the helper task | ||
int Rc = __kmpc_omp_task_with_deps(nullptr, Gtid, Ptr, DepObjCount, | ||
|
@@ -302,7 +414,7 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, | |
Dst, Src, Length, DstOffset, SrcOffset, DstDevice, SrcDevice); | ||
|
||
// Create and launch helper task | ||
int Rc = libomp_helper_task_creation(Args, DepObjCount, DepObjList); | ||
int Rc = libomp_helper_memcpy_task_creation(Args, DepObjCount, DepObjList); | ||
|
||
DP("omp_target_memcpy_async returns %d\n", Rc); | ||
return Rc; | ||
|
@@ -399,7 +511,7 @@ EXTERN int omp_target_memcpy_rect_async( | |
DstDimensions, SrcDimensions, DstDevice, SrcDevice); | ||
|
||
// Create and launch helper task | ||
int Rc = libomp_helper_task_creation(Args, DepObjCount, DepObjList); | ||
int Rc = libomp_helper_memcpy_task_creation(Args, DepObjCount, DepObjList); | ||
|
||
DP("omp_target_memcpy_rect_async returns %d\n", Rc); | ||
return Rc; | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
// RUN: %libomptarget-compile-and-run-generic | ||
|
||
#include "stdio.h" | ||
#include <omp.h> | ||
#include <stdlib.h> | ||
|
||
// Regression test for omp_target_memset(): zero-fills a device buffer, reads
// part of it back with omp_target_memcpy_async(), and aborts if any byte read
// back is non-zero. Returns 0 on success, and also when the device buffer
// cannot be allocated.
int main() {
  // Element counts: the device buffer holds NUM_ALLOCATED ints, of which
  // NUM_CHECKED are read back and verified.
  enum { NUM_ALLOCATED = 130, NUM_CHECKED = 128 };

  int d = omp_get_default_device();
  int id = omp_get_initial_device();
  int host[NUM_CHECKED];
  int i;
  void *p;
  void *result;

  // Fall back to the initial device when the default device number is out of
  // range.
  if (d < 0 || d >= omp_get_num_devices())
    d = id;

  p = omp_target_alloc(NUM_ALLOCATED * sizeof(int), d);
  if (p == NULL)
    return 0;

  // The call must hand back the very pointer it was given.
  result = omp_target_memset(p, 0, NUM_ALLOCATED * sizeof(int), d);
  if (result != p) {
    abort();
  }

  // Pre-fill the host buffer with non-zero data so that a copy which never
  // happened is detected by the check below.
  for (i = 0; i < NUM_CHECKED; ++i)
    host[i] = i;

  // Read back NUM_CHECKED ints starting one int into the device buffer.
  if (omp_target_memcpy_async(host, p, NUM_CHECKED * sizeof(int), 0,
                              sizeof(int), id, d, 0, NULL))
    abort();

  // Wait for the asynchronous copy before inspecting the host buffer.
#pragma omp taskwait

  for (i = 0; i < NUM_CHECKED; ++i)
    if (host[i] != 0)
      abort();

  omp_target_free(p, d);

  return 0;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.