|
| 1 | +// clang-format off |
| 2 | +// RUN: %libomptarget-compilexx-generic |
| 3 | +// RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=60 %libomptarget-run-generic 2>&1 \ |
| 4 | +// RUN: | %fcheck-generic -check-prefix=CHECK |
| 5 | + |
| 6 | +// UNSUPPORTED: aarch64-unknown-linux-gnu |
| 7 | +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO |
| 8 | +// UNSUPPORTED: nvptx64-nvidia-cuda |
| 9 | +// UNSUPPORTED: nvptx64-nvidia-cuda-LTO |
| 10 | +// UNSUPPORTED: x86_64-pc-linux-gnu |
| 11 | +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO |
| 12 | + |
| 13 | +// REQUIRES: unified_shared_memory |
| 14 | + |
| 15 | +// clang-format on |
| 16 | + |
| 17 | +#include <cstdint> |
| 18 | +#include <cstdio> |
| 19 | + |
| 20 | +/// Test for globals under automatic zero-copy. |
| 21 | +/// Because we are building without unified_shared_memory |
| 22 | +/// requirement pragma, all globals are allocated in the device |
| 23 | +/// memory of all used GPUs. To ensure those globals contain the intended |
| 24 | +/// values, we need to execute H2D and D2H memory copies even if we are running |
| 25 | +/// in automatic zero-copy. This only applies to globals. Local variables (their |
| 26 | +/// host pointers) are passed to the kernels by-value, according to the |
| 27 | +/// automatic zero-copy behavior. |
| 28 | + |
| 29 | +#pragma omp begin declare target |
| 30 | +int32_t x; // 4 bytes |
| 31 | +int32_t z[10]; // 40 bytes |
| 32 | +int32_t *k; // 20 bytes |
| 33 | +#pragma omp end declare target |
| 34 | + |
| 35 | +int main() { |
| 36 | + int32_t *dev_k = nullptr; |
| 37 | + x = 3; |
| 38 | + int32_t y = -1; |
| 39 | + for (size_t t = 0; t < 10; t++) |
| 40 | + z[t] = t; |
| 41 | + k = new int32_t[5]; |
| 42 | + |
| 43 | + printf("Host pointer for k = %p\n", k); |
| 44 | + for (size_t t = 0; t < 5; t++) |
| 45 | + k[t] = -t; |
| 46 | + |
| 47 | +/// target update to forces a copy between host and device global, which we must |
| 48 | +/// execute to keep the two global copies consistent. CHECK: Copying data from |
| 49 | +/// host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=40, Name=z |
| 50 | +#pragma omp target update to(z[ : 10]) |
| 51 | + |
| 52 | +/// target map with always modifier (for x) forces a copy between host and |
| 53 | +/// device global, which we must execute to keep the two global copies |
| 54 | +/// consistent. k's content (host address) is passed by-value to the kernel |
| 55 | +/// (Size=20 case). y, being a local variable, is also passed by-value to the |
| 56 | +/// kernel (Size=4 case) CHECK: Return HstPtrBegin {{.*}} Size=4 for unified |
| 57 | +/// shared memory CHECK: Return HstPtrBegin {{.*}} Size=20 for unified shared |
| 58 | +/// memory CHECK: Copying data from host to device, HstPtr={{.*}}, |
| 59 | +/// TgtPtr={{.*}}, Size=4, Name=x |
| 60 | +#pragma omp target map(to : k[ : 5]) map(always, tofrom : x) map(tofrom : y) \ |
| 61 | + map(from : dev_k) |
| 62 | + { |
| 63 | + x++; |
| 64 | + y++; |
| 65 | + for (size_t t = 0; t < 10; t++) |
| 66 | + z[t]++; |
| 67 | + dev_k = k; |
| 68 | + } |
| 69 | +/// CHECK-NOT: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, |
| 70 | +/// Size=20, Name=k |
| 71 | + |
| 72 | +/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, |
| 73 | +/// Size=4, Name=x |
| 74 | + |
| 75 | +/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, |
| 76 | +/// Size=40, Name=z |
| 77 | +#pragma omp target update from(z[ : 10]) |
| 78 | + |
| 79 | + /// CHECK-NOT: k pointer not correctly passed to kernel |
| 80 | + if (dev_k != k) |
| 81 | + printf("k pointer not correctly passed to kernel\n"); |
| 82 | + |
| 83 | + delete[] k; |
| 84 | + return 0; |
| 85 | +} |
0 commit comments