|
9 | 9 | #include <cuda.h>
|
10 | 10 | #include <cuda_runtime_api.h>
|
11 | 11 | #include <iostream>
|
12 |
| -int main(){ |
| 12 | +#include <vector> |
| 13 | +#include <algorithm> |
| 14 | + |
| 15 | +void test1(){ |
13 | 16 | size_t result1, result2;
|
14 | 17 | int size = 32;
|
15 | 18 | float* f_A;
|
@@ -53,6 +56,15 @@ int main(){
|
53 | 56 |
|
54 | 57 | cuMemcpyDtoH(f_A, f_D, size);
|
55 | 58 |
|
| 59 | + cuMemcpy(f_D, f_D2, size); |
| 60 | + r = cuMemcpy(f_D, f_D2, size); |
| 61 | + |
| 62 | + cuMemcpyAsync(f_D, f_D2, size, stream); |
| 63 | + r = cuMemcpyAsync(f_D, f_D2, size, stream); |
| 64 | + |
| 65 | + cuMemcpyAsync(f_D, f_D2, size, 0); |
| 66 | + r = cuMemcpyAsync(f_D, f_D2, size, 0); |
| 67 | + |
56 | 68 |
|
57 | 69 | cuMemHostGetDevicePointer(&f_D, f_A, 0);
|
58 | 70 |
|
@@ -147,7 +159,72 @@ int main(){
|
147 | 159 | cuMemHostRegister((void *)pFlags, size, CU_MEMHOSTREGISTER_PORTABLE);
|
148 | 160 |
|
149 | 161 | cuMemHostUnregister((void *)pFlags);
|
| 162 | +} |
150 | 163 |
|
151 |
| - return 0; |
| 164 | +int test2() { |
| 165 | + int ret = 0; |
| 166 | + constexpr int size = 64; |
| 167 | + int v1[size]; |
| 168 | + int v2[size]; |
| 169 | + |
| 170 | + CUdeviceptr p1 = (CUdeviceptr)v1; |
| 171 | + CUdeviceptr p2 = (CUdeviceptr)v2; |
| 172 | + CUdeviceptr q1; |
| 173 | + CUdeviceptr q2; |
| 174 | + |
| 175 | + // check if v1 and v2 agree on first i elements |
| 176 | + |
| 177 | + auto check = [&](int i, std::string fail) { |
| 178 | + if (!std::equal(v1, v1+i, v2)) { |
| 179 | + std::cout << fail << "\n"; |
| 180 | + ret = 1; |
| 181 | + } |
| 182 | + }; |
| 183 | + |
| 184 | + // v1 = {0, 1, 2, ...} |
| 185 | + // v2 = {-1, -1, ...} |
| 186 | + auto initialize = [&]() { |
| 187 | + for (int i = 0; i < size; ++i) { |
| 188 | + v1[i] = i; |
| 189 | + v2[i] = -1; |
| 190 | + } |
| 191 | + cuMemAlloc(&q1, sizeof(int)*size); |
| 192 | + cuMemAlloc(&q2, sizeof(int)*size); |
| 193 | + }; |
| 194 | + |
| 195 | + for (int i = 1; i < size; i *= 2) { |
| 196 | + int n = sizeof(int)*i; |
| 197 | + |
| 198 | + // host to host copy |
| 199 | + initialize(); |
| 200 | + cuMemcpy(p2, p1, n); |
| 201 | + check(i, "cuMemcpy fail " + std::to_string(i)); |
| 202 | + |
| 203 | + // host to device copy async, device to host copy |
| 204 | + initialize(); |
| 205 | + cuMemcpyAsync(q1, p1, n, 0); |
| 206 | + cuStreamSynchronize(0); |
| 207 | + cuMemcpy(p2, q1, n); |
| 208 | + check(i, "cuMemcpyAsync 1 fail " + std::to_string(i)); |
| 209 | + |
| 210 | + // host to device copy, device to device async copy, |
| 211 | + // device to host copy |
| 212 | + initialize(); |
| 213 | + cuMemcpy(q1, p1, n); |
| 214 | + cuMemcpyAsync(q2, q1, n, 0); |
| 215 | + cuStreamSynchronize(0); |
| 216 | + cuMemcpy(p2, q2, n); |
| 217 | + check(i, "cuMemcpyAsync 2 fail " + std::to_string(i)); |
| 218 | + } |
| 219 | + |
| 220 | + return ret; |
152 | 221 | }
|
153 | 222 |
|
| 223 | +int main() { |
| 224 | + cuInit(0); |
| 225 | + CUdevice dev = 0; |
| 226 | + cuDeviceGet(&dev, 0); |
| 227 | + CUcontext ctx = 0; |
| 228 | + cuCtxCreate(&ctx, 0, dev); |
| 229 | + return test2(); |
| 230 | +} |
0 commit comments