Skip to content

Commit 95f0d1e

Browse files
[libomptarget] Compile with older cuda, revert D95274
[libomptarget] Compile with older cuda, revert D95274. Fixes regression reported in comments of D95274. Reviewed By: jdoerfert. Differential Revision: https://reviews.llvm.org/D95367
1 parent 558b3bb commit 95f0d1e

File tree

3 files changed: +54 -43 lines changed

3 files changed: +54 -43 lines changed

openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -28,26 +28,26 @@ DLWRAP(cuFuncGetAttribute, 3);
2828
DLWRAP(cuGetErrorString, 2);
2929
DLWRAP(cuLaunchKernel, 11);
3030

31-
DLWRAP(cuMemAlloc_v2, 2);
32-
DLWRAP(cuMemcpyDtoDAsync_v2, 4);
31+
DLWRAP(cuMemAlloc, 2);
32+
DLWRAP(cuMemcpyDtoDAsync, 4);
3333

34-
DLWRAP(cuMemcpyDtoH_v2, 3);
35-
DLWRAP(cuMemcpyDtoHAsync_v2, 4);
36-
DLWRAP(cuMemcpyHtoD_v2, 3);
37-
DLWRAP(cuMemcpyHtoDAsync_v2, 4);
34+
DLWRAP(cuMemcpyDtoH, 3);
35+
DLWRAP(cuMemcpyDtoHAsync, 4);
36+
DLWRAP(cuMemcpyHtoD, 3);
37+
DLWRAP(cuMemcpyHtoDAsync, 4);
3838

39-
DLWRAP(cuMemFree_v2, 1);
39+
DLWRAP(cuMemFree, 1);
4040
DLWRAP(cuModuleGetFunction, 3);
41-
DLWRAP(cuModuleGetGlobal_v2, 4);
41+
DLWRAP(cuModuleGetGlobal, 4);
4242

4343
DLWRAP(cuModuleUnload, 1);
4444
DLWRAP(cuStreamCreate, 2);
45-
DLWRAP(cuStreamDestroy_v2, 1);
45+
DLWRAP(cuStreamDestroy, 1);
4646
DLWRAP(cuStreamSynchronize, 1);
4747
DLWRAP(cuCtxSetCurrent, 1);
48-
DLWRAP(cuDevicePrimaryCtxRelease_v2, 1);
48+
DLWRAP(cuDevicePrimaryCtxRelease, 1);
4949
DLWRAP(cuDevicePrimaryCtxGetState, 3);
50-
DLWRAP(cuDevicePrimaryCtxSetFlags_v2, 2);
50+
DLWRAP(cuDevicePrimaryCtxSetFlags, 2);
5151
DLWRAP(cuDevicePrimaryCtxRetain, 2);
5252
DLWRAP(cuModuleLoadDataEx, 5);
5353

openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h

Lines changed: 23 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,18 @@ typedef enum CUctx_flags_enum {
4848
CU_CTX_SCHED_MASK = 0x07,
4949
} CUctx_flags;
5050

51+
#define cuMemFree cuMemFree_v2
52+
#define cuMemAlloc cuMemAlloc_v2
53+
#define cuMemcpyDtoH cuMemcpyDtoH_v2
54+
#define cuMemcpyHtoD cuMemcpyHtoD_v2
55+
#define cuStreamDestroy cuStreamDestroy_v2
56+
#define cuModuleGetGlobal cuModuleGetGlobal_v2
57+
#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
58+
#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
59+
#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
60+
#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2
61+
#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2
62+
5163
CUresult cuCtxGetDevice(CUdevice *);
5264
CUresult cuDeviceGet(CUdevice *, int);
5365
CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
@@ -60,26 +72,26 @@ CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,
6072
unsigned, unsigned, unsigned, CUstream, void **,
6173
void **);
6274

63-
CUresult cuMemAlloc_v2(CUdeviceptr *, size_t);
64-
CUresult cuMemcpyDtoDAsync_v2(CUdeviceptr, CUdeviceptr, size_t, CUstream);
75+
CUresult cuMemAlloc(CUdeviceptr *, size_t);
76+
CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream);
6577

66-
CUresult cuMemcpyDtoH_v2(void *, CUdeviceptr, size_t);
67-
CUresult cuMemcpyDtoHAsync_v2(void *, CUdeviceptr, size_t, CUstream);
68-
CUresult cuMemcpyHtoD_v2(CUdeviceptr, const void *, size_t);
69-
CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr, const void *, size_t, CUstream);
78+
CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t);
79+
CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream);
80+
CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t);
81+
CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream);
7082

71-
CUresult cuMemFree_v2(CUdeviceptr);
83+
CUresult cuMemFree(CUdeviceptr);
7284
CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *);
73-
CUresult cuModuleGetGlobal_v2(CUdeviceptr *, size_t *, CUmodule, const char *);
85+
CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *);
7486

7587
CUresult cuModuleUnload(CUmodule);
7688
CUresult cuStreamCreate(CUstream *, unsigned);
77-
CUresult cuStreamDestroy_v2(CUstream);
89+
CUresult cuStreamDestroy(CUstream);
7890
CUresult cuStreamSynchronize(CUstream);
7991
CUresult cuCtxSetCurrent(CUcontext);
80-
CUresult cuDevicePrimaryCtxRelease_v2(CUdevice);
92+
CUresult cuDevicePrimaryCtxRelease(CUdevice);
8193
CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *);
82-
CUresult cuDevicePrimaryCtxSetFlags_v2(CUdevice, unsigned);
94+
CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned);
8395
CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice);
8496
CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *,
8597
void **);

openmp/libomptarget/plugins/cuda/src/rtl.cpp

Lines changed: 20 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -110,8 +110,8 @@ bool checkResult(CUresult Err, const char *ErrMsg) {
110110

111111
int memcpyDtoD(const void *SrcPtr, void *DstPtr, int64_t Size,
112112
CUstream Stream) {
113-
CUresult Err = cuMemcpyDtoDAsync_v2((CUdeviceptr)DstPtr, (CUdeviceptr)SrcPtr,
114-
Size, Stream);
113+
CUresult Err =
114+
cuMemcpyDtoDAsync((CUdeviceptr)DstPtr, (CUdeviceptr)SrcPtr, Size, Stream);
115115

116116
if (Err != CUDA_SUCCESS) {
117117
REPORT("Error when copying data from device to device. Pointers: src "
@@ -207,8 +207,8 @@ class StreamManagerTy {
207207

208208
for (CUstream &S : StreamPool[I]) {
209209
if (S)
210-
checkResult(cuStreamDestroy_v2(S),
211-
"Error returned from cuStreamDestroy_v2\n");
210+
checkResult(cuStreamDestroy(S),
211+
"Error returned from cuStreamDestroy\n");
212212
}
213213
}
214214
}
@@ -311,8 +311,8 @@ class DeviceRTLTy {
311311
return nullptr;
312312

313313
CUdeviceptr DevicePtr;
314-
Err = cuMemAlloc_v2(&DevicePtr, Size);
315-
if (!checkResult(Err, "Error returned from cuMemAlloc_v2\n"))
314+
Err = cuMemAlloc(&DevicePtr, Size);
315+
if (!checkResult(Err, "Error returned from cuMemAlloc\n"))
316316
return nullptr;
317317

318318
return (void *)DevicePtr;
@@ -323,8 +323,8 @@ class DeviceRTLTy {
323323
if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n"))
324324
return OFFLOAD_FAIL;
325325

326-
Err = cuMemFree_v2((CUdeviceptr)TgtPtr);
327-
if (!checkResult(Err, "Error returned from cuMemFree_v2\n"))
326+
Err = cuMemFree((CUdeviceptr)TgtPtr);
327+
if (!checkResult(Err, "Error returned from cuMemFree\n"))
328328
return OFFLOAD_FAIL;
329329

330330
return OFFLOAD_SUCCESS;
@@ -466,8 +466,8 @@ class DeviceRTLTy {
466466
CUdevice Device;
467467
checkResult(cuCtxGetDevice(&Device),
468468
"Error returned from cuCtxGetDevice\n");
469-
checkResult(cuDevicePrimaryCtxRelease_v2(Device),
470-
"Error returned from cuDevicePrimaryCtxRelease_v2\n");
469+
checkResult(cuDevicePrimaryCtxRelease(Device),
470+
"Error returned from cuDevicePrimaryCtxRelease\n");
471471
}
472472
}
473473
}
@@ -506,9 +506,8 @@ class DeviceRTLTy {
506506
} else {
507507
DP("The primary context is inactive, set its flags to "
508508
"CU_CTX_SCHED_BLOCKING_SYNC\n");
509-
Err = cuDevicePrimaryCtxSetFlags_v2(Device, CU_CTX_SCHED_BLOCKING_SYNC);
510-
if (!checkResult(Err,
511-
"Error returned from cuDevicePrimaryCtxSetFlags_v2\n"))
509+
Err = cuDevicePrimaryCtxSetFlags(Device, CU_CTX_SCHED_BLOCKING_SYNC);
510+
if (!checkResult(Err, "Error returned from cuDevicePrimaryCtxSetFlags\n"))
512511
return OFFLOAD_FAIL;
513512
}
514513

@@ -657,7 +656,7 @@ class DeviceRTLTy {
657656
__tgt_offload_entry Entry = *E;
658657
CUdeviceptr CUPtr;
659658
size_t CUSize;
660-
Err = cuModuleGetGlobal_v2(&CUPtr, &CUSize, Module, E->name);
659+
Err = cuModuleGetGlobal(&CUPtr, &CUSize, Module, E->name);
661660
// We keep this style here because we need the name
662661
if (Err != CUDA_SUCCESS) {
663662
REPORT("Loading global '%s' Failed\n", E->name);
@@ -689,7 +688,7 @@ class DeviceRTLTy {
689688
// If unified memory is present any target link or to variables
690689
// can access host addresses directly. There is no longer a
691690
// need for device copies.
692-
cuMemcpyHtoD_v2(CUPtr, E->addr, sizeof(void *));
691+
cuMemcpyHtoD(CUPtr, E->addr, sizeof(void *));
693692
DP("Copy linked variable host address (" DPxMOD
694693
") to device address (" DPxMOD ")\n",
695694
DPxPTR(*((void **)E->addr)), DPxPTR(CUPtr));
@@ -720,15 +719,15 @@ class DeviceRTLTy {
720719

721720
CUdeviceptr ExecModePtr;
722721
size_t CUSize;
723-
Err = cuModuleGetGlobal_v2(&ExecModePtr, &CUSize, Module, ExecModeName);
722+
Err = cuModuleGetGlobal(&ExecModePtr, &CUSize, Module, ExecModeName);
724723
if (Err == CUDA_SUCCESS) {
725724
if (CUSize != sizeof(int8_t)) {
726725
DP("Loading global exec_mode '%s' - size mismatch (%zd != %zd)\n",
727726
ExecModeName, CUSize, sizeof(int8_t));
728727
return nullptr;
729728
}
730729

731-
Err = cuMemcpyDtoH_v2(&ExecModeVal, ExecModePtr, CUSize);
730+
Err = cuMemcpyDtoH(&ExecModeVal, ExecModePtr, CUSize);
732731
if (Err != CUDA_SUCCESS) {
733732
REPORT("Error when copying data from device to host. Pointers: "
734733
"host = " DPxMOD ", device = " DPxMOD ", size = %zd\n",
@@ -769,7 +768,7 @@ class DeviceRTLTy {
769768
CUdeviceptr DeviceEnvPtr;
770769
size_t CUSize;
771770

772-
Err = cuModuleGetGlobal_v2(&DeviceEnvPtr, &CUSize, Module, DeviceEnvName);
771+
Err = cuModuleGetGlobal(&DeviceEnvPtr, &CUSize, Module, DeviceEnvName);
773772
if (Err == CUDA_SUCCESS) {
774773
if (CUSize != sizeof(DeviceEnv)) {
775774
REPORT(
@@ -779,7 +778,7 @@ class DeviceRTLTy {
779778
return nullptr;
780779
}
781780

782-
Err = cuMemcpyHtoD_v2(DeviceEnvPtr, &DeviceEnv, CUSize);
781+
Err = cuMemcpyHtoD(DeviceEnvPtr, &DeviceEnv, CUSize);
783782
if (Err != CUDA_SUCCESS) {
784783
REPORT("Error when copying data from host to device. Pointers: "
785784
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
@@ -817,7 +816,7 @@ class DeviceRTLTy {
817816

818817
CUstream Stream = getStream(DeviceId, AsyncInfoPtr);
819818

820-
Err = cuMemcpyHtoDAsync_v2((CUdeviceptr)TgtPtr, HstPtr, Size, Stream);
819+
Err = cuMemcpyHtoDAsync((CUdeviceptr)TgtPtr, HstPtr, Size, Stream);
821820
if (Err != CUDA_SUCCESS) {
822821
REPORT("Error when copying data from host to device. Pointers: host "
823822
"= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 "\n",
@@ -839,7 +838,7 @@ class DeviceRTLTy {
839838

840839
CUstream Stream = getStream(DeviceId, AsyncInfoPtr);
841840

842-
Err = cuMemcpyDtoHAsync_v2(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream);
841+
Err = cuMemcpyDtoHAsync(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream);
843842
if (Err != CUDA_SUCCESS) {
844843
REPORT("Error when copying data from device to host. Pointers: host "
845844
"= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 "\n",

0 commit comments

Comments
 (0)