Skip to content

Commit 18f23b0

Browse files
authored
[SYCL][UR][CUDA] Fix some issues in cuda adapter's urDeviceGetInfo. (#10248)
1 parent 7e7971a commit 18f23b0

File tree

1 file changed

+20
-17
lines changed
  • sycl/plugins/unified_runtime/ur/adapters/cuda

1 file changed

+20
-17
lines changed

sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp

Lines changed: 20 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -205,10 +205,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
205205
return ReturnValue(Atomic64);
206206
}
207207
case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
208-
uint64_t Capabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
209-
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
210-
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
211-
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL;
208+
ur_memory_order_capability_flags_t Capabilities =
209+
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
210+
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
211+
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
212+
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL;
212213
return ReturnValue(Capabilities);
213214
}
214215
case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
@@ -314,7 +315,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
314315
"runtime.");
315316
}
316317

317-
return ReturnValue(uint32_t{Enabled});
318+
return ReturnValue(Enabled);
318319
}
319320
case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: {
320321
// This call doesn't match to CUDA as it doesn't have images, but instead
@@ -472,7 +473,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
472473
}
473474
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
474475
// TODO: is this config consistent across all NVIDIA GPUs?
475-
uint64_t Config =
476+
ur_device_fp_capability_flags_t Config =
476477
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
477478
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
478479
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
@@ -484,12 +485,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
484485
}
485486
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
486487
// TODO: is this config consistent across all NVIDIA GPUs?
487-
uint64_t Config = UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
488-
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
489-
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
490-
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
491-
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
492-
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
488+
ur_device_fp_capability_flags_t Config =
489+
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
490+
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
491+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
492+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
493+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
494+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
493495
return ReturnValue(Config);
494496
}
495497
case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
@@ -599,13 +601,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
599601
UR_QUEUE_FLAG_PROFILING_ENABLE));
600602
case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: {
601603
// The mandated minimum capability:
602-
uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE |
603-
UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
604+
ur_queue_flags_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE |
605+
UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
604606
return ReturnValue(Capability);
605607
}
606608
case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: {
607609
// The mandated minimum capability:
608-
uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE;
610+
ur_queue_flags_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE;
609611
return ReturnValue(Capability);
610612
}
611613
case UR_DEVICE_INFO_BUILT_IN_KERNELS: {
@@ -1015,7 +1017,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
10151017
hDevice->get()) == CUDA_SUCCESS);
10161018
}
10171019

1018-
uint64_t MemoryBandwidth = uint64_t(MemoryClockKHz) * MemoryBusWidth * 250;
1020+
uint32_t MemoryBandwidth = MemoryClockKHz * MemoryBusWidth * 250;
10191021

10201022
return ReturnValue(MemoryBandwidth);
10211023
}
@@ -1075,13 +1077,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
10751077
case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS:
10761078
return ReturnValue(false);
10771079
// TODO: Investigate if this information is available on CUDA.
1080+
case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS:
10781081
case UR_DEVICE_INFO_GPU_EU_COUNT:
10791082
case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH:
10801083
case UR_DEVICE_INFO_GPU_EU_SLICES:
10811084
case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
10821085
case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
10831086
case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
1084-
return UR_RESULT_ERROR_INVALID_ENUMERATION;
1087+
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
10851088

10861089
default:
10871090
break;

0 commit comments

Comments
 (0)