Skip to content

Commit e3b16b5

Browse files
committed
[SYCL][UR][CUDA] Correct some types for a few GetDeviceInfo queries.
Also return the correct error code for unsupported enumerations.
1 parent 5cb8279 commit e3b16b5

File tree

1 file changed

+20
-17
lines changed
  • sycl/plugins/unified_runtime/ur/adapters/cuda

1 file changed

+20
-17
lines changed

sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp

Lines changed: 20 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -206,10 +206,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
206206
return ReturnValue(Atomic64);
207207
}
208208
case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
209-
uint64_t Capabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
210-
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
211-
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
212-
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL;
209+
ur_memory_order_capability_flags_t Capabilities =
210+
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
211+
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
212+
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
213+
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL;
213214
return ReturnValue(Capabilities);
214215
}
215216
case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
@@ -315,7 +316,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
315316
"runtime.");
316317
}
317318

318-
return ReturnValue(uint32_t{Enabled});
319+
return ReturnValue(Enabled);
319320
}
320321
case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: {
321322
// This call doesn't match to CUDA as it doesn't have images, but instead
@@ -473,7 +474,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
473474
}
474475
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
475476
// TODO: is this config consistent across all NVIDIA GPUs?
476-
uint64_t Config =
477+
ur_device_fp_capability_flags_t Config =
477478
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
478479
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
479480
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
@@ -485,12 +486,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
485486
}
486487
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
487488
// TODO: is this config consistent across all NVIDIA GPUs?
488-
uint64_t Config = UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
489-
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
490-
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
491-
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
492-
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
493-
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
489+
ur_device_fp_capability_flags_t Config =
490+
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
491+
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
492+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
493+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
494+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
495+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
494496
return ReturnValue(Config);
495497
}
496498
case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
@@ -600,13 +602,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
600602
UR_QUEUE_FLAG_PROFILING_ENABLE));
601603
case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: {
602604
// The mandated minimum capability:
603-
uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE |
604-
UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
605+
ur_queue_flags_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE |
606+
UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
605607
return ReturnValue(Capability);
606608
}
607609
case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: {
608610
// The mandated minimum capability:
609-
uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE;
611+
ur_queue_flags_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE;
610612
return ReturnValue(Capability);
611613
}
612614
case UR_DEVICE_INFO_BUILT_IN_KERNELS: {
@@ -934,7 +936,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
934936
hDevice->get()) == CUDA_SUCCESS);
935937
}
936938

937-
uint64_t MemoryBandwidth = uint64_t(MemoryClockKHz) * MemoryBusWidth * 250;
939+
uint32_t MemoryBandwidth = MemoryClockKHz * MemoryBusWidth * 250;
938940

939941
return ReturnValue(MemoryBandwidth);
940942
}
@@ -996,13 +998,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
996998
case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS:
997999
return ReturnValue(false);
9981000
// TODO: Investigate if this information is available on CUDA.
1001+
case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS:
9991002
case UR_DEVICE_INFO_GPU_EU_COUNT:
10001003
case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH:
10011004
case UR_DEVICE_INFO_GPU_EU_SLICES:
10021005
case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
10031006
case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
10041007
case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
1005-
return UR_RESULT_ERROR_INVALID_ENUMERATION;
1008+
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
10061009

10071010
default:
10081011
break;

0 commit comments

Comments
 (0)