Skip to content

[SYCL][UR][CUDA] Fix some issues in cuda adapter's urDeviceGetInfo. #10248

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes (20 additions & 17 deletions) in sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -206,10 +206,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(Atomic64);
}
case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: {
uint64_t Capabilities = UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL;
ur_memory_order_capability_flags_t Capabilities =
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL;
return ReturnValue(Capabilities);
}
case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
Expand Down Expand Up @@ -315,7 +316,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
"runtime.");
}

return ReturnValue(uint32_t{Enabled});
return ReturnValue(Enabled);
}
case UR_DEVICE_INFO_MAX_READ_IMAGE_ARGS: {
// This call doesn't match to CUDA as it doesn't have images, but instead
Expand Down Expand Up @@ -473,7 +474,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
}
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
// TODO: is this config consistent across all NVIDIA GPUs?
uint64_t Config =
ur_device_fp_capability_flags_t Config =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
Expand All @@ -485,12 +486,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
}
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
// TODO: is this config consistent across all NVIDIA GPUs?
uint64_t Config = UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
ur_device_fp_capability_flags_t Config =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
return ReturnValue(Config);
}
case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
Expand Down Expand Up @@ -600,13 +602,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
UR_QUEUE_FLAG_PROFILING_ENABLE));
case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: {
// The mandated minimum capability:
uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE |
UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
ur_queue_flags_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE |
UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
return ReturnValue(Capability);
}
case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: {
// The mandated minimum capability:
uint64_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE;
ur_queue_flags_t Capability = UR_QUEUE_FLAG_PROFILING_ENABLE;
return ReturnValue(Capability);
}
case UR_DEVICE_INFO_BUILT_IN_KERNELS: {
Expand Down Expand Up @@ -934,7 +936,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
hDevice->get()) == CUDA_SUCCESS);
}

uint64_t MemoryBandwidth = uint64_t(MemoryClockKHz) * MemoryBusWidth * 250;
uint32_t MemoryBandwidth = MemoryClockKHz * MemoryBusWidth * 250;

return ReturnValue(MemoryBandwidth);
}
Expand Down Expand Up @@ -996,13 +998,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS:
return ReturnValue(false);
// TODO: Investigate if this information is available on CUDA.
case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS:
case UR_DEVICE_INFO_GPU_EU_COUNT:
case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH:
case UR_DEVICE_INFO_GPU_EU_SLICES:
case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
return UR_RESULT_ERROR_INVALID_ENUMERATION;
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;

default:
break;
Expand Down