Skip to content

Commit 72b92c3

Browse files
[NFC][SYCL] Cache aspects that call has_extension (#18546)
That call requires string comparison which isn't trivial. On the other hand, querying supported extensions is going to happen anyway, so it makes sense to eagerly initialize everything extensions-related in the `device_impl` ctor. --------- Co-authored-by: Udit Kumar Agarwal <[email protected]>
1 parent df4bd05 commit 72b92c3

File tree

4 files changed

+426
-363
lines changed

4 files changed

+426
-363
lines changed

sycl/source/detail/device_impl.cpp

Lines changed: 0 additions & 327 deletions
Original file line numberDiff line numberDiff line change
@@ -312,333 +312,6 @@ ur_native_handle_t device_impl::getNative() const {
312312
return Handle;
313313
}
314314

315-
bool device_impl::has(aspect Aspect) const {
316-
switch (Aspect) {
317-
case aspect::host:
318-
// Deprecated
319-
return false;
320-
case aspect::cpu:
321-
return is_cpu();
322-
case aspect::gpu:
323-
return is_gpu();
324-
case aspect::accelerator:
325-
return is_accelerator();
326-
case aspect::custom:
327-
return false;
328-
// TODO: Implement this for FPGA emulator.
329-
case aspect::emulated:
330-
return false;
331-
case aspect::host_debuggable:
332-
return false;
333-
case aspect::fp16:
334-
return has_extension("cl_khr_fp16");
335-
case aspect::fp64:
336-
return has_extension("cl_khr_fp64");
337-
case aspect::int64_base_atomics:
338-
return has_extension("cl_khr_int64_base_atomics");
339-
case aspect::int64_extended_atomics:
340-
return has_extension("cl_khr_int64_extended_atomics");
341-
case aspect::atomic64:
342-
return get_info<info::device::atomic64>();
343-
case aspect::image:
344-
return get_info<info::device::image_support>();
345-
case aspect::online_compiler:
346-
return get_info<info::device::is_compiler_available>();
347-
case aspect::online_linker:
348-
return get_info<info::device::is_linker_available>();
349-
case aspect::queue_profiling:
350-
return get_info<info::device::queue_profiling>();
351-
case aspect::usm_device_allocations:
352-
return get_info<info::device::usm_device_allocations>();
353-
case aspect::usm_host_allocations:
354-
return get_info<info::device::usm_host_allocations>();
355-
case aspect::ext_intel_mem_channel:
356-
return get_info<info::device::ext_intel_mem_channel>();
357-
case aspect::ext_oneapi_cuda_cluster_group:
358-
return get_info<info::device::ext_oneapi_cuda_cluster_group>();
359-
case aspect::usm_atomic_host_allocations:
360-
return (get_info_impl<UR_DEVICE_INFO_USM_HOST_SUPPORT>() &
361-
UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS);
362-
case aspect::usm_shared_allocations:
363-
return get_info<info::device::usm_shared_allocations>();
364-
case aspect::usm_atomic_shared_allocations:
365-
return (get_info_impl<UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT>() &
366-
UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS);
367-
case aspect::usm_restricted_shared_allocations:
368-
return get_info<info::device::usm_restricted_shared_allocations>();
369-
case aspect::usm_system_allocations:
370-
return get_info<info::device::usm_system_allocations>();
371-
case aspect::ext_intel_device_id:
372-
return has_info_desc(UR_DEVICE_INFO_DEVICE_ID);
373-
case aspect::ext_intel_pci_address:
374-
return has_info_desc(UR_DEVICE_INFO_PCI_ADDRESS);
375-
case aspect::ext_intel_gpu_eu_count:
376-
return has_info_desc(UR_DEVICE_INFO_GPU_EU_COUNT);
377-
case aspect::ext_intel_gpu_eu_simd_width:
378-
return has_info_desc(UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH);
379-
case aspect::ext_intel_gpu_slices:
380-
return has_info_desc(UR_DEVICE_INFO_GPU_EU_SLICES);
381-
case aspect::ext_intel_gpu_subslices_per_slice:
382-
return has_info_desc(UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE);
383-
case aspect::ext_intel_gpu_eu_count_per_subslice:
384-
return has_info_desc(UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE);
385-
case aspect::ext_intel_gpu_hw_threads_per_eu:
386-
return has_info_desc(UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU);
387-
case aspect::ext_intel_free_memory:
388-
return has_info_desc(UR_DEVICE_INFO_GLOBAL_MEM_FREE);
389-
case aspect::ext_intel_memory_clock_rate:
390-
return has_info_desc(UR_DEVICE_INFO_MEMORY_CLOCK_RATE);
391-
case aspect::ext_intel_memory_bus_width:
392-
return has_info_desc(UR_DEVICE_INFO_MEMORY_BUS_WIDTH);
393-
case aspect::ext_intel_device_info_uuid:
394-
return has_info_desc(UR_DEVICE_INFO_UUID);
395-
case aspect::ext_intel_max_mem_bandwidth:
396-
// currently not supported
397-
return false;
398-
case aspect::ext_intel_current_clock_throttle_reasons:
399-
return has_info_desc(UR_DEVICE_INFO_CURRENT_CLOCK_THROTTLE_REASONS);
400-
case aspect::ext_intel_fan_speed:
401-
return has_info_desc(UR_DEVICE_INFO_FAN_SPEED);
402-
case aspect::ext_intel_power_limits:
403-
return has_info_desc(UR_DEVICE_INFO_MIN_POWER_LIMIT) &&
404-
has_info_desc(UR_DEVICE_INFO_MAX_POWER_LIMIT);
405-
case aspect::ext_oneapi_srgb:
406-
return get_info<info::device::ext_oneapi_srgb>();
407-
case aspect::ext_oneapi_native_assert:
408-
return get_info_impl<UR_DEVICE_INFO_USE_NATIVE_ASSERT>();
409-
case aspect::ext_oneapi_cuda_async_barrier: {
410-
return get_info_impl_nocheck<UR_DEVICE_INFO_ASYNC_BARRIER>().value_or(0);
411-
}
412-
case aspect::ext_intel_legacy_image: {
413-
return get_info_impl_nocheck<UR_DEVICE_INFO_IMAGE_SUPPORT>().value_or(0);
414-
}
415-
case aspect::ext_oneapi_bindless_images: {
416-
return get_info_impl_nocheck<UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP>()
417-
.value_or(0);
418-
}
419-
case aspect::ext_oneapi_bindless_images_shared_usm: {
420-
return get_info_impl_nocheck<
421-
UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP>()
422-
.value_or(0);
423-
}
424-
case aspect::ext_oneapi_bindless_images_1d_usm: {
425-
return get_info_impl_nocheck<
426-
UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP>()
427-
.value_or(0);
428-
}
429-
case aspect::ext_oneapi_bindless_images_2d_usm: {
430-
return get_info_impl_nocheck<
431-
UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP>()
432-
.value_or(0);
433-
}
434-
case aspect::ext_oneapi_external_memory_import: {
435-
return get_info_impl_nocheck<
436-
UR_DEVICE_INFO_EXTERNAL_MEMORY_IMPORT_SUPPORT_EXP>()
437-
.value_or(0);
438-
}
439-
case aspect::ext_oneapi_external_semaphore_import: {
440-
return get_info_impl_nocheck<
441-
UR_DEVICE_INFO_EXTERNAL_SEMAPHORE_IMPORT_SUPPORT_EXP>()
442-
.value_or(0);
443-
}
444-
case aspect::ext_oneapi_mipmap: {
445-
return get_info_impl_nocheck<UR_DEVICE_INFO_MIPMAP_SUPPORT_EXP>().value_or(
446-
0);
447-
}
448-
case aspect::ext_oneapi_mipmap_anisotropy: {
449-
return get_info_impl_nocheck<UR_DEVICE_INFO_MIPMAP_ANISOTROPY_SUPPORT_EXP>()
450-
.value_or(0);
451-
}
452-
case aspect::ext_oneapi_mipmap_level_reference: {
453-
return get_info_impl_nocheck<
454-
UR_DEVICE_INFO_MIPMAP_LEVEL_REFERENCE_SUPPORT_EXP>()
455-
.value_or(0);
456-
}
457-
case aspect::ext_oneapi_bindless_sampled_image_fetch_1d_usm: {
458-
return get_info_impl_nocheck<
459-
UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_SUPPORT_EXP>()
460-
.value_or(0);
461-
}
462-
case aspect::ext_oneapi_bindless_sampled_image_fetch_1d: {
463-
return get_info_impl_nocheck<
464-
UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_SUPPORT_EXP>()
465-
.value_or(0);
466-
}
467-
case aspect::ext_oneapi_bindless_sampled_image_fetch_2d_usm: {
468-
return get_info_impl_nocheck<
469-
UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_SUPPORT_EXP>()
470-
.value_or(0);
471-
}
472-
case aspect::ext_oneapi_bindless_sampled_image_fetch_2d: {
473-
return get_info_impl_nocheck<
474-
UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_SUPPORT_EXP>()
475-
.value_or(0);
476-
}
477-
case aspect::ext_oneapi_bindless_sampled_image_fetch_3d: {
478-
return get_info_impl_nocheck<
479-
UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_SUPPORT_EXP>()
480-
.value_or(0);
481-
}
482-
case aspect::ext_oneapi_bindless_images_gather: {
483-
return get_info_impl_nocheck<
484-
UR_DEVICE_INFO_BINDLESS_IMAGES_GATHER_SUPPORT_EXP>()
485-
.value_or(0);
486-
}
487-
case aspect::ext_oneapi_cubemap: {
488-
return get_info_impl_nocheck<UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP>().value_or(
489-
0);
490-
}
491-
case aspect::ext_oneapi_cubemap_seamless_filtering: {
492-
return get_info_impl_nocheck<
493-
UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP>()
494-
.value_or(0);
495-
}
496-
case aspect::ext_oneapi_image_array: {
497-
return get_info_impl_nocheck<UR_DEVICE_INFO_IMAGE_ARRAY_SUPPORT_EXP>()
498-
.value_or(0);
499-
}
500-
case aspect::ext_oneapi_unique_addressing_per_dim: {
501-
return get_info_impl_nocheck<
502-
UR_DEVICE_INFO_BINDLESS_UNIQUE_ADDRESSING_PER_DIM_SUPPORT_EXP>()
503-
.value_or(0);
504-
}
505-
case aspect::ext_oneapi_bindless_images_sample_1d_usm: {
506-
return get_info_impl_nocheck<
507-
UR_DEVICE_INFO_BINDLESS_SAMPLE_1D_USM_SUPPORT_EXP>()
508-
.value_or(0);
509-
}
510-
case aspect::ext_oneapi_bindless_images_sample_2d_usm: {
511-
return get_info_impl_nocheck<
512-
UR_DEVICE_INFO_BINDLESS_SAMPLE_2D_USM_SUPPORT_EXP>()
513-
.value_or(0);
514-
}
515-
case aspect::ext_intel_esimd: {
516-
return get_info_impl_nocheck<UR_DEVICE_INFO_ESIMD_SUPPORT>().value_or(0);
517-
}
518-
case aspect::ext_oneapi_ballot_group:
519-
case aspect::ext_oneapi_fixed_size_group:
520-
case aspect::ext_oneapi_opportunistic_group: {
521-
return (this->getBackend() == backend::ext_oneapi_level_zero) ||
522-
(this->getBackend() == backend::opencl) ||
523-
(this->getBackend() == backend::ext_oneapi_cuda);
524-
}
525-
case aspect::ext_oneapi_tangle_group: {
526-
// TODO: tangle_group is not currently supported for CUDA devices. Add when
527-
// implemented.
528-
return (this->getBackend() == backend::ext_oneapi_level_zero) ||
529-
(this->getBackend() == backend::opencl);
530-
}
531-
case aspect::ext_intel_matrix: {
532-
using arch = sycl::ext::oneapi::experimental::architecture;
533-
const arch supported_archs[] = {
534-
arch::intel_cpu_spr, arch::intel_cpu_gnr,
535-
arch::intel_cpu_dmr, arch::intel_gpu_pvc,
536-
arch::intel_gpu_dg2_g10, arch::intel_gpu_dg2_g11,
537-
arch::intel_gpu_dg2_g12, arch::intel_gpu_bmg_g21,
538-
arch::intel_gpu_lnl_m, arch::intel_gpu_arl_h,
539-
arch::intel_gpu_ptl_h, arch::intel_gpu_ptl_u,
540-
};
541-
try {
542-
return std::any_of(
543-
std::begin(supported_archs), std::end(supported_archs),
544-
[=](const arch a) { return this->extOneapiArchitectureIs(a); });
545-
} catch (const sycl::exception &) {
546-
// If we're here it means the device does not support architecture
547-
// querying
548-
return false;
549-
}
550-
}
551-
case aspect::ext_oneapi_is_composite: {
552-
auto components = get_info<
553-
sycl::ext::oneapi::experimental::info::device::component_devices>();
554-
// Any device with ext_oneapi_is_composite aspect will have at least two
555-
// constituent component devices.
556-
return components.size() >= 2;
557-
}
558-
case aspect::ext_oneapi_is_component: {
559-
return get_info_impl_nocheck<UR_DEVICE_INFO_COMPOSITE_DEVICE>().value_or(
560-
nullptr) != nullptr;
561-
}
562-
case aspect::ext_oneapi_graph: {
563-
ur_device_command_buffer_update_capability_flags_t UpdateCapabilities;
564-
bool CallSuccessful =
565-
getAdapter()->call_nocheck<UrApiKind::urDeviceGetInfo>(
566-
MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP,
567-
sizeof(UpdateCapabilities), &UpdateCapabilities,
568-
nullptr) == UR_RESULT_SUCCESS;
569-
if (!CallSuccessful) {
570-
return false;
571-
}
572-
573-
/* The kernel handle update capability is not yet required for the
574-
* ext_oneapi_graph aspect */
575-
ur_device_command_buffer_update_capability_flags_t RequiredCapabilities =
576-
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS |
577-
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE |
578-
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE |
579-
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET |
580-
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
581-
582-
return has(aspect::ext_oneapi_limited_graph) &&
583-
(UpdateCapabilities & RequiredCapabilities) == RequiredCapabilities;
584-
}
585-
case aspect::ext_oneapi_limited_graph: {
586-
bool SupportsCommandBuffers = false;
587-
bool CallSuccessful =
588-
getAdapter()->call_nocheck<UrApiKind::urDeviceGetInfo>(
589-
MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP,
590-
sizeof(SupportsCommandBuffers), &SupportsCommandBuffers,
591-
nullptr) == UR_RESULT_SUCCESS;
592-
if (!CallSuccessful) {
593-
return false;
594-
}
595-
596-
return SupportsCommandBuffers;
597-
}
598-
case aspect::ext_oneapi_private_alloca: {
599-
// Extension only supported on SPIR-V targets.
600-
backend be = getBackend();
601-
return be == sycl::backend::ext_oneapi_level_zero ||
602-
be == sycl::backend::opencl;
603-
}
604-
case aspect::ext_oneapi_queue_profiling_tag: {
605-
return get_info_impl_nocheck<
606-
UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP>()
607-
.value_or(0);
608-
}
609-
case aspect::ext_oneapi_virtual_mem: {
610-
return get_info_impl_nocheck<UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT>()
611-
.value_or(0);
612-
}
613-
case aspect::ext_intel_fpga_task_sequence: {
614-
return is_accelerator();
615-
}
616-
case aspect::ext_oneapi_atomic16: {
617-
// Likely L0 doesn't check it properly. Need to double-check.
618-
return has_extension("cl_ext_float_atomics");
619-
}
620-
case aspect::ext_oneapi_virtual_functions: {
621-
// TODO: move to UR like e.g. aspect::ext_oneapi_virtual_mem
622-
backend BE = getBackend();
623-
bool isCompatibleBE = BE == sycl::backend::ext_oneapi_level_zero ||
624-
BE == sycl::backend::opencl;
625-
return (is_cpu() || is_gpu()) && isCompatibleBE;
626-
}
627-
case aspect::ext_intel_spill_memory_size: {
628-
backend BE = getBackend();
629-
bool isCompatibleBE = BE == sycl::backend::ext_oneapi_level_zero;
630-
return is_gpu() && isCompatibleBE;
631-
}
632-
case aspect::ext_oneapi_async_memory_alloc: {
633-
return get_info_impl_nocheck<
634-
UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP>()
635-
.value_or(0);
636-
}
637-
}
638-
639-
return false; // This device aspect has not been implemented yet.
640-
}
641-
642315
// On the first call this function queries for device timestamp
643316
// along with host synchronized timestamp and stores it in member variable
644317
// MDeviceHostBaseTime. Subsequent calls to this function would just retrieve

0 commit comments

Comments
 (0)