@@ -4832,16 +4832,13 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr,
4832
4832
pi_uint32 num_events_in_waitlist,
4833
4833
const pi_event *events_waitlist,
4834
4834
pi_event *event) {
4835
+ pi_device device = queue->get_context ()->get_device ();
4835
4836
4836
4837
// Certain cuda devices and Windows do not have support for some Unified
4837
4838
// Memory features. cuMemPrefetchAsync requires concurrent memory access
4838
4839
// for managed memory. Therefore, ignore prefetch hint if concurrent managed
4839
4840
// memory access is not available.
4840
- int isConcurrentManagedAccessAvailable = 0 ;
4841
- cuDeviceGetAttribute (&isConcurrentManagedAccessAvailable,
4842
- CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
4843
- queue->get_context ()->get_device ()->get ());
4844
- if (!isConcurrentManagedAccessAvailable) {
4841
+ if (!getAttribute (device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
4845
4842
setErrorMessage (" Prefetch hint ignored as device does not support "
4846
4843
" concurrent managed access" ,
4847
4844
PI_SUCCESS);
@@ -4875,9 +4872,8 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr,
4875
4872
PI_COMMAND_TYPE_MEM_BUFFER_COPY, queue, cuStream));
4876
4873
event_ptr->start ();
4877
4874
}
4878
- result = PI_CHECK_ERROR (cuMemPrefetchAsync (
4879
- (CUdeviceptr)ptr, size, queue->get_context ()->get_device ()->get (),
4880
- cuStream));
4875
+ result = PI_CHECK_ERROR (
4876
+ cuMemPrefetchAsync ((CUdeviceptr)ptr, size, device->get (), cuStream));
4881
4877
if (event) {
4882
4878
result = event_ptr->record ();
4883
4879
*event = event_ptr.release ();
@@ -4895,6 +4891,29 @@ pi_result cuda_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr,
4895
4891
assert (queue != nullptr );
4896
4892
assert (ptr != nullptr );
4897
4893
4894
+ // Certain cuda devices and Windows do not have support for some Unified
4895
+ // Memory features. Passing CU_MEM_ADVISE_[UN]SET_PREFERRED_LOCATION and
4896
+ // CU_MEM_ADVISE_[UN]SET_ACCESSED_BY to cuMemAdvise on a GPU device requires
4897
+ // the GPU device to report a non-zero value for
4898
+ // CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. Therefore, ignore memory
4899
+ // advise if concurrent managed memory access is not available.
4900
+ if (advice == PI_MEM_ADVICE_CUDA_SET_PREFERRED_LOCATION ||
4901
+ advice == PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION ||
4902
+ advice == PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY ||
4903
+ advice == PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY) {
4904
+ pi_device device = queue->get_context ()->get_device ();
4905
+ if (!getAttribute (device, CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS)) {
4906
+ setErrorMessage (" Mem advise ignored as device does not support "
4907
+ " concurrent managed access" ,
4908
+ PI_SUCCESS);
4909
+ return PI_ERROR_PLUGIN_SPECIFIC_ERROR;
4910
+ }
4911
+
4912
+ // TODO: If ptr points to valid system-allocated pageable memory we should
4913
+ // check that the device also has the
4914
+ // CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS property.
4915
+ }
4916
+
4898
4917
pi_result result = PI_SUCCESS;
4899
4918
std::unique_ptr<_pi_event> event_ptr{nullptr };
4900
4919
0 commit comments