@@ -1067,6 +1067,9 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name,
1067
1067
capabilities);
1068
1068
}
1069
1069
case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT:
1070
+ // 2D USM memcpy is supported.
1071
+ return getInfo<pi_bool>(param_value_size, param_value, param_value_size_ret,
1072
+ true );
1070
1073
case PI_EXT_ONEAPI_CONTEXT_INFO_USM_FILL2D_SUPPORT:
1071
1074
case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT:
1072
1075
// 2D USM operations currently not supported.
@@ -1949,10 +1952,12 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
1949
1952
CUresult current_ctx_device_ret = cuCtxGetDevice (¤t_ctx_device);
1950
1953
if (current_ctx_device_ret != CUDA_ERROR_INVALID_CONTEXT)
1951
1954
PI_CHECK_ERROR (current_ctx_device_ret);
1952
- bool need_primary_ctx = current_ctx_device_ret == CUDA_ERROR_INVALID_CONTEXT ||
1953
- current_ctx_device != device->get ();
1955
+ bool need_primary_ctx =
1956
+ current_ctx_device_ret == CUDA_ERROR_INVALID_CONTEXT ||
1957
+ current_ctx_device != device->get ();
1954
1958
if (need_primary_ctx) {
1955
- // Use the primary context for the device if no context with the device is set.
1959
+ // Use the primary context for the device if no context with the device is
1960
+ // set.
1956
1961
CUcontext primary_context;
1957
1962
PI_CHECK_ERROR (cuDevicePrimaryCtxRetain (&primary_context, device->get ()));
1958
1963
PI_CHECK_ERROR (cuCtxSetCurrent (primary_context));
@@ -5383,14 +5388,91 @@ pi_result cuda_piextUSMEnqueueMemset2D(pi_queue, void *, size_t, int, size_t,
5383
5388
return {};
5384
5389
}
5385
5390
5386
- // TODO: Implement this. Remember to return true for
5387
- // PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT when it is implemented.
5388
- pi_result cuda_piextUSMEnqueueMemcpy2D (pi_queue, pi_bool, void *, size_t ,
5389
- const void *, size_t , size_t , size_t ,
5390
- pi_uint32, const pi_event *,
5391
- pi_event *) {
5392
- sycl::detail::pi::die (" piextUSMEnqueueMemcpy2D not implemented" );
5393
- return {};
5391
+ // / 2D Memcpy API
5392
+ // /
5393
+ // / \param queue is the queue to submit to
5394
+ // / \param blocking is whether this operation should block the host
5395
+ // / \param dst_ptr is the location the data will be copied
5396
+ // / \param dst_pitch is the total width of the destination memory including
5397
+ // / padding
5398
+ // / \param src_ptr is the data to be copied
5399
+ // / \param dst_pitch is the total width of the source memory including padding
5400
+ // / \param width is width in bytes of each row to be copied
5401
+ // / \param height is height the columns to be copied
5402
+ // / \param num_events_in_waitlist is the number of events to wait on
5403
+ // / \param events_waitlist is an array of events to wait on
5404
+ // / \param event is the event that represents this operation
5405
+ pi_result cuda_piextUSMEnqueueMemcpy2D (pi_queue queue, pi_bool blocking,
5406
+ void *dst_ptr, size_t dst_pitch,
5407
+ const void *src_ptr, size_t src_pitch,
5408
+ size_t width, size_t height,
5409
+ pi_uint32 num_events_in_wait_list,
5410
+ const pi_event *event_wait_list,
5411
+ pi_event *event) {
5412
+
5413
+ assert (queue != nullptr );
5414
+
5415
+ pi_result result = PI_SUCCESS;
5416
+
5417
+ try {
5418
+ ScopedContext active (queue->get_context ());
5419
+ CUstream cuStream = queue->get_next_transfer_stream ();
5420
+ result = enqueueEventsWait (queue, cuStream, num_events_in_wait_list,
5421
+ event_wait_list);
5422
+ if (event) {
5423
+ (*event) = _pi_event::make_native (PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT,
5424
+ queue, cuStream);
5425
+ (*event)->start ();
5426
+ }
5427
+
5428
+ // Determine the direction of Copy using cuPointerGetAttributes
5429
+ // for both the src_ptr and dst_ptr
5430
+ // TODO: Doesn't yet support CU_MEMORYTYPE_UNIFIED
5431
+ CUpointer_attribute attributes = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE};
5432
+
5433
+ CUmemorytype src_type = static_cast <CUmemorytype>(0 );
5434
+ void *src_attribute_values[] = {(void *)(&src_type)};
5435
+ result = PI_CHECK_ERROR (cuPointerGetAttributes (
5436
+ 1 , &attributes, src_attribute_values, (CUdeviceptr)src_ptr));
5437
+ assert (src_type == CU_MEMORYTYPE_DEVICE || src_type == CU_MEMORYTYPE_HOST);
5438
+
5439
+ CUmemorytype dst_type = static_cast <CUmemorytype>(0 );
5440
+ void *dst_attribute_values[] = {(void *)(&dst_type)};
5441
+ result = PI_CHECK_ERROR (cuPointerGetAttributes (
5442
+ 1 , &attributes, dst_attribute_values, (CUdeviceptr)dst_ptr));
5443
+ assert (dst_type == CU_MEMORYTYPE_DEVICE || dst_type == CU_MEMORYTYPE_HOST);
5444
+
5445
+ CUDA_MEMCPY2D cpyDesc = {0 };
5446
+
5447
+ cpyDesc.srcMemoryType = src_type;
5448
+ cpyDesc.srcDevice = (src_type == CU_MEMORYTYPE_DEVICE)
5449
+ ? reinterpret_cast <CUdeviceptr>(src_ptr)
5450
+ : 0 ;
5451
+ cpyDesc.srcHost = (src_type == CU_MEMORYTYPE_HOST) ? src_ptr : nullptr ;
5452
+ cpyDesc.srcPitch = src_pitch;
5453
+
5454
+ cpyDesc.dstMemoryType = dst_type;
5455
+ cpyDesc.dstDevice = (dst_type == CU_MEMORYTYPE_DEVICE)
5456
+ ? reinterpret_cast <CUdeviceptr>(dst_ptr)
5457
+ : 0 ;
5458
+ cpyDesc.dstHost = (dst_type == CU_MEMORYTYPE_HOST) ? dst_ptr : nullptr ;
5459
+ cpyDesc.dstPitch = dst_pitch;
5460
+
5461
+ cpyDesc.WidthInBytes = width;
5462
+ cpyDesc.Height = height;
5463
+
5464
+ result = PI_CHECK_ERROR (cuMemcpy2DAsync (&cpyDesc, cuStream));
5465
+
5466
+ if (event) {
5467
+ (*event)->record ();
5468
+ }
5469
+ if (blocking) {
5470
+ result = PI_CHECK_ERROR (cuStreamSynchronize (cuStream));
5471
+ }
5472
+ } catch (pi_result err) {
5473
+ result = err;
5474
+ }
5475
+ return result;
5394
5476
}
5395
5477
5396
5478
// / API to query information about USM allocated pointers
0 commit comments