34
34
#include < sycl/reduction_forward.hpp>
35
35
#include < sycl/sampler.hpp>
36
36
#include < sycl/stl.hpp>
37
+ #include < sycl/usm/usm_pointer_info.hpp>
37
38
38
39
#include < functional>
39
40
#include < limits>
@@ -2469,13 +2470,30 @@ class __SYCL_EXPORT handler {
2469
2470
throw sycl::exception (sycl::make_error_code (errc::invalid),
2470
2471
" Source pitch must be greater than or equal "
2471
2472
" to the width specified in 'ext_oneapi_memcpy2d'" );
2472
- // If the backends supports 2D copy we use that. Otherwise we use a fallback
2473
- // kernel.
2474
- if (supportsUSMMemcpy2D ())
2473
+
2474
+ // Get the type of the pointers.
2475
+ context Ctx = detail::createSyclObjFromImpl<context>(getContextImplPtr ());
2476
+ usm::alloc SrcAllocType = get_pointer_type (Src, Ctx);
2477
+ usm::alloc DestAllocType = get_pointer_type (Dest, Ctx);
2478
+ bool SrcIsHost =
2479
+ SrcAllocType == usm::alloc::unknown || SrcAllocType == usm::alloc::host;
2480
+ bool DestIsHost = DestAllocType == usm::alloc::unknown ||
2481
+ DestAllocType == usm::alloc::host;
2482
+
2483
+ // Do the following:
2484
+ // 1. If both are host, use host_task to copy.
2485
+ // 2. If either pointer is host or if the backend supports native memcpy2d,
2486
+ // use special command.
2487
+ // 3. Otherwise, launch a kernel for copying.
2488
+ if (SrcIsHost && DestIsHost) {
2489
+ commonUSMCopy2DFallbackHostTask<T>(Src, SrcPitch, Dest, DestPitch, Width,
2490
+ Height);
2491
+ } else if (SrcIsHost || DestIsHost || supportsUSMMemcpy2D ()) {
2475
2492
ext_oneapi_memcpy2d_impl (Dest, DestPitch, Src, SrcPitch, Width, Height);
2476
- else
2493
+ } else {
2477
2494
commonUSMCopy2DFallbackKernel<T>(Src, SrcPitch, Dest, DestPitch, Width,
2478
2495
Height);
2496
+ }
2479
2497
}
2480
2498
2481
2499
// / Copies data from one 2D memory region to another, both pointed by
@@ -2503,14 +2521,31 @@ class __SYCL_EXPORT handler {
2503
2521
throw sycl::exception (sycl::make_error_code (errc::invalid),
2504
2522
" Source pitch must be greater than or equal "
2505
2523
" to the width specified in 'ext_oneapi_copy2d'" );
2506
- // If the backends supports 2D copy we use that. Otherwise we use a fallback
2507
- // kernel.
2508
- if (supportsUSMMemcpy2D ())
2524
+
2525
+ // Get the type of the pointers.
2526
+ context Ctx = detail::createSyclObjFromImpl<context>(getContextImplPtr ());
2527
+ usm::alloc SrcAllocType = get_pointer_type (Src, Ctx);
2528
+ usm::alloc DestAllocType = get_pointer_type (Dest, Ctx);
2529
+ bool SrcIsHost =
2530
+ SrcAllocType == usm::alloc::unknown || SrcAllocType == usm::alloc::host;
2531
+ bool DestIsHost = DestAllocType == usm::alloc::unknown ||
2532
+ DestAllocType == usm::alloc::host;
2533
+
2534
+ // Do the following:
2535
+ // 1. If both are host, use host_task to copy.
2536
+ // 2. If either pointer is host or if the backend supports native memcpy2d,
2537
+ // use special command.
2538
+ // 3. Otherwise, launch a kernel for copying.
2539
+ if (SrcIsHost && DestIsHost) {
2540
+ commonUSMCopy2DFallbackHostTask<T>(Src, SrcPitch, Dest, DestPitch, Width,
2541
+ Height);
2542
+ } else if (SrcIsHost || DestIsHost || supportsUSMMemcpy2D ()) {
2509
2543
ext_oneapi_memcpy2d_impl (Dest, DestPitch * sizeof (T), Src,
2510
2544
SrcPitch * sizeof (T), Width * sizeof (T), Height);
2511
- else
2545
+ } else {
2512
2546
commonUSMCopy2DFallbackKernel<T>(Src, SrcPitch, Dest, DestPitch, Width,
2513
2547
Height);
2548
+ }
2514
2549
}
2515
2550
2516
2551
// / Fills the memory pointed by a USM pointer with the value specified.
@@ -2538,9 +2573,16 @@ class __SYCL_EXPORT handler {
2538
2573
" Destination pitch must be greater than or equal "
2539
2574
" to the width specified in 'ext_oneapi_memset2d'" );
2540
2575
T CharVal = static_cast <T>(Value);
2576
+
2577
+ context Ctx = detail::createSyclObjFromImpl<context>(getContextImplPtr ());
2578
+ usm::alloc DestAllocType = get_pointer_type (Dest, Ctx);
2579
+
2541
2580
// If the backend supports 2D fill we use that. Otherwise we use a fallback
2542
- // kernel.
2543
- if (supportsUSMMemset2D ())
2581
+ // kernel. If the target is on host we will always do the operation on host.
2582
+ if (DestAllocType == usm::alloc::unknown ||
2583
+ DestAllocType == usm::alloc::host)
2584
+ commonUSMFill2DFallbackHostTask (Dest, DestPitch, CharVal, Width, Height);
2585
+ else if (supportsUSMMemset2D ())
2544
2586
ext_oneapi_memset2d_impl (Dest, DestPitch, Value, Width, Height);
2545
2587
else
2546
2588
commonUSMFill2DFallbackKernel (Dest, DestPitch, CharVal, Width, Height);
@@ -2568,9 +2610,16 @@ class __SYCL_EXPORT handler {
2568
2610
throw sycl::exception (sycl::make_error_code (errc::invalid),
2569
2611
" Destination pitch must be greater than or equal "
2570
2612
" to the width specified in 'ext_oneapi_fill2d'" );
2613
+
2614
+ context Ctx = detail::createSyclObjFromImpl<context>(getContextImplPtr ());
2615
+ usm::alloc DestAllocType = get_pointer_type (Dest, Ctx);
2616
+
2571
2617
// If the backend supports 2D fill we use that. Otherwise we use a fallback
2572
- // kernel.
2573
- if (supportsUSMFill2D ())
2618
+ // kernel. If the target is on host we will always do the operation on host.
2619
+ if (DestAllocType == usm::alloc::unknown ||
2620
+ DestAllocType == usm::alloc::host)
2621
+ commonUSMFill2DFallbackHostTask (Dest, DestPitch, Pattern, Width, Height);
2622
+ else if (supportsUSMFill2D ())
2574
2623
ext_oneapi_fill2d_impl (Dest, DestPitch, &Pattern, sizeof (T), Width,
2575
2624
Height);
2576
2625
else
@@ -2792,6 +2841,8 @@ class __SYCL_EXPORT handler {
2792
2841
NumWorkItems, KernelFunc);
2793
2842
}
2794
2843
2844
+ const std::shared_ptr<detail::context_impl> &getContextImplPtr () const ;
2845
+
2795
2846
// Checks if 2D memory operations are supported by the underlying platform.
2796
2847
bool supportsUSMMemcpy2D ();
2797
2848
bool supportsUSMFill2D ();
@@ -2806,6 +2857,8 @@ class __SYCL_EXPORT handler {
2806
2857
void commonUSMCopy2DFallbackKernel (const void *Src, size_t SrcPitch,
2807
2858
void *Dest, size_t DestPitch, size_t Width,
2808
2859
size_t Height) {
2860
+ // The data is accessible on the device, so we do the operation
2861
+ // there instead.
2809
2862
// Limit number of work items to be resistant to big copies.
2810
2863
id<2 > Chunk = computeFallbackKernelBounds (Height, Width);
2811
2864
id<2 > Iterations = (Chunk + id<2 >{Height, Width} - 1 ) / Chunk;
@@ -2825,12 +2878,33 @@ class __SYCL_EXPORT handler {
2825
2878
});
2826
2879
}
2827
2880
2881
// Host-side 2D USM copy shared by copy and memcpy, so the row-copy loop is
// only written once.
//
// Hands a callable to host_task that walks Height rows and copies Width
// elements of type T per row. Row I is read from Src + SrcPitch * I and
// written to Dest + DestPitch * I after both pointers are cast to T, so
// SrcPitch, DestPitch and Width are all counted in elements of T.
template <typename T>
void commonUSMCopy2DFallbackHostTask(const void *Src, size_t SrcPitch,
                                     void *Dest, size_t DestPitch,
                                     size_t Width, size_t Height) {
  // Meant for the case where both pointers are host USM or unknown (assumed
  // non-USM); the copy is done in a host task to satisfy dependencies.
  host_task([=] {
    const T *const SrcBase = static_cast<const T *>(Src);
    T *const DestBase = static_cast<T *>(Dest);
    for (size_t Row = 0; Row != Height; ++Row) {
      // Offsets are recomputed from the base each row so no pointer is ever
      // formed beyond the last row that is actually copied.
      const size_t SrcOffset = SrcPitch * Row;
      const size_t DestOffset = DestPitch * Row;
      std::copy_n(SrcBase + SrcOffset, Width, DestBase + DestOffset);
    }
  });
}
2899
+
2828
2900
// Common function for launching a 2D USM fill kernel to avoid redefinitions
2829
2901
// of the kernel from memset and fill.
2830
2902
template <typename T>
2831
2903
void commonUSMFill2DFallbackKernel (void *Dest, size_t DestPitch,
2832
2904
const T &Pattern, size_t Width,
2833
2905
size_t Height) {
2906
+ // The data is accessible on the device, so we do the operation
2907
+ // there instead.
2834
2908
// Limit number of work items to be resistant to big fill operations.
2835
2909
id<2 > Chunk = computeFallbackKernelBounds (Height, Width);
2836
2910
id<2 > Iterations = (Chunk + id<2 >{Height, Width} - 1 ) / Chunk;
@@ -2849,6 +2923,23 @@ class __SYCL_EXPORT handler {
2849
2923
});
2850
2924
}
2851
2925
2926
// Host-side 2D USM fill shared by memset and fill, so the row-fill loop is
// only written once.
//
// Hands a callable to host_task that walks Height rows and stores Width
// copies of Pattern per row. Row I starts at Dest + DestPitch * I after Dest
// is cast to T, so DestPitch and Width are counted in elements of T. Pattern
// is captured by value, so the callable works on its own copy.
template <typename T>
void commonUSMFill2DFallbackHostTask(void *Dest, size_t DestPitch,
                                     const T &Pattern, size_t Width,
                                     size_t Height) {
  // Meant for the case where the pointer is host USM or unknown (assumed
  // non-USM); the fill is done in a host task to satisfy dependencies.
  host_task([=] {
    T *const DestBase = static_cast<T *>(Dest);
    for (size_t Row = 0; Row != Height; ++Row)
      std::fill_n(DestBase + DestPitch * Row, Width, Pattern);
  });
}
2942
+
2852
2943
// Implementation of ext_oneapi_memcpy2d using command for native 2D memcpy.
2853
2944
void ext_oneapi_memcpy2d_impl (void *Dest, size_t DestPitch, const void *Src,
2854
2945
size_t SrcPitch, size_t Width, size_t Height);
0 commit comments