11
11
#pragma once
12
12
13
13
#include < CL/sycl/INTEL/esimd/detail/esimd_intrin.hpp>
14
+ #include < CL/sycl/INTEL/esimd/detail/esimd_memory_intrin.hpp>
15
+ #include < CL/sycl/INTEL/esimd/detail/esimd_sycl_util.hpp>
14
16
#include < CL/sycl/INTEL/esimd/detail/esimd_types.hpp>
15
17
16
18
__SYCL_INLINE_NAMESPACE (cl) {
@@ -480,6 +482,49 @@ template <typename Ty, int N> class simd {
480
482
}
481
483
}
482
484
485
+ // / @name Memory operations
486
+ // / TODO NOTE: These APIs do not support cache hint specification yet, as this
487
+ // / is WIP. Later addition of hints is not expected to break code using these
488
+ // / APIs.
489
+ // /
490
+ // / @{
491
+
492
+ // / Copy a contiguous block of data from memory into this simd object.
493
+ // / The amount of memory copied equals the total size of vector elements in
494
+ // / this object.
495
+ // / @param addr the memory address to copy from. Must be a pointer to the
496
+ // / global address space, otherwise behavior is undefined.
497
+ ESIMD_INLINE void copy_from (const Ty *const addr) SYCL_ESIMD_FUNCTION;
498
+
499
+ // / Copy a contiguous block of data from memory into this simd object.
500
+ // / The amount of memory copied equals the total size of vector elements in
501
+ // / this object.
502
+ // / Source memory location is represented via a global accessor and offset.
503
+ // / @param acc accessor to copy from.
504
+ // / @param offset offset to copy from.
505
+ template <typename AccessorT>
506
+ ESIMD_INLINE
507
+ detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_read,
508
+ sycl::access::target::global_buffer, void >
509
+ copy_from (AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION;
510
+
511
+ // / Copy all vector elements of this object into a contiguous block in memory.
512
+ // / @param addr the memory address to copy to. Must be a pointer to the
513
+ // / global address space, otherwise behavior is undefined.
514
+ ESIMD_INLINE void copy_to (Ty *addr) SYCL_ESIMD_FUNCTION;
515
+
516
+ // / Copy all vector elements of this object into a contiguous block in memory.
517
+ // / Destination memory location is represented via a global accessor and
518
+ // / offset.
519
+ // / @param acc accessor to copy to.
520
+ // / @param offset offset to copy to.
521
+ template <typename AccessorT>
522
+ ESIMD_INLINE
523
+ detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_write,
524
+ sycl::access::target::global_buffer, void >
525
+ copy_to (AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION;
526
+
527
+ // / @} // Memory operations
483
528
private:
484
529
// The underlying data for this vector.
485
530
vector_type M_data;
@@ -498,6 +543,88 @@ ESIMD_INLINE simd<U, n> convert(simd<T, n> val) {
498
543
return __builtin_convertvector (val.data (), detail::vector_type_t <U, n>);
499
544
}
500
545
546
+ // ----------- Outlined implementations of esimd class APIs.
547
+
548
+ template <typename T, int N> void simd<T, N>::copy_from(const T *const Addr) {
549
+ constexpr unsigned Sz = sizeof (T) * N;
550
+ static_assert (Sz >= detail::OperandSize::OWORD,
551
+ " block size must be at least 1 oword" );
552
+ static_assert (Sz % detail::OperandSize::OWORD == 0 ,
553
+ " block size must be whole number of owords" );
554
+ static_assert (detail::isPowerOf2 (Sz / detail::OperandSize::OWORD),
555
+ " block must be 1, 2, 4 or 8 owords long" );
556
+ static_assert (Sz <= 8 * detail::OperandSize::OWORD,
557
+ " block size must be at most 8 owords" );
558
+
559
+ uintptr_t AddrVal = reinterpret_cast <uintptr_t >(Addr);
560
+ *this =
561
+ __esimd_flat_block_read_unaligned<T, N, CacheHint::None, CacheHint::None>(
562
+ AddrVal);
563
+ }
564
+
565
+ template <typename T, int N>
566
+ template <typename AccessorT>
567
+ ESIMD_INLINE
568
+ detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_read,
569
+ sycl::access::target::global_buffer, void >
570
+ simd<T, N>::copy_from(AccessorT acc, uint32_t offset) {
571
+ constexpr unsigned Sz = sizeof (T) * N;
572
+ static_assert (Sz >= detail::OperandSize::OWORD,
573
+ " block size must be at least 1 oword" );
574
+ static_assert (Sz % detail::OperandSize::OWORD == 0 ,
575
+ " block size must be whole number of owords" );
576
+ static_assert (detail::isPowerOf2 (Sz / detail::OperandSize::OWORD),
577
+ " block must be 1, 2, 4 or 8 owords long" );
578
+ static_assert (Sz <= 8 * detail::OperandSize::OWORD,
579
+ " block size must be at most 8 owords" );
580
+ #if defined(__SYCL_DEVICE_ONLY__)
581
+ auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj (acc);
582
+ *this = __esimd_block_read<T, N>(surf_ind, offset);
583
+ #else
584
+ *this = __esimd_block_read<T, N>(acc, offset);
585
+ #endif // __SYCL_DEVICE_ONLY__
586
+ }
587
+
588
+ template <typename T, int N> void simd<T, N>::copy_to(T *addr) {
589
+ constexpr unsigned Sz = sizeof (T) * N;
590
+ static_assert (Sz >= detail::OperandSize::OWORD,
591
+ " block size must be at least 1 oword" );
592
+ static_assert (Sz % detail::OperandSize::OWORD == 0 ,
593
+ " block size must be whole number of owords" );
594
+ static_assert (detail::isPowerOf2 (Sz / detail::OperandSize::OWORD),
595
+ " block must be 1, 2, 4 or 8 owords long" );
596
+ static_assert (Sz <= 8 * detail::OperandSize::OWORD,
597
+ " block size must be at most 8 owords" );
598
+
599
+ uintptr_t AddrVal = reinterpret_cast <uintptr_t >(addr);
600
+ __esimd_flat_block_write<T, N, CacheHint::None, CacheHint::None>(AddrVal,
601
+ data ());
602
+ }
603
+
604
+ template <typename T, int N>
605
+ template <typename AccessorT>
606
+ ESIMD_INLINE
607
+ detail::EnableIfAccessor<AccessorT, detail::accessor_mode_cap::can_write,
608
+ sycl::access::target::global_buffer, void >
609
+ simd<T, N>::copy_to(AccessorT acc, uint32_t offset) {
610
+ constexpr unsigned Sz = sizeof (T) * N;
611
+ static_assert (Sz >= detail::OperandSize::OWORD,
612
+ " block size must be at least 1 oword" );
613
+ static_assert (Sz % detail::OperandSize::OWORD == 0 ,
614
+ " block size must be whole number of owords" );
615
+ static_assert (detail::isPowerOf2 (Sz / detail::OperandSize::OWORD),
616
+ " block must be 1, 2, 4 or 8 owords long" );
617
+ static_assert (Sz <= 8 * detail::OperandSize::OWORD,
618
+ " block size must be at most 8 owords" );
619
+
620
+ #if defined(__SYCL_DEVICE_ONLY__)
621
+ auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj (acc);
622
+ __esimd_block_write<T, N>(surf_ind, offset >> 4 , data ());
623
+ #else
624
+ __esimd_block_write<T, N>(acc, offset >> 4 , data ());
625
+ #endif // __SYCL_DEVICE_ONLY__
626
+ }
627
+
501
628
} // namespace gpu
502
629
} // namespace INTEL
503
630
} // namespace sycl
@@ -516,4 +643,5 @@ std::ostream &operator<<(std::ostream &OS,
516
643
OS << " }" ;
517
644
return OS;
518
645
}
646
+
519
647
#endif
0 commit comments