Skip to content

Commit 3726e0d

Browse files
authored
[SYCL][ESIMD] Implement unified memory API - atomic_update SLM+lacc (#11924)
1 parent 4ab007d commit 3726e0d

14 files changed

+1599
-350
lines changed

sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,104 @@ __esimd_lsc_xatomic_bti_2(
645645
}
646646
#endif // __SYCL_DEVICE_ONLY__
647647

648+
/// SLM atomic.
649+
/// Supported platforms: DG2, PVC
650+
///
651+
/// @tparam Ty is element type.
652+
/// @tparam InternalOp is operation type.
653+
/// @tparam L1H is L1 cache hint.
654+
/// @tparam L2H is L2 cache hint.
655+
/// @tparam AddressScale is the address scale.
656+
/// @tparam ImmOffset is the immediate offset added to each address.
657+
/// @tparam DS is the data size.
658+
/// @tparam VS is the number of elements per address.
659+
/// @tparam Transposed indicates if the data is transposed during the transfer.
660+
/// @tparam N is the SIMD size of operation (the number of addresses to access)
661+
/// @param pred is predicates.
662+
/// @param offsets is the zero-based offsets.
663+
template <typename Ty, int InternalOpOp, __ESIMD_NS::cache_hint L1H,
664+
__ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
665+
__ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
666+
__ESIMD_DNS::lsc_data_order Transposed, int N>
667+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
668+
__esimd_lsc_xatomic_slm_0(__ESIMD_DNS::simd_mask_storage_t<N> pred,
669+
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets)
670+
#ifdef __SYCL_DEVICE_ONLY__
671+
;
672+
#else // __SYCL_DEVICE_ONLY__
673+
{
674+
__ESIMD_UNSUPPORTED_ON_HOST;
675+
}
676+
#endif // __SYCL_DEVICE_ONLY__
677+
678+
/// SLM atomic.
679+
/// Supported platforms: DG2, PVC
680+
///
681+
/// @tparam Ty is element type.
682+
/// @tparam InternalOp is operation type.
683+
/// @tparam L1H is L1 cache hint.
684+
/// @tparam L2H is L2 cache hint.
685+
/// @tparam AddressScale is the address scale.
686+
/// @tparam ImmOffset is the immediate offset added to each address.
687+
/// @tparam DS is the data size.
688+
/// @tparam VS is the number of elements per address.
689+
/// @tparam Transposed indicates if the data is transposed during the transfer.
690+
/// @tparam N is the SIMD size of operation (the number of addresses to access)
691+
/// @param pred is predicates.
692+
/// @param offsets is the zero-based offsets.
693+
/// @param src0 is the first atomic operand.
694+
template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
695+
__ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
696+
__ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
697+
__ESIMD_DNS::lsc_data_order Transposed, int N>
698+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
699+
__esimd_lsc_xatomic_slm_1(
700+
__ESIMD_DNS::simd_mask_storage_t<N> pred,
701+
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
702+
__ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0)
703+
#ifdef __SYCL_DEVICE_ONLY__
704+
;
705+
#else // __SYCL_DEVICE_ONLY__
706+
{
707+
__ESIMD_UNSUPPORTED_ON_HOST;
708+
}
709+
#endif // __SYCL_DEVICE_ONLY__
710+
711+
/// SLM atomic.
712+
/// Supported platforms: DG2, PVC
713+
///
714+
/// @tparam Ty is element type.
715+
/// @tparam InternalOp is operation type.
716+
/// @tparam L1H is L1 cache hint.
717+
/// @tparam L2H is L2 cache hint.
718+
/// @tparam AddressScale is the address scale.
719+
/// @tparam ImmOffset is the immediate offset added to each address.
720+
/// @tparam DS is the data size.
721+
/// @tparam VS is the number of elements per address.
722+
/// @tparam Transposed indicates if the data is transposed during the transfer.
723+
/// @tparam N is the SIMD size of operation (the number of addresses to access)
724+
/// @param pred is predicates.
725+
/// @param offsets is the zero-based offsets.
726+
/// @param src0 is the first atomic operand.
727+
/// @param src1 is the second atomic operand.
728+
template <typename Ty, int InternalOp, __ESIMD_NS::cache_hint L1H,
729+
__ESIMD_NS::cache_hint L2H, uint16_t AddressScale, int ImmOffset,
730+
__ESIMD_DNS::lsc_data_size DS, __ESIMD_DNS::lsc_vector_size VS,
731+
__ESIMD_DNS::lsc_data_order Transposed, int N>
732+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()>
733+
__esimd_lsc_xatomic_slm_2(
734+
__ESIMD_DNS::simd_mask_storage_t<N> pred,
735+
__ESIMD_DNS::vector_type_t<uint32_t, N> offsets,
736+
__ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src0,
737+
__ESIMD_DNS::vector_type_t<Ty, N * __ESIMD_DNS::to_int<VS>()> src1)
738+
#ifdef __SYCL_DEVICE_ONLY__
739+
;
740+
#else // __SYCL_DEVICE_ONLY__
741+
{
742+
__ESIMD_UNSUPPORTED_ON_HOST;
743+
}
744+
#endif // __SYCL_DEVICE_ONLY__
745+
648746
__ESIMD_INTRIN void __esimd_slm_init(uint32_t size)
649747
#ifdef __SYCL_DEVICE_ONLY__
650748
;

0 commit comments

Comments
 (0)