[ESIMD] Implement property-based gather(usm, ...) #12316

Merged
40 changes: 40 additions & 0 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Demangle/ItaniumDemangle.h"
#include "llvm/GenXIntrinsics/GenXIntrinsics.h"
@@ -970,6 +971,38 @@ static void translateBlockStore(CallInst &CI, bool IsSLM) {
  SI->setDebugLoc(CI.getDebugLoc());
}

static void translateGatherLoad(CallInst &CI, bool IsSLM) {
  IRBuilder<> Builder(&CI);
  constexpr int AlignmentTemplateArgIdx = 2;
  APInt Val = parseTemplateArg(CI, AlignmentTemplateArgIdx,
                               ESIMDIntrinDesc::GenXArgConversion::TO_I64);
  Align AlignValue(Val.getZExtValue());

  auto OffsetsOp = CI.getArgOperand(0);
  auto MaskOp = CI.getArgOperand(1);
  auto PassThroughOp = CI.getArgOperand(2);
  auto DataType = CI.getType();

  // Convert the mask from <N x i16> to <N x i1>.
  Value *Zero = ConstantInt::get(MaskOp->getType(), 0);
  MaskOp = Builder.CreateICmp(ICmpInst::ICMP_NE, MaskOp, Zero);

  // The address space may be 3 (SLM), 1 (global), or private.
  // By the time 'gather()' is called, the pointer passed to it is already
  // 4 (generic). Thus, simply use the generic address space for global and
  // private accesses and let the GPU BE deduce the actual address space from
  // the use-def graph.
  unsigned AS = IsSLM ? 3 : 4;
  auto ElemType = DataType->getScalarType();
  auto NumElems = (cast<VectorType>(DataType))->getElementCount();
  auto VPtrType = VectorType::get(PointerType::get(ElemType, AS), NumElems);
  auto VPtrOp = Builder.CreateIntToPtr(OffsetsOp, VPtrType);

  auto LI = Builder.CreateMaskedGather(DataType, VPtrOp, AlignValue, MaskOp,
                                       PassThroughOp);
  LI->setDebugLoc(CI.getDebugLoc());
  CI.replaceAllUsesWith(LI);
}

// TODO Specify document behavior for slm_init and nbarrier_init when:
// 1) they are called not from kernels
// 2) there are multiple such calls reachable from a kernel
@@ -1910,6 +1943,13 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
      ToErase.push_back(CI);
      continue;
    }
    if (Name.startswith("__esimd_gather_ld") ||
        Name.startswith("__esimd_slm_gather_ld")) {
      translateGatherLoad(*CI, Name.startswith("__esimd_slm_gather_ld"));
      ToErase.push_back(CI);
      continue;
    }

    if (Name.startswith("__esimd_nbarrier_init")) {
      translateNbarrierInit(*CI);
      ToErase.push_back(CI);
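For reference, below is a standalone sketch (not part of this change) that reproduces the lowering pattern translateGatherLoad emits: the <N x i16> mask is compared against zero to produce an <N x i1> mask, the 64-bit addresses are cast to a vector of addrspace(4) pointers, and an llvm.masked.gather is created. The element type float, SIMD width 16, alignment 4, and the function name lowered_gather are arbitrary choices for illustration.

// Sketch of the translateGatherLoad lowering pattern; example types/names only.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("gather_lowering_sketch", Ctx);
  constexpr unsigned N = 16;

  auto *F32 = Type::getFloatTy(Ctx);
  auto *DataTy = FixedVectorType::get(F32, N);                      // <16 x float>
  auto *OffsetsTy = FixedVectorType::get(Type::getInt64Ty(Ctx), N); // <16 x i64>
  auto *MaskI16Ty = FixedVectorType::get(Type::getInt16Ty(Ctx), N); // <16 x i16>

  // A function standing in for the code surrounding the __esimd_gather_ld call.
  auto *FnTy = FunctionType::get(DataTy, {OffsetsTy, MaskI16Ty, DataTy}, false);
  auto *Fn = Function::Create(FnTy, Function::ExternalLinkage, "lowered_gather", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", Fn));

  Value *Offsets = Fn->getArg(0);
  Value *MaskI16 = Fn->getArg(1);
  Value *PassThru = Fn->getArg(2);

  // <16 x i16> mask -> <16 x i1>, as done by translateGatherLoad.
  Value *Mask = B.CreateICmpNE(MaskI16, ConstantInt::get(MaskI16Ty, 0));

  // i64 addresses -> vector of addrspace(4) (generic) pointers; addrspace(3)
  // would be used for the SLM flavor.
  auto *VPtrTy = FixedVectorType::get(PointerType::get(F32, /*AS=*/4), N);
  Value *VPtr = B.CreateIntToPtr(Offsets, VPtrTy);

  // The llvm.masked.gather that replaces the __esimd_gather_ld call.
  Value *Gather = B.CreateMaskedGather(DataTy, VPtr, Align(4), Mask, PassThru);
  B.CreateRet(Gather);

  verifyModule(M, &errs());
  M.print(outs(), nullptr);
  return 0;
}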
8 changes: 4 additions & 4 deletions sycl/include/sycl/ext/intel/esimd/detail/intrin.hpp
@@ -64,8 +64,8 @@
//
template <typename T, int N, int M, int VStride, int Width, int Stride,
          int ParentWidth = 0>
__ESIMD_INTRIN __ESIMD_INTRIN std::enable_if_t<(Width > 0) && M % Width == 0,
                                               __ESIMD_DNS::vector_type_t<T, M>>
__ESIMD_INTRIN std::enable_if_t<(Width > 0) && M % Width == 0,
                                __ESIMD_DNS::vector_type_t<T, M>>
__esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset);

template <typename T, int N, int M, int ParentWidth = 0>
@@ -263,8 +263,8 @@ __ESIMD_INTRIN uint16_t __esimd_all(__ESIMD_DNS::vector_type_t<T, N> src)
// Implementations of ESIMD intrinsics for the SYCL host device
template <typename T, int N, int M, int VStride, int Width, int Stride,
          int ParentWidth>
__ESIMD_INTRIN __ESIMD_INTRIN std::enable_if_t<(Width > 0) && M % Width == 0,
                                               __ESIMD_DNS::vector_type_t<T, M>>
__ESIMD_INTRIN std::enable_if_t<(Width > 0) && M % Width == 0,
                                __ESIMD_DNS::vector_type_t<T, M>>
__esimd_rdregion(__ESIMD_DNS::vector_type_t<T, N> Input, uint16_t Offset) {
  uint16_t EltOffset = Offset / sizeof(T);
  assert(Offset % sizeof(T) == 0);
20 changes: 18 additions & 2 deletions sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp
@@ -199,6 +199,20 @@ __esimd_lsc_load_slm(__ESIMD_DNS::simd_mask_storage_t<N> pred,
                     __ESIMD_DNS::vector_type_t<uint32_t, N> offsets)
    __ESIMD_INTRIN_END;

// Gather data from the given global or private addresses.
template <typename T, int N, size_t Align>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N> __esimd_gather_ld(
    __ESIMD_DNS::vector_type_t<uint64_t, N> vptr,
    __ESIMD_DNS::simd_mask_storage_t<N> pred,
    __ESIMD_DNS::vector_type_t<T, N> pass_thru) __ESIMD_INTRIN_END;

// Gather data from the given SLM addresses.
template <typename T, int N, size_t Align>
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N> __esimd_slm_gather_ld(
    __ESIMD_DNS::vector_type_t<uint32_t, N> vptr,
    __ESIMD_DNS::simd_mask_storage_t<N> pred,
    __ESIMD_DNS::vector_type_t<T, N> pass_thru) __ESIMD_INTRIN_END;

/// Surface-based gather.
/// Supported platforms: DG2, PVC
///
@@ -212,8 +226,10 @@ __esimd_lsc_load_slm(__ESIMD_DNS::simd_mask_storage_t<N> pred,
/// @tparam ImmOffset is the immediate offset added to each address.
/// @tparam DS is the data size.
/// @tparam VS is the number of elements to load per address.
/// @tparam Transposed indicates if the data is transposed during the transfer.
/// @tparam N is the SIMD size of operation (the number of addresses to access)
/// @tparam Transposed indicates if the data is transposed during the
/// transfer.
/// @tparam N is the SIMD size of operation (the number of addresses to
/// access)
/// @tparam SurfIndAliasT is the \ref sycl::accessor type.
/// @param pred is predicates.
/// @param offsets is the zero-based offsets in bytes.
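For context, here is a hedged usage sketch (not part of this PR) of the new USM gather overload that takes a mask and a pass-through operand and, on the device path, is expected to lower to __esimd_gather_ld. The queue setup, element type, SIMD width, and byte offsets are arbitrary example choices, and the empty default property list is used to keep the sketch minimal; per the extension specification, a compile-time property list (e.g. an alignment property) could be appended as a trailing argument.

// Assumed-shape usage of the mask + pass_thru gather(usm, ...) overload.
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/sycl.hpp>

#include <cstdint>

int main() {
  namespace esimd = sycl::ext::intel::esimd;
  constexpr int N = 16;

  sycl::queue Q;
  float *In = sycl::malloc_shared<float>(N, Q);
  float *Out = sycl::malloc_shared<float>(N, Q);
  for (int I = 0; I < N; ++I)
    In[I] = static_cast<float>(I);

  Q.single_task([=]() SYCL_ESIMD_KERNEL {
     // Byte offsets 0, 4, 8, ... selecting consecutive floats from In.
     esimd::simd<uint32_t, N> ByteOffsets(0, sizeof(float));
     esimd::simd_mask<N> Pred(1);          // all lanes enabled
     esimd::simd<float, N> PassThru(0.0f); // returned for masked-off lanes
     esimd::simd<float, N> Res =
         esimd::gather<float, N>(In, ByteOffsets, Pred, PassThru);
     Res.copy_to(Out);
   }).wait();

  sycl::free(In, Q);
  sycl::free(Out, Q);
  return 0;
}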