Skip to content

[ESIMD] Implement stateless memory accesses enforcement #6287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jun 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
f1bf868
[ESIMD] Implement stateless memory accesses enforcement
v-klochkov Jun 3, 2022
eadc644
Add the bool argument EnforceStateless to ESIMDVerifier pass
v-klochkov Jun 10, 2022
1568f79
clang-format
v-klochkov Jun 10, 2022
cda2d69
Address reviewer's comments (all except more tests and documentation)
v-klochkov Jun 10, 2022
225d5a1
Merge remote-tracking branch 'intel_llvm/sycl' into esimd_stateless_acc
v-klochkov Jun 10, 2022
e6bb2e9
Address reviewer's comments (couple tests + func for repeated code pa…
v-klochkov Jun 11, 2022
ceeed8d
Merge remote-tracking branch 'intel_llvm/sycl' into esimd_stateless_acc
v-klochkov Jun 11, 2022
6034502
Fix an error in scatter implementation
v-klochkov Jun 13, 2022
50e06cb
Merge remote-tracking branch 'intel_llvm/sycl' into esimd_stateless_acc
v-klochkov Jun 13, 2022
da4184f
Fix LIT test
v-klochkov Jun 13, 2022
d4383ad
Disable intrinsics that are not supported with __ESIMD_FORCE_STATELES…
v-klochkov Jun 14, 2022
55b3c24
Add the description for -fsycl-esimd-force-stateless-mem to user manual
v-klochkov Jun 14, 2022
1f3f757
Marked the new option with the "EXPERIMENTAL" keyword
v-klochkov Jun 14, 2022
8559274
Update the option description/definition in driver. Update UserManual.
v-klochkov Jun 14, 2022
80b009d
Merge remote-tracking branch 'intel_llvm/sycl' into esimd_stateless_acc
v-klochkov Jun 14, 2022
fc687f6
Address reviewer's comments.
v-klochkov Jun 15, 2022
e910230
Merge remote-tracking branch 'intel_llvm/sycl' into esimd_stateless_acc
v-klochkov Jun 15, 2022
11d9c6d
Address reviewer's comment (fix in UserManual.md only)
v-klochkov Jun 15, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ LANGOPT(SYCLIsHost , 1, 0, "SYCL host compilation")
LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code")
LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for SYCL kernel parameters")
LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")
LANGOPT(SYCLESIMDForceStatelessMem, 1, 0, "Make accessors use USM memory in ESIMD kernels")
ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used")
LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions")
LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension")
Expand Down
8 changes: 8 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -2788,6 +2788,14 @@ def fintelfpga : Flag<["-"], "fintelfpga">, Group<f_Group>,
Flags<[CC1Option, CoreOption]>, HelpText<"Perform ahead of time compilation for FPGA">;
def fsycl_device_only : Flag<["-"], "fsycl-device-only">, Flags<[CoreOption]>,
HelpText<"Compile SYCL kernels for device">;
defm sycl_esimd_force_stateless_mem : BoolFOption<"sycl-esimd-force-stateless-mem",
LangOpts<"SYCLESIMDForceStatelessMem">, DefaultFalse,
PosFlag<SetTrue, [], "Enforce using stateless memory accesses. "
"Convert stateful accesses via SYCL accessors to stateless within ESIMD kernels. "
"Disabled by default. (experimental)">,
NegFlag<SetFalse, [], "Do not enforce using stateless memory accesses. (experimental)">,
BothFlags<[CC1Option, CoreOption], "">>;

def fsycl_targets_EQ : CommaJoined<["-"], "fsycl-targets=">, Flags<[NoXarchOption, CC1Option, CoreOption]>,
HelpText<"Specify comma-separated list of triples SYCL offloading targets to be supported">;
def fsycl_add_targets_EQ : CommaJoined<["-"], "fsycl-add-targets=">,
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -829,8 +829,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(

if (LangOpts.SYCLIsDevice)
PB.registerPipelineStartEPCallback(
[](ModulePassManager &MPM, OptimizationLevel Level) {
MPM.addPass(ESIMDVerifierPass());
[&](ModulePassManager &MPM, OptimizationLevel Level) {
MPM.addPass(ESIMDVerifierPass(LangOpts.SYCLESIMDForceStatelessMem));
});

bool IsThinLTO = CodeGenOpts.PrepareForThinLTO;
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4913,6 +4913,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fsycl-allow-func-ptr");
}

if (Args.hasFlag(options::OPT_fsycl_esimd_force_stateless_mem,
options::OPT_fno_sycl_esimd_force_stateless_mem, false))
CmdArgs.push_back("-fsycl-esimd-force-stateless-mem");

// Forward -fsycl-instrument-device-code option to cc1. This option will
// only be used for SPIR-V-based targets.
if (Triple.isSPIR())
Expand Down Expand Up @@ -9478,6 +9482,11 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
else
addArgs(CmdArgs, TCArgs, {"-spec-const=default"});

// Make ESIMD accessors use stateless memory accesses.
if (TCArgs.hasFlag(options::OPT_fsycl_esimd_force_stateless_mem,
options::OPT_fno_sycl_esimd_force_stateless_mem, false))
addArgs(CmdArgs, TCArgs, {"-lower-esimd-force-stateless-mem"});

// Add output file table file option
assert(Output.isFilename() && "output must be a filename");
addArgs(CmdArgs, TCArgs, {"-o", Output.getFilename()});
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1303,6 +1303,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__ENABLE_USM_ADDR_SPACE__");
Builder.defineMacro("SYCL_DISABLE_FALLBACK_ASSERT");
}

if (LangOpts.SYCLESIMDForceStatelessMem)
Builder.defineMacro("__ESIMD_FORCE_STATELESS_MEM");
}
if (LangOpts.SYCLUnnamedLambda)
Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__");
Expand Down
14 changes: 14 additions & 0 deletions clang/test/Driver/sycl-esimd-force-stateless-mem.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

/// Verify that the driver option is translated to corresponding options
/// to device compilation and sycl-post-link.
// RUN: %clang -### -fsycl -fsycl-esimd-force-stateless-mem \
// RUN: %s 2>&1 | FileCheck -check-prefix=CHECK-PASS-TO-COMPS %s
// CHECK-PASS-TO-COMPS: clang{{.*}} "-fsycl-esimd-force-stateless-mem"
// CHECK-PASS-TO-COMPS: sycl-post-link{{.*}} "-lower-esimd-force-stateless-mem"
// CHECK-PASS-TO-COMPS-NOT: clang{{.*}} "-fsycl-is-host" {{.*}}"-fsycl-esimd-force-stateless-mem"
// CHECK-PASS-TO-COMPS-NOT: clang{{.*}} "-fsycl-esimd-force-stateless-mem" {{.*}}"-fsycl-is-host"

/// Verify that stateless memory accesses mapping is not enforced by default
// RUN: %clang -### -fsycl %s 2>&1 | FileCheck -check-prefix=CHECK-DEFAULT %s
// CHECK-DEFAULT-NOT: clang{{.*}} "-fsycl-esimd-force-stateless-mem"
// CHECK-DEFAULT-NOT: sycl-post-link{{.*}} "-lower-esimd-force-stateless-mem"
11 changes: 11 additions & 0 deletions clang/test/Preprocessor/sycl-esimd-force-stateless-mem.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/// This test checks that the macro __ESIMD_FORCE_STATELESS_MEM is automatically
/// defined only if the option -fsycl-esimd-force-stateless-mem is used.

// RUN: %clang_cc1 %s -fsycl-is-device -fsycl-esimd-force-stateless-mem -E -dM | FileCheck --check-prefix=CHECK-OPT %s

// RUN: %clang_cc1 %s -E -dM | FileCheck --check-prefix=CHECK-NOOPT %s
// RUN: %clang_cc1 %s -fsycl-is-device -E -dM | FileCheck --check-prefix=CHECK-NOOPT %s
// RUN: %clang_cc1 %s -fsycl-is-host -E -dM | FileCheck --check-prefix=CHECK-NOOPT %s

// CHECK-OPT:#define __ESIMD_FORCE_STATELESS_MEM 1
// CHECK-NOOPT-NOT:#define __ESIMD_FORCE_STATELESS_MEM 1
8 changes: 7 additions & 1 deletion llvm/include/llvm/SYCLLowerIR/ESIMD/ESIMDVerifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,15 @@ namespace llvm {
class ModulePass;

struct ESIMDVerifierPass : public PassInfoMixin<ESIMDVerifierPass> {
ESIMDVerifierPass() {}
ESIMDVerifierPass() : ForceStatelessMem(false) {}
ESIMDVerifierPass(bool ForceStatelessMem)
: ForceStatelessMem(ForceStatelessMem) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
static bool isRequired() { return true; }

// The verifier pass allows more SYCL classes/methods when
// stateless memory accesses are enforced.
bool ForceStatelessMem;
};

ModulePass *createESIMDVerifierPass();
Expand Down
29 changes: 21 additions & 8 deletions llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"

Expand Down Expand Up @@ -49,6 +50,12 @@ static const char *LegalSYCLFunctions[] = {
"^cl::sycl::ext::oneapi::experimental::spec_constant<.+>::.+",
"^cl::sycl::ext::oneapi::experimental::this_sub_group"};

static const char *LegalSYCLFunctionsInStatelessMode[] = {
"^cl::sycl::multi_ptr<.+>::get", "^cl::sycl::multi_ptr<.+>::multi_ptr",
"^cl::sycl::accessor<.+>::get_pointer.+",
"^cl::sycl::accessor<.+>::getPointerAdjusted",
"^cl::sycl::accessor<.+>::getQualifiedPtr"};

namespace {

// Simplest possible implementation of an allocator for the Itanium demangler
Expand Down Expand Up @@ -83,9 +90,11 @@ class SimpleAllocator {

class ESIMDVerifierImpl {
const Module &M;
bool ForceStatelessMem;

public:
ESIMDVerifierImpl(const Module &M) : M(M) {}
ESIMDVerifierImpl(const Module &M, bool ForceStatelessMem)
: M(M), ForceStatelessMem(ForceStatelessMem) {}

void verify() {
SmallPtrSet<const Function *, 8u> Visited;
Expand Down Expand Up @@ -142,11 +151,14 @@ class ESIMDVerifierImpl {
continue;

// Check if function name matches any allowed SYCL function name.
if (any_of(LegalSYCLFunctions, [Name](const char *LegalName) {
Regex LegalNameRE(LegalName);
assert(LegalNameRE.isValid() && "invalid function name regex");
return LegalNameRE.match(Name);
}))
auto checkLegalFunc = [Name](const char *LegalName) {
Regex LegalNameRE(LegalName);
assert(LegalNameRE.isValid() && "invalid function name regex");
return LegalNameRE.match(Name);
};
if (any_of(LegalSYCLFunctions, checkLegalFunc) ||
(ForceStatelessMem &&
any_of(LegalSYCLFunctionsInStatelessMode, checkLegalFunc)))
continue;

// If not, report an error.
Expand All @@ -163,14 +175,15 @@ class ESIMDVerifierImpl {
} // end anonymous namespace

PreservedAnalyses ESIMDVerifierPass::run(Module &M, ModuleAnalysisManager &AM) {
ESIMDVerifierImpl(M).verify();
ESIMDVerifierImpl(M, ForceStatelessMem).verify();
return PreservedAnalyses::all();
}

namespace {

struct ESIMDVerifier : public ModulePass {
static char ID;
bool ForceStatelessMem;

ESIMDVerifier() : ModulePass(ID) {
initializeESIMDVerifierPass(*PassRegistry::getPassRegistry());
Expand All @@ -181,7 +194,7 @@ struct ESIMDVerifier : public ModulePass {
}

bool runOnModule(Module &M) override {
ESIMDVerifierImpl(M).verify();
ESIMDVerifierImpl(M, ForceStatelessMem).verify();
return false;
}
};
Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ namespace id = itanium_demangle;

#define MAX_DIMS 3

cl::opt<bool> ForceStatelessMem(
"lower-esimd-force-stateless-mem", llvm::cl::Optional, llvm::cl::Hidden,
llvm::cl::desc("Use stateless API for accessor based API."),
llvm::cl::init(false));

namespace {
SmallPtrSet<Type *, 4> collectGenXVolatileTypes(Module &);
void generateKernelMetadata(Module &);
Expand Down Expand Up @@ -1564,7 +1569,7 @@ void generateKernelMetadata(Module &M) {
->getValue()
.getZExtValue())
: 0;
if (IsAcc) {
if (IsAcc && !ForceStatelessMem) {
ArgDesc = "buffer_t";
Kind = AK_SURFACE;
} else
Expand Down
25 changes: 25 additions & 0 deletions sycl/doc/UsersManual.md
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,31 @@ and not recommended to use in production environment.

NOTE: This flag is currently only supported with the CUDA and HIP targets.


**`-f[no-]sycl-esimd-force-stateless-mem`** [EXPERIMENTAL]

Enforces stateless memory access and enables the automatic conversion of
"stateful" memory access via SYCL accessors to "stateless" within ESIMD
(Explicit SIMD) kernels.

-fsycl-esimd-force-stateless-mem disables the intrinsics and methods
accepting SYCL accessors or "surface-index" which cannot be automatically
converted to their "stateless" equivalents.

-fno-sycl-esimd-force-stateless-mem is used to tell compiler not to
enforce usage of stateless memory accesses. This is the default behavior.

NOTE: "Stateful" access is the one that uses SYCL accessor or a pair
of "surface-index" + 32-bit byte-offset and uses specific memory access
data port messages to read/write/fetch.
"Stateless" memory access uses memory location represented with virtual
memory address pointer such as USM pointer.

The "stateless" memory may be beneficial as it does not have the limit
of 4Gb per surface.
Also, some of Intel GPUs or GPU run-time/drivers may support only
"stateless" memory accesses.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add information about default value.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. Thank you.

# Example: SYCL device code compilation

To invoke SYCL device compiler set `-fsycl-device-only` flag.
Expand Down
6 changes: 6 additions & 0 deletions sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,10 @@ __ESIMD_INTRIN void __esimd_media_st(TACC handle, unsigned x, unsigned y,
}
#endif // __SYCL_DEVICE_ONLY__

// getter methods returning surface index are not available when stateless
// memory accesses are enforced.
#ifndef __ESIMD_FORCE_STATELESS_MEM

// \brief Converts given value to a surface index.
// The input must always be a result of
// detail::AccessorPrivateProxy::getNativeImageObj(acc)
Expand Down Expand Up @@ -1131,4 +1135,6 @@ __ESIMD_INTRIN __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj)
}
#endif // __SYCL_DEVICE_ONLY__

#endif // !__ESIMD_FORCE_STATELESS_MEM

/// @endcond ESIMD_DETAIL
10 changes: 10 additions & 0 deletions sycl/include/sycl/ext/intel/esimd/detail/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,16 @@ template <unsigned N> class ForHelper {
}
};

#ifdef __ESIMD_FORCE_STATELESS_MEM
/// Returns the address referenced by the accessor \p Acc and
/// the byte offset \p Offset.
template <typename T, typename AccessorTy>
T *accessorToPointer(AccessorTy Acc, uint32_t Offset = 0) {
auto BytePtr = reinterpret_cast<char *>(Acc.get_pointer().get()) + Offset;
return reinterpret_cast<T *>(BytePtr);
}
#endif // __ESIMD_FORCE_STATELESS_MEM

} // namespace __ESIMD_DNS
} // __SYCL_INLINE_NAMESPACE(cl)

Expand Down
32 changes: 30 additions & 2 deletions sycl/include/sycl/ext/intel/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@ template <typename Tx, int N, typename AccessorTy,
class T = detail::__raw_t<Tx>>
__ESIMD_API simd<Tx, N> block_load(AccessorTy acc, uint32_t offset,
Flags = {}) {
#ifdef __ESIMD_FORCE_STATELESS_MEM
return block_load<Tx, N>(__ESIMD_DNS::accessorToPointer<Tx>(acc, offset));
#else
constexpr unsigned Sz = sizeof(T) * N;
static_assert(Sz >= detail::OperandSize::OWORD,
"block size must be at least 1 oword");
Expand All @@ -263,6 +266,7 @@ __ESIMD_API simd<Tx, N> block_load(AccessorTy acc, uint32_t offset,
} else {
return __esimd_oword_ld_unaligned<T, N>(surf_ind, offset);
}
#endif
}

/// Stores elements of a vector to a contiguous block of memory at given
Expand Down Expand Up @@ -304,6 +308,9 @@ template <typename Tx, int N, typename AccessorTy,
class T = detail::__raw_t<Tx>>
__ESIMD_API void block_store(AccessorTy acc, uint32_t offset,
simd<Tx, N> vals) {
#ifdef __ESIMD_FORCE_STATELESS_MEM
block_store<Tx, N>(__ESIMD_DNS::accessorToPointer<Tx>(acc, offset), vals);
#else
constexpr unsigned Sz = sizeof(T) * N;
static_assert(Sz >= detail::OperandSize::OWORD,
"block size must be at least 1 oword");
Expand All @@ -317,6 +324,7 @@ __ESIMD_API void block_store(AccessorTy acc, uint32_t offset,
auto surf_ind = __esimd_get_surface_index(
detail::AccessorPrivateProxy::getNativeImageObj(acc));
__esimd_oword_st<T, N>(surf_ind, offset >> 4, vals.data());
#endif
}

/// @} sycl_esimd_memory
Expand Down Expand Up @@ -426,8 +434,12 @@ __ESIMD_API std::enable_if_t<(sizeof(T) <= 4) &&
simd<T, N>>
gather(AccessorTy acc, simd<uint32_t, N> offsets, uint32_t glob_offset = 0,
simd_mask<N> mask = 1) {

#ifdef __ESIMD_FORCE_STATELESS_MEM
return gather<T, N>(__ESIMD_DNS::accessorToPointer<T>(acc, glob_offset),
offsets, mask);
#else
return detail::gather_impl<T, N, AccessorTy>(acc, offsets, glob_offset, mask);
#endif
}

/// @anchor accessor_scatter
Expand Down Expand Up @@ -455,8 +467,12 @@ __ESIMD_API std::enable_if_t<(sizeof(T) <= 4) &&
!std::is_pointer<AccessorTy>::value>
scatter(AccessorTy acc, simd<uint32_t, N> offsets, simd<T, N> vals,
uint32_t glob_offset = 0, simd_mask<N> mask = 1) {

#ifdef __ESIMD_FORCE_STATELESS_MEM
scatter<T, N>(__ESIMD_DNS::accessorToPointer<T>(acc, glob_offset), offsets,
vals, mask);
#else
detail::scatter_impl<T, N, AccessorTy>(acc, vals, offsets, glob_offset, mask);
#endif
}

/// Load a scalar value from an accessor.
Expand Down Expand Up @@ -623,12 +639,17 @@ __ESIMD_API std::enable_if_t<((N == 8 || N == 16 || N == 32) &&
simd<T, N * get_num_channels_enabled(RGBAMask)>>
gather_rgba(AccessorT acc, simd<uint32_t, N> offsets,
uint32_t global_offset = 0, simd_mask<N> mask = 1) {
#ifdef __ESIMD_FORCE_STATELESS_MEM
return gather_rgba<RGBAMask>(
__ESIMD_DNS::accessorToPointer<T>(acc, global_offset), offsets, mask);
#else
// TODO (performance) use hardware-supported scale once BE supports it
constexpr uint32_t Scale = 0;
const auto SI = get_surface_index(acc);
return __esimd_gather4_masked_scaled2<detail::__raw_t<T>, N, RGBAMask,
decltype(SI), Scale>(
SI, global_offset, offsets.data(), mask.data());
#endif
}

/// Gather data from the memory addressed by accessor \c acc, offset common
Expand All @@ -654,11 +675,16 @@ scatter_rgba(AccessorT acc, simd<uint32_t, N> offsets,
simd<T, N * get_num_channels_enabled(RGBAMask)> vals,
uint32_t global_offset = 0, simd_mask<N> mask = 1) {
detail::validate_rgba_write_channel_mask<RGBAMask>();
#ifdef __ESIMD_FORCE_STATELESS_MEM
scatter_rgba<RGBAMask>(__ESIMD_DNS::accessorToPointer<T>(acc, global_offset),
offsets, vals, mask);
#else
// TODO (performance) use hardware-supported scale once BE supports it
constexpr uint32_t Scale = 0;
const auto SI = get_surface_index(acc);
__esimd_scatter4_scaled<T, N, decltype(SI), RGBAMask, Scale>(
mask.data(), SI, global_offset, offsets.data(), vals.data());
#endif
}

/// @} sycl_esimd_memory
Expand Down Expand Up @@ -1082,6 +1108,7 @@ slm_atomic_update(simd<uint32_t, N> offsets, simd<Tx, N> src0, simd<Tx, N> src1,

/// @} sycl_esimd_memory_slm

#ifndef __ESIMD_FORCE_STATELESS_MEM
/// @addtogroup sycl_esimd_memory
/// @{

Expand Down Expand Up @@ -1167,6 +1194,7 @@ __ESIMD_API void media_block_store(AccessorTy acc, unsigned x, unsigned y,
vals.data());
}
}
#endif // !__ESIMD_FORCE_STATELESS_MEM

/// @} sycl_esimd_memory

Expand Down
Loading