Skip to content

Commit 48abe38

Browse files
authored
[SYCL][ESIMD] Lower global volatile stores to vstores (#9088)
According to the VC team, all stores to volatile globals need to be vstores for correctness. Sometimes clang implicitly inserts stores, and if this happens, we need to lower to vstores. With that, we no longer need the commit() function, so remove it and update related doc. It never made it onto a compiler release, so we should be able to remove it with no deprecation. The function ext::intel::experimental::esimd::simd::commit() was turned into NO-OP and deprecated. --------- Signed-off-by: Sarnie, Nick <[email protected]>
1 parent 865db9f commit 48abe38

File tree

6 files changed

+78
-14
lines changed

6 files changed

+78
-14
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,8 @@ enum class lsc_subopcode : uint8_t {
131131
// /^_Z(\d+)__esimd_\w+/
132132
static constexpr char ESIMD_INTRIN_PREF0[] = "_Z";
133133
static constexpr char ESIMD_INTRIN_PREF1[] = "__esimd_";
134+
static constexpr char ESIMD_INSERTED_VSTORE_FUNC_NAME[] = "_Z14__esimd_vstorev";
134135
static constexpr char SPIRV_INTRIN_PREF[] = "__spirv_BuiltIn";
135-
136136
struct ESIMDIntrinDesc {
137137
// Denotes argument translation rule kind.
138138
enum GenXArgRuleKind {
@@ -714,10 +714,10 @@ static bool isDevicelibFunction(StringRef FunctionName) {
714714
.Default(false);
715715
}
716716

717-
// Mangle deviceLib function to make it pass through the regular workflow
718-
// These functions are defined as extern "C" which Demangler that is used
719-
// fails to handle properly.
720-
static std::string mangleDevicelibFunction(StringRef FunctionName) {
717+
static std::string mangleFunction(StringRef FunctionName) {
718+
// Mangle deviceLib function to make it pass through the regular workflow
719+
// These functions are defined as extern "C" which Demangler that is used
720+
// fails to handle properly.
721721
if (isDevicelibFunction(FunctionName)) {
722722
if (FunctionName.startswith("__devicelib_ConvertFToBF16INTEL")) {
723723
return (Twine("_Z31") + FunctionName + "RKf").str();
@@ -726,6 +726,11 @@ static std::string mangleDevicelibFunction(StringRef FunctionName) {
726726
return (Twine("_Z31") + FunctionName + "RKt").str();
727727
}
728728
}
729+
// Every inserted vstore gets its own function with the same name,
730+
// so they are mangled with ".[0-9]+". Just use the
731+
// raw name to pass through the demangler.
732+
if (FunctionName.startswith(ESIMD_INSERTED_VSTORE_FUNC_NAME))
733+
return ESIMD_INSERTED_VSTORE_FUNC_NAME;
729734
return FunctionName.str();
730735
}
731736

@@ -1469,7 +1474,7 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
14691474
using Demangler = id::ManglingParser<SimpleAllocator>;
14701475
Function *F = CI.getCalledFunction();
14711476
llvm::esimd::assert_and_diag(F, "function to translate is invalid");
1472-
std::string MnglNameStr = mangleDevicelibFunction(F->getName());
1477+
std::string MnglNameStr = mangleFunction(F->getName());
14731478
StringRef MnglName = MnglNameStr;
14741479

14751480
Demangler Parser(MnglName.begin(), MnglName.end());
@@ -1718,6 +1723,45 @@ SmallPtrSet<Type *, 4> collectGenXVolatileTypes(Module &M) {
17181723
return GenXVolatileTypeSet;
17191724
}
17201725

1726+
// genx_volatile variables are special and require vstores instead of stores.
1727+
// In most cases, the vstores are called directly in the implementation
1728+
// of the simd object operations, but in some cases clang can implicitly
1729+
// insert stores, such as after a write in inline assembly. To handle that
1730+
// case, lower any stores of genx_volatiles into vstores.
1731+
void lowerGlobalStores(Module &M, const SmallPtrSet<Type *, 4> &GVTS) {
1732+
SmallVector<Instruction *, 4> ToErase;
1733+
for (auto &F : M.functions()) {
1734+
for (Instruction &I : instructions(F)) {
1735+
auto SI = dyn_cast_or_null<StoreInst>(&I);
1736+
if (!SI)
1737+
continue;
1738+
if (GVTS.find(SI->getValueOperand()->getType()) == GVTS.end())
1739+
continue;
1740+
SmallVector<Type *, 2> ArgTypes;
1741+
IRBuilder<> Builder(SI);
1742+
ArgTypes.push_back(SI->getPointerOperand()->getType());
1743+
ArgTypes.push_back(SI->getValueOperand()->getType());
1744+
auto *NewFType = FunctionType::get(SI->getType(), ArgTypes, false);
1745+
auto *NewF =
1746+
Function::Create(NewFType, GlobalVariable::ExternalWeakLinkage,
1747+
ESIMD_INSERTED_VSTORE_FUNC_NAME, M);
1748+
NewF->addFnAttr(Attribute::NoUnwind);
1749+
NewF->addFnAttr(Attribute::Convergent);
1750+
NewF->setDSOLocal(true);
1751+
NewF->setCallingConv(CallingConv::SPIR_FUNC);
1752+
SmallVector<Value *, 2> Args;
1753+
Args.push_back(SI->getPointerOperand());
1754+
Args.push_back(SI->getValueOperand());
1755+
auto *NewCI = Builder.CreateCall(NewFType, NewF, Args);
1756+
NewCI->setDebugLoc(SI->getDebugLoc());
1757+
ToErase.push_back(SI);
1758+
}
1759+
}
1760+
for (auto *Inst : ToErase) {
1761+
Inst->eraseFromParent();
1762+
}
1763+
}
1764+
17211765
} // namespace
17221766

17231767
PreservedAnalyses SYCLLowerESIMDPass::run(Module &M, ModuleAnalysisManager &) {
@@ -1726,7 +1770,7 @@ PreservedAnalyses SYCLLowerESIMDPass::run(Module &M, ModuleAnalysisManager &) {
17261770
// uses the generated metadata:
17271771
size_t AmountOfESIMDIntrCalls = lowerSLMReservationCalls(M);
17281772
SmallPtrSet<Type *, 4> GVTS = collectGenXVolatileTypes(M);
1729-
1773+
lowerGlobalStores(M, GVTS);
17301774
for (auto &F : M.functions()) {
17311775
AmountOfESIMDIntrCalls += this->runOnFunction(F, GVTS);
17321776
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; This test checks whether global stores are converted to vstores
2+
;
3+
; RUN: opt < %s -passes=LowerESIMD -S | FileCheck %s
4+
5+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
6+
target triple = "spir64-unknown-unknown"
7+
8+
%"class.sycl::_V1::ext::intel::esimd::simd" = type { %"class.sycl::_V1::ext::intel::esimd::detail::simd_obj_impl" }
9+
%"class.sycl::_V1::ext::intel::esimd::detail::simd_obj_impl" = type { <16 x float> }
10+
11+
@va = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd" zeroinitializer, align 64 #0
12+
@vb = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd" zeroinitializer, align 64 #0
13+
14+
define weak_odr dso_local spir_kernel void @foo() {
15+
%1 = call <16 x float> asm "", "=rw"()
16+
; CHECK: call void @llvm.genx.vstore.v16f32.p0v16f32(<16 x float> %1, <16 x float>* getelementptr inbounds (%"class.sycl::_V1::ext::intel::esimd::simd", %"class.sycl::_V1::ext::intel::esimd::simd"* @va, i64 0, i32 0, i32 0))
17+
store <16 x float> %1, <16 x float>* getelementptr inbounds (%"class.sycl::_V1::ext::intel::esimd::simd", %"class.sycl::_V1::ext::intel::esimd::simd"* @va, i64 0, i32 0, i32 0)
18+
; CHECK-NEXT: call void @llvm.genx.vstore.v16f32.p0v16f32(<16 x float> %1, <16 x float>* getelementptr inbounds (%"class.sycl::_V1::ext::intel::esimd::simd", %"class.sycl::_V1::ext::intel::esimd::simd"* @vb, i64 0, i32 0, i32 0))
19+
store <16 x float> %1, <16 x float>* getelementptr inbounds (%"class.sycl::_V1::ext::intel::esimd::simd", %"class.sycl::_V1::ext::intel::esimd::simd"* @vb, i64 0, i32 0, i32 0)
20+
ret void
21+
}
22+
23+
attributes #0 = { "genx_byte_offset"="0" "genx_volatile" }
24+
attributes #1 = { "" }

sycl/doc/extensions/experimental/sycl_ext_intel_esimd/sycl_ext_intel_esimd.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,6 @@ The parameter and the return type in the ABI form will be `<8 x float>`.
819819
Inline assembly is supported with ESIMD classes `simd`, `simd_mask` and `simd_view`. `simd_view` only supports read operations.
820820
In order the access the raw underlying vector required for inline assembly, the `data` function can be used for read-only access and
821821
the `data_ref` function can be used for write access. The `data_ref` function only exists for `simd` and `simd_mask`, and should only be used in inline assembly.
822-
If the `simd` or `simd_mask` object is a private global variable, the `commit` function must be called after any write in inline assembly.
823822

824823
Example of inline GEN assembly:
825824
```cpp
@@ -842,7 +841,6 @@ void calledFromKernel() {
842841
__asm__("add (M1, 16) %0 %1 %2"
843842
: "=rw"(vc.data_ref())
844843
: "rw"(va.data()), "rw"(vb.data()));
845-
vc.commit();
846844
}
847845
```
848846

sycl/include/sycl/ext/intel/esimd/detail/simd_obj_impl.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,9 @@ class simd_obj_impl {
361361

362362
/// Commit the current stored underlying raw vector to memory.
363363
/// This is required when using inline assembly with private global variables.
364-
void commit() { __esimd_vstore<RawTy, N>(&M_data, M_data); }
364+
__SYCL_DEPRECATED(
365+
"commit is deprecated and will be removed in a future release")
366+
void commit() {}
365367

366368
/// @return Newly constructed (from the underlying data) object of the Derived
367369
/// type.

sycl/test-e2e/ESIMD/InlineAsm/asm_glb.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ int main(void) {
6666
__asm__("add (M1, 16) %0 %1 %2"
6767
: "=rw"(vc.data_ref())
6868
: "rw"(va.data()), "rw"(vb.data()));
69-
vc.commit();
7069
#else
7170
vc = va+vb;
7271
#endif

sycl/test/esimd/simd_inline_asm.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,4 @@ void test_error() SYCL_ESIMD_FUNCTION {
2424
__asm__("%0" : "=rw"(mask.data()));
2525

2626
__asm__("%0" : "=rw"(mask.data_ref()));
27-
28-
s.commit();
29-
mask.commit();
3027
}

0 commit comments

Comments
 (0)