Skip to content

Commit 4ff8fcf

Browse files
authored
[SYCL][SCLA] Add basic sycl_ext_oneapi_private_alloca functionality (#12966)
After a105055 implementing CodeGen capabilities for `sycl_ext_oneapi_private_alloca`, this patch handles the generated intrinsic in `sycl-post-link` for targets with native specialization constants support. Headers for the new extension are also added, as well as a feature test macro. `multi_ptr` definitions in the SYCL headers are annotated with the `__sycl_detail__::sycl_type` to be detected by the frontend. --------- Signed-off-by: Victor Perez <[email protected]>
1 parent cf402b8 commit 4ff8fcf

File tree

15 files changed

+400
-13
lines changed

15 files changed

+400
-13
lines changed

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,6 +1809,25 @@ class ConvergenceControlInst : public IntrinsicInst {
18091809
}
18101810
};
18111811

1812+
/// This represents the llvm.sycl.alloca intrinsic.
1813+
class SYCLAllocaInst : public IntrinsicInst {
1814+
public:
1815+
static bool classof(const IntrinsicInst *I) {
1816+
return I->getIntrinsicID() == Intrinsic::sycl_alloca;
1817+
}
1818+
1819+
static bool classof(const Value *V) {
1820+
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
1821+
}
1822+
1823+
unsigned getAddressSpace() const;
1824+
Value *getSizeSymbolicID() const;
1825+
Value *getSizeDefaultValue() const;
1826+
Value *getRTBuffer() const;
1827+
Type *getAllocatedType() const;
1828+
Align getAlign() const;
1829+
};
1830+
18121831
} // end namespace llvm
18131832

18141833
#endif // LLVM_IR_INTRINSICINST_H

llvm/lib/IR/IntrinsicInst.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -980,3 +980,21 @@ Value *GCRelocateInst::getDerivedPtr() const {
980980
return *(Opt->Inputs.begin() + getDerivedPtrIndex());
981981
return *(GCInst->arg_begin() + getDerivedPtrIndex());
982982
}
983+
984+
unsigned SYCLAllocaInst::getAddressSpace() const {
985+
return getType()->getPointerAddressSpace();
986+
}
987+
988+
Value *SYCLAllocaInst::getSizeSymbolicID() const { return getArgOperand(0); }
989+
990+
Value *SYCLAllocaInst::getSizeDefaultValue() const { return getArgOperand(1); }
991+
992+
Value *SYCLAllocaInst::getRTBuffer() const { return getArgOperand(2); }
993+
994+
Type *SYCLAllocaInst::getAllocatedType() const {
995+
return getFunctionType()->getFunctionParamType(3);
996+
}
997+
998+
Align SYCLAllocaInst::getAlign() const {
999+
return cast<ConstantInt>(getArgOperand(4))->getAlignValue();
1000+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; RUN: sycl-post-link -spec-const=native < %s -S -o %t.table
2+
; RUN: FileCheck %s -check-prefixes=CHECK-RT < %t_0.ll
3+
; RUN: FileCheck %s --check-prefixes=CHECK-PROPS < %t_0.prop
4+
5+
; This test checks that the post link tool is able to correctly transform
6+
; SYCL alloca intrinsics in SPIR-V devices.
7+
8+
%"class.sycl::_V1::specialization_id" = type { i64 }
9+
%"class.sycl::_V1::specialization_id.0" = type { i32 }
10+
%"class.sycl::_V1::specialization_id.1" = type { i16 }
11+
%my_range = type { ptr addrspace(4), ptr addrspace(4) }
12+
13+
@size_i64 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id" { i64 10 }, align 8
14+
@size_i32 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id.0" { i32 120 }, align 4
15+
@size_i16 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id.1" { i16 1 }, align 2
16+
17+
; Check that the following globals are preserved: even though they are not used
18+
; in the module anymore, they could still be referenced by debug info metadata
19+
; (specialization_id objects are used as template arguments in SYCL
20+
; specialization constant APIs).
21+
; CHECK: @size_i64
22+
; CHECK: @size_i32
23+
; CHECK: @size_i16
24+
25+
@size_i64_stable_name = private unnamed_addr constant [36 x i8] c"_ZTS14name_generatorIL_Z8size_i64EE\00", align 1
26+
@size_i32_stable_name = private unnamed_addr constant [36 x i8] c"_ZTS14name_generatorIL_Z8size_i32EE\00", align 1
27+
@size_i16_stable_name = private unnamed_addr constant [36 x i8] c"_ZTS14name_generatorIL_Z8size_i16EE\00", align 1
28+
29+
; CHECK-LABEL: define dso_local void @private_alloca
30+
define dso_local void @private_alloca() {
31+
; CHECK-RT: [[LENGTH:%.*]] = call i32 @_Z20__spirv_SpecConstantii(i32 1, i32 120)
32+
; CHECK-RT: {{.*}} = alloca double, i32 [[LENGTH]], align 8
33+
call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr @size_i32_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i32 to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 8)
34+
; CHECK-RT: [[LENGTH:%.*]] = call i64 @_Z20__spirv_SpecConstantix(i32 0, i64 10)
35+
; CHECK-RT: {{.*}} = alloca float, i64 [[LENGTH]], align 8
36+
call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f32(ptr addrspace(4) addrspacecast (ptr @size_i64_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i64 to ptr addrspace(4)), ptr addrspace(4) null, float 0.000000e+00, i64 8)
37+
call ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_my_range(ptr addrspace(4) addrspacecast (ptr @size_i16_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i16 to ptr addrspace(4)), ptr addrspace(4) null, %my_range zeroinitializer, i64 64)
38+
ret void
39+
}
40+
41+
declare ptr @llvm.sycl.alloca.p0.p4.p4.p4.f32(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4), float, i64)
42+
declare ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4), double, i64)
43+
declare ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_my_range(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4), %my_range, i64)
44+
45+
; CHECK-RT: !sycl.specialization-constants = !{![[#ID0:]], ![[#ID1:]], ![[#ID2:]]}
46+
; CHECK-RT: !sycl.specialization-constants-default-values = !{![[#DEF0:]], ![[#DEF1:]], ![[#DEF2:]]}
47+
48+
; CHECK-RT: ![[#ID0]] = !{!"_ZTS14name_generatorIL_Z8size_i64EE", i32 0, i32 0, i32 8}
49+
; CHECK-RT: ![[#ID1]] = !{!"_ZTS14name_generatorIL_Z8size_i32EE", i32 1, i32 0, i32 4}
50+
; CHECK-RT: ![[#ID2]] = !{!"_ZTS14name_generatorIL_Z8size_i16EE", i32 2, i32 0, i32 2}
51+
; CHECK-RT: ![[#DEF0]] = !{i64 10}
52+
; CHECK-RT: ![[#DEF1]] = !{i32 120}
53+
; CHECK-RT: ![[#DEF2]] = !{i16 1}
54+
55+
; CHECK-PROPS: [SYCL/specialization constants]
56+
; CHECK-PROPS: _ZTS14name_generatorIL_Z8size_i64EE=2|
57+
; CHECK-PROPS: _ZTS14name_generatorIL_Z8size_i32EE=2|
58+
; CHECK-PROPS: _ZTS14name_generatorIL_Z8size_i16EE=2|
59+
; CHECK-PROPS: [SYCL/specialization constants default values]
60+
; CHECK-PROPS: all=2|

llvm/tools/sycl-post-link/SpecConstants.cpp

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/IR/InstIterator.h"
1919
#include "llvm/IR/Instruction.h"
2020
#include "llvm/IR/Instructions.h"
21+
#include "llvm/IR/IntrinsicInst.h"
2122
#include "llvm/IR/Operator.h"
2223

2324
#include <vector>
@@ -818,8 +819,11 @@ PreservedAnalyses SpecConstantsPass::run(Module &M,
818819
if (!F.isDeclaration())
819820
continue;
820821

822+
const bool IsSYCLAlloca = F.getIntrinsicID() == Intrinsic::sycl_alloca;
823+
821824
if (!F.getName().starts_with(SYCL_GET_SCALAR_2020_SPEC_CONST_VAL) &&
822-
!F.getName().starts_with(SYCL_GET_COMPOSITE_2020_SPEC_CONST_VAL))
825+
!F.getName().starts_with(SYCL_GET_COMPOSITE_2020_SPEC_CONST_VAL) &&
826+
!IsSYCLAlloca)
823827
continue;
824828

825829
SmallVector<CallInst *, 32> SCIntrCalls;
@@ -838,21 +842,39 @@ PreservedAnalyses SpecConstantsPass::run(Module &M,
838842

839843
SmallVector<Instruction *, 3> DelInsts;
840844
DelInsts.push_back(CI);
841-
Type *SCTy = CI->getType();
842-
unsigned NameArgNo = 0;
843845
Function *Callee = CI->getCalledFunction();
844846
assert(Callee && "Failed to get spec constant call");
845-
bool HasSretParameter = Callee->hasStructRetAttr();
847+
846848
// Structs are returned via 'sret' arguments if they are larger than 64b
847-
if (HasSretParameter) {
848-
// Get structure type stored in an argument annotated with 'sret'
849-
// parameter attribute and skip it.
850-
SCTy = Callee->getParamStructRetType(NameArgNo++);
851-
}
849+
bool HasSretParameter = Callee->hasStructRetAttr();
850+
assert(!(HasSretParameter && IsSYCLAlloca) &&
851+
"'llvm.sycl.alloca' returns a pointer");
852+
// Skip 'sret' parameter.
853+
unsigned NameArgNo = HasSretParameter ? 1 : 0;
854+
852855
StringRef SymID = getStringLiteralArg(CI, NameArgNo, DelInsts);
853856
Value *Replacement = nullptr;
854857

855858
Constant *DefaultValue = getSpecConstInitializerFromCI(CI, NameArgNo + 1);
859+
Type *SCTy;
860+
if (HasSretParameter) {
861+
// Specialization constant type is given by the 'sret' parameter.
862+
SCTy = Callee->getParamStructRetType(0);
863+
} else if (IsSYCLAlloca) {
864+
// 'llvm.sycl.alloca' returns a pointer, so we need to take the
865+
// specialization constant type from the default value. At this stage,
866+
// we will have lost the original scalar representation of the type, so
867+
// we have to take the in-memory representation. This is only relevant
868+
// when a 'bool' ('i1' scalar representation and 'i8' in-memory
869+
// representation) specialization constant is used as size. In that
870+
// case, for a value of 'true' (the only legal value), the default value
871+
// will be 1 ('i8'), thus keeping the original semantics.
872+
SCTy = DefaultValue->getType();
873+
} else {
874+
// Specialization constant type is the same as the one returned by the
875+
// function in the general case.
876+
SCTy = CI->getType();
877+
}
856878

857879
bool IsNewSpecConstant = false;
858880
unsigned Padding = 0;
@@ -872,6 +894,17 @@ PreservedAnalyses SpecConstantsPass::run(Module &M,
872894
// 3. Transform to spirv intrinsic _Z*__spirv_SpecConstant* or
873895
// _Z*__spirv_SpecConstantComposite
874896
Replacement = emitSpecConstantRecursive(SCTy, CI, IDs, DefaultValue);
897+
if (IsSYCLAlloca) {
898+
// In case this is a 'sycl.llvm.alloca' intrinsic, use the emitted
899+
// specialization constant as the allocation size.
900+
auto *Intr = cast<SYCLAllocaInst>(CI);
901+
Value *ArraySize = Replacement;
902+
assert(ArraySize->getType()->isIntegerTy() &&
903+
"Expecting integer type");
904+
Replacement =
905+
new AllocaInst(Intr->getAllocatedType(), Intr->getAddressSpace(),
906+
ArraySize, Intr->getAlign(), "alloca", CI);
907+
}
875908
if (IsNewSpecConstant) {
876909
// emitSpecConstantRecursive might emit more than one spec constant
877910
// (because of composite types) and therefore, we need to adjust
@@ -884,6 +917,8 @@ PreservedAnalyses SpecConstantsPass::run(Module &M,
884917
M, SymID, SCTy, IDs, /* is native spec constant */ true);
885918
}
886919
} else if (Mode == HandlingMode::emulation) {
920+
assert(!IsSYCLAlloca && "sycl_ext_oneapi_private_alloca not yet "
921+
"supported in emulation mode");
887922
// 2a. Spec constant will be passed as kernel argument;
888923

889924
// Replace it with a load from the pointer to the specialization
@@ -1043,7 +1078,8 @@ bool SpecConstantsPass::collectSpecConstantDefaultValuesMetadata(
10431078
bool llvm::checkModuleContainsSpecConsts(const Module &M) {
10441079
for (const Function &F : M.functions()) {
10451080
if (F.getName().starts_with(SYCL_GET_SCALAR_2020_SPEC_CONST_VAL) ||
1046-
F.getName().starts_with(SYCL_GET_COMPOSITE_2020_SPEC_CONST_VAL))
1081+
F.getName().starts_with(SYCL_GET_COMPOSITE_2020_SPEC_CONST_VAL) ||
1082+
F.getIntrinsicID() == llvm::Intrinsic::sycl_alloca)
10471083
return true;
10481084
}
10491085

sycl/include/sycl/detail/defines.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,9 @@
3838
#else
3939
#define __SYCL_TYPE(x)
4040
#endif
41+
42+
#if __has_cpp_attribute(clang::builtin_alias)
43+
#define __SYCL_BUILTIN_ALIAS(x) [[clang::builtin_alias(x)]]
44+
#else
45+
#define __SYCL_BUILTIN_ALIAS(x)
46+
#endif
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
//==--- alloca.hpp --- SYCL extension for private memory allocations--------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include "sycl/exception.hpp"
12+
#include "sycl/kernel_handler.hpp"
13+
#include "sycl/multi_ptr.hpp"
14+
15+
namespace sycl {
16+
inline namespace _V1 {
17+
namespace ext::oneapi::experimental {
18+
19+
#ifdef __SYCL_DEVICE_ONLY__
20+
21+
// On the device, this is an alias to __builtin_intel_sycl_alloca.
22+
23+
/// Function allocating and returning a pointer to an unitialized region of
24+
/// memory capable of hosting `kh.get_specialization_constant<SizeSpecName>()`
25+
/// elements of type \tp ElementType. The pointer will be a `sycl::private_ptr`
26+
/// and will or will not be decorated depending on \tp DecorateAddres.
27+
///
28+
/// On the host, this function simply throws, as this is not supported there.
29+
///
30+
/// See sycl_ext_oneapi_private_alloca.
31+
template <typename ElementType, auto &SizeSpecName,
32+
access::decorated DecorateAddress>
33+
__SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca)
34+
private_ptr<ElementType, DecorateAddress> private_alloca(kernel_handler &kh);
35+
36+
#else
37+
38+
// On the host, throw, this is not supported.
39+
template <typename ElementType, auto &SizeSpecName,
40+
access::decorated DecorateAddress>
41+
private_ptr<ElementType, DecorateAddress> private_alloca(kernel_handler &kh) {
42+
throw feature_not_supported("sycl::ext::oneapi::experimental::private_alloca "
43+
"is not supported on host",
44+
PI_ERROR_INVALID_OPERATION);
45+
}
46+
47+
#endif // __SYCL_DEVICE_ONLY__
48+
49+
} // namespace ext::oneapi::experimental
50+
} // namespace _V1
51+
} // namespace sycl

sycl/include/sycl/multi_ptr.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ template <typename dataT, int dimensions> class local_accessor;
8080
// should be removed.
8181
template <typename ElementType, access::address_space Space,
8282
access::decorated DecorateAddress = access::decorated::legacy>
83-
class multi_ptr {
83+
class __SYCL_TYPE(multi_ptr) multi_ptr {
8484
private:
8585
using decorated_type =
8686
typename detail::DecoratedType<ElementType, Space>::type;
@@ -444,7 +444,7 @@ class multi_ptr {
444444

445445
/// Specialization of multi_ptr for const void.
446446
template <access::address_space Space, access::decorated DecorateAddress>
447-
class multi_ptr<const void, Space, DecorateAddress> {
447+
class __SYCL_TYPE(multi_ptr) multi_ptr<const void, Space, DecorateAddress> {
448448
private:
449449
using decorated_type =
450450
typename detail::DecoratedType<const void, Space>::type;
@@ -592,7 +592,7 @@ class multi_ptr<const void, Space, DecorateAddress> {
592592

593593
// Specialization of multi_ptr for void.
594594
template <access::address_space Space, access::decorated DecorateAddress>
595-
class multi_ptr<void, Space, DecorateAddress> {
595+
class __SYCL_TYPE(multi_ptr) multi_ptr<void, Space, DecorateAddress> {
596596
private:
597597
using decorated_type = typename detail::DecoratedType<void, Space>::type;
598598

sycl/source/feature_test.hpp.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ inline namespace _V1 {
103103
#define SYCL_EXT_ONEAPI_IN_ORDER_QUEUE_EVENTS 1
104104
#define SYCL_EXT_INTEL_MATRIX 1
105105
#define SYCL_EXT_INTEL_FPGA_TASK_SEQUENCE 1
106+
#define SYCL_EXT_ONEAPI_PRIVATE_ALLOCA 1
106107

107108
#ifndef __has_include
108109
#define __has_include(x) 0
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#pragma once
2+
3+
// Template for private alloca tests.
4+
5+
#include <sycl/sycl.hpp>
6+
7+
#include <sycl/ext/oneapi/experimental/alloca.hpp>
8+
9+
template <typename ElementType, typename SizeType,
10+
sycl::access::decorated DecorateAddress>
11+
class Kernel;
12+
13+
template <typename ElementType, auto &Size,
14+
sycl::access::decorated DecorateAddress>
15+
void test() {
16+
std::size_t N;
17+
18+
std::cin >> N;
19+
20+
std::vector<std::size_t> v(N);
21+
{
22+
sycl::queue q;
23+
sycl::buffer<std::size_t> b(v);
24+
q.submit([&](sycl::handler &cgh) {
25+
sycl::accessor acc(b, cgh, sycl::write_only, sycl::no_init);
26+
cgh.set_specialization_constant<Size>(N);
27+
using spec_const_type = std::remove_reference_t<decltype(Size)>;
28+
using size_type = typename spec_const_type::value_type;
29+
cgh.single_task<Kernel<ElementType, size_type, DecorateAddress>>(
30+
[=](sycl::kernel_handler h) {
31+
auto ptr = sycl::ext::oneapi::experimental::private_alloca<
32+
ElementType, Size, DecorateAddress>(h);
33+
const std::size_t M = h.get_specialization_constant<Size>();
34+
ptr[0] = static_cast<ElementType>(M);
35+
ElementType value{1};
36+
for (auto begin = ptr.get() + 1, end = ptr.get() + M; begin < end;
37+
++begin, ++value) {
38+
*begin = value;
39+
}
40+
auto accBegin = acc.begin();
41+
for (auto begin = ptr.get(), end = ptr.get() + M; begin < end;
42+
++begin, ++accBegin) {
43+
*accBegin = *begin;
44+
}
45+
});
46+
});
47+
q.wait_and_throw();
48+
}
49+
assert(static_cast<std::size_t>(v.front()) == N &&
50+
"Wrong private alloca length reported");
51+
for (std::size_t i = 1; i < N; ++i) {
52+
assert(static_cast<std::size_t>(v[i]) == i &&
53+
"Wrong value in copied-back sequence");
54+
}
55+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// RUN: %{build} -w -o %t.out
2+
// RUN: echo 1 | %{run} %t.out
3+
// UNSUPPORTED: cuda || hip
4+
5+
// Test checking size of 'bool' type. This is not expected to be ever used, but,
6+
// as 'bool' is an integral type, it is a possible scenario.
7+
8+
#include "Inputs/private_alloca_test.hpp"
9+
10+
constexpr sycl::specialization_id<bool> size(true);
11+
12+
int main() { test<int, size, sycl::access::decorated::legacy>(); }
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: echo 1 | %{run} %t.out
3+
// RUN: echo 10 | %{run} %t.out
4+
// RUN: echo 20 | %{run} %t.out
5+
// RUN: echo 30 | %{run} %t.out
6+
// UNSUPPORTED: cuda || hip
7+
8+
// Simple test filling a SYCL private alloca and copying it back to an output
9+
// accessor using a decorated multi_ptr.
10+
11+
#include "Inputs/private_alloca_test.hpp"
12+
13+
constexpr sycl::specialization_id<int> size(10);
14+
15+
int main() { test<float, size, sycl::access::decorated::yes>(); }

0 commit comments

Comments
 (0)