Skip to content

Commit 15bbed8

Browse files
authored
[SYCL] Refactor stream class handing implementation (#3646)
Currently the stream class when passed from host to device requires additional handling. It is handled as a wrapper struct for several accessors. In order to create the AST, the stream class is processed as a whole and each accessor is processed individually. This patch is to simplify the process. An __init and __finalize methods are introduced that eliminate the need to process the field individually. The steps are as follows: 1. Look up for the member function sycl::stream::__init 2. Copy the sycl::stream::__init arguments into the kernel function argument list 3. Allocate a sycl::stream object and call sycl::stream::__init with the kernel argument 4. Call sycl::stream::__finalize with the kernel argument at the end of the kernel Signed-off-by: Zahira Ammarguellat <[email protected]>
1 parent 79cd7b0 commit 15bbed8

File tree

14 files changed

+261
-624
lines changed

14 files changed

+261
-624
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11412,6 +11412,8 @@ def err_sycl_mismatch_group_size
1141211412
"have a sub group size that matches the size specified for the "
1141311413
"kernel">;
1141411414
def note_sycl_kernel_declared_here : Note<"kernel declared here">;
11415+
def err_sycl_expected_finalize_method : Error<
11416+
"expected a 'finalize' method for the 'stream' class">;
1141511417
def ext_sycl_2020_attr_spelling : ExtWarn<
1141611418
"use of attribute %0 is a SYCL 2020 extension">,
1141711419
InGroup<Sycl2017Compat>;

clang/include/clang/Sema/Sema.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,8 @@ class SYCLIntegrationHeader {
316316
kind_sampler,
317317
kind_pointer,
318318
kind_specialization_constants_buffer,
319-
kind_last = kind_specialization_constants_buffer
319+
kind_stream,
320+
kind_last = kind_stream
320321
};
321322

322323
public:

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 13 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,23 +1034,6 @@ class KernelObjVisitor {
10341034
VisitRecordFields(Owner, Handlers...);
10351035
}
10361036

1037-
// FIXME: Can this be refactored/handled some other way?
1038-
template <typename ParentTy, typename... HandlerTys>
1039-
void visitStreamRecord(const CXXRecordDecl *Owner, ParentTy &Parent,
1040-
CXXRecordDecl *Wrapper, QualType RecordTy,
1041-
HandlerTys &... Handlers) {
1042-
(void)std::initializer_list<int>{
1043-
(Handlers.enterStream(Owner, Parent, RecordTy), 0)...};
1044-
for (const auto &Field : Wrapper->fields()) {
1045-
QualType FieldTy = Field->getType();
1046-
// Required to initialize accessors inside streams.
1047-
if (Util::isSyclAccessorType(FieldTy))
1048-
KF_FOR_EACH(handleSyclAccessorType, Field, FieldTy);
1049-
}
1050-
(void)std::initializer_list<int>{
1051-
(Handlers.leaveStream(Owner, Parent, RecordTy), 0)...};
1052-
}
1053-
10541037
template <typename... HandlerTys>
10551038
void visitArrayElementImpl(const CXXRecordDecl *Owner, FieldDecl *ArrayField,
10561039
QualType ElementTy, uint64_t Index,
@@ -1125,12 +1108,9 @@ class KernelObjVisitor {
11251108
KF_FOR_EACH(handleSyclHalfType, Field, FieldTy);
11261109
else if (Util::isSyclSpecConstantType(FieldTy))
11271110
KF_FOR_EACH(handleSyclSpecConstantType, Field, FieldTy);
1128-
else if (Util::isSyclStreamType(FieldTy)) {
1129-
CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl();
1130-
// Handle accessors in stream class.
1111+
else if (Util::isSyclStreamType(FieldTy))
11311112
KF_FOR_EACH(handleSyclStreamType, Field, FieldTy);
1132-
visitStreamRecord(Owner, Field, RD, FieldTy, Handlers...);
1133-
} else if (FieldTy->isStructureOrClassType()) {
1113+
else if (FieldTy->isStructureOrClassType()) {
11341114
if (KF_FOR_EACH(handleStructType, Field, FieldTy)) {
11351115
CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl();
11361116
visitRecord(Owner, Field, RD, FieldTy, Handlers...);
@@ -1244,12 +1224,6 @@ class SyclKernelFieldHandlerBase {
12441224
virtual bool leaveStruct(const CXXRecordDecl *, FieldDecl *, QualType) {
12451225
return true;
12461226
}
1247-
virtual bool enterStream(const CXXRecordDecl *, FieldDecl *, QualType) {
1248-
return true;
1249-
}
1250-
virtual bool leaveStream(const CXXRecordDecl *, FieldDecl *, QualType) {
1251-
return true;
1252-
}
12531227
virtual bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &,
12541228
QualType) {
12551229
return true;
@@ -1697,18 +1671,6 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler {
16971671
return true;
16981672
}
16991673

1700-
// Stream is always decomposed (and whether it gets decomposed is handled in
1701-
// handleSyclStreamType), but we need a CollectionStack entry to capture the
1702-
// accessors that get handled.
1703-
bool enterStream(const CXXRecordDecl *, FieldDecl *, QualType) final {
1704-
CollectionStack.push_back(false);
1705-
return true;
1706-
}
1707-
bool leaveStream(const CXXRecordDecl *, FieldDecl *, QualType Ty) final {
1708-
CollectionStack.pop_back();
1709-
return true;
1710-
}
1711-
17121674
bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) final {
17131675
CollectionStack.push_back(false);
17141676
return true;
@@ -1956,14 +1918,6 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
19561918
SemaRef.addSyclDeviceDecl(KernelDecl);
19571919
}
19581920

1959-
bool enterStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
1960-
return enterStruct(RD, FD, Ty);
1961-
}
1962-
1963-
bool leaveStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
1964-
return leaveStruct(RD, FD, Ty);
1965-
}
1966-
19671921
bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) final {
19681922
++StructDepth;
19691923
return true;
@@ -2099,8 +2053,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
20992053
}
21002054

21012055
bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final {
2102-
addParam(FD, FieldTy);
2103-
return true;
2056+
return handleSpecialType(FD, FieldTy);
21042057
}
21052058

21062059
bool handleSyclStreamType(const CXXRecordDecl *, const CXXBaseSpecifier &,
@@ -2419,15 +2372,6 @@ class SyclOptReportCreator : public SyclKernelFieldHandler {
24192372
}
24202373

24212374
bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final {
2422-
// For the current implementation of stream class, the Visitor 'handles'
2423-
// stream argument and then visits each accessor field in stream. Therefore
2424-
// handleSpecialType in this case only adds a single argument for stream.
2425-
// The arguments corresponding to accessors in stream are handled in
2426-
// handleSyclAccessorType. The opt-report therefore does not diffrentiate
2427-
// between the accessors in streams and accessors captured by SYCL kernel.
2428-
// Once stream API is modified to use __init(), the visitor will no longer
2429-
// visit the stream object and opt-report output for stream class will be
2430-
// similar to that of other special types.
24312375
return handleSpecialType(
24322376
FD, FieldTy, KernelArgDescription(KernelArgDescription::Stream));
24332377
}
@@ -2805,6 +2749,13 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
28052749

28062750
const auto *RecordDecl = Ty->getAsCXXRecordDecl();
28072751
createSpecialMethodCall(RecordDecl, getInitMethodName(), BodyStmts);
2752+
CXXMethodDecl *FinalizeMethod =
2753+
getMethodByName(RecordDecl, FinalizeMethodName);
2754+
// A finalize-method is expected for stream class.
2755+
if (!FinalizeMethod && Util::isSyclStreamType(Ty))
2756+
SemaRef.Diag(FD->getLocation(), diag::err_sycl_expected_finalize_method);
2757+
else
2758+
createSpecialMethodCall(RecordDecl, FinalizeMethodName, FinalizeStmts);
28082759

28092760
removeFieldMemberExpr(FD, Ty);
28102761

@@ -2898,9 +2849,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
28982849
}
28992850

29002851
bool handleSyclStreamType(FieldDecl *FD, QualType Ty) final {
2901-
// Streams just get copied as a new init.
2902-
addSimpleFieldInit(FD, Ty);
2903-
return true;
2852+
return handleSpecialType(FD, Ty);
29042853
}
29052854

29062855
bool handleSyclStreamType(const CXXRecordDecl *, const CXXBaseSpecifier &BS,
@@ -2977,31 +2926,6 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
29772926
handleSpecialType(KernelHandlerArg->getType());
29782927
}
29792928

2980-
bool enterStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
2981-
++StructDepth;
2982-
// Add a dummy init expression to catch the accessor initializers.
2983-
const auto *StreamDecl = Ty->getAsCXXRecordDecl();
2984-
CollectionInitExprs.push_back(createInitListExpr(StreamDecl));
2985-
2986-
addFieldMemberExpr(FD, Ty);
2987-
return true;
2988-
}
2989-
2990-
bool leaveStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
2991-
--StructDepth;
2992-
// Stream requires that its 'init' calls happen after its accessors init
2993-
// calls, so add them here instead.
2994-
const auto *StreamDecl = Ty->getAsCXXRecordDecl();
2995-
2996-
createSpecialMethodCall(StreamDecl, getInitMethodName(), BodyStmts);
2997-
createSpecialMethodCall(StreamDecl, FinalizeMethodName, FinalizeStmts);
2998-
2999-
removeFieldMemberExpr(FD, Ty);
3000-
3001-
CollectionInitExprs.pop_back();
3002-
return true;
3003-
}
3004-
30052929
bool enterStruct(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
30062930
++StructDepth;
30072931
addCollectionInitListExpr(Ty->getAsCXXRecordDecl());
@@ -3315,7 +3239,7 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler {
33153239
}
33163240

33173241
bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final {
3318-
addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout);
3242+
addParam(FD, FieldTy, SYCLIntegrationHeader::kind_stream);
33193243
return true;
33203244
}
33213245

@@ -3347,18 +3271,6 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler {
33473271
SYCLIntegrationHeader::kind_specialization_constants_buffer, 0);
33483272
}
33493273

3350-
bool enterStream(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final {
3351-
++StructDepth;
3352-
CurOffset += offsetOf(FD, Ty);
3353-
return true;
3354-
}
3355-
3356-
bool leaveStream(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final {
3357-
--StructDepth;
3358-
CurOffset -= offsetOf(FD, Ty);
3359-
return true;
3360-
}
3361-
33623274
bool enterStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final {
33633275
++StructDepth;
33643276
CurOffset += offsetOf(FD, Ty);
@@ -4160,6 +4072,7 @@ static const char *paramKind2Str(KernelParamKind K) {
41604072
CASE(accessor);
41614073
CASE(std_layout);
41624074
CASE(sampler);
4075+
CASE(stream);
41634076
CASE(specialization_constants_buffer);
41644077
CASE(pointer);
41654078
}

clang/test/CodeGenSYCL/Inputs/sycl.hpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ class accessor {
181181
void __init(__attribute__((opencl_global)) dataT *Ptr, range<dimensions> AccessRange,
182182
range<dimensions> MemRange, id<dimensions> Offset) {}
183183
void __init_esimd(__attribute__((opencl_global)) dataT *Ptr) {}
184+
friend class stream;
184185
};
185186

186187
template <int dimensions, access::mode accessmode, access::target accesstarget>
@@ -411,10 +412,22 @@ class stream {
411412
public:
412413
stream(unsigned long BufferSize, unsigned long MaxStatementSize,
413414
handler &CGH) {}
415+
#ifdef __SYCL_DEVICE_ONLY__
416+
// Default constructor for objects later initialized with __init member.
417+
stream() = default;
418+
#endif
414419

415-
void __init() {}
420+
void __init(__attribute((opencl_global)) char *Ptr, range<1> AccessRange,
421+
range<1> MemRange, id<1> Offset, int _FlushBufferSize) {
422+
Acc.__init(Ptr, AccessRange, MemRange, Offset);
423+
FlushBufferSize = _FlushBufferSize;
424+
}
416425

417426
void __finalize() {}
427+
428+
private:
429+
cl::sycl::accessor<char, 1, cl::sycl::access::mode::read_write> Acc;
430+
int FlushBufferSize;
418431
};
419432

420433
template <typename T>

clang/test/CodeGenSYCL/stream.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,24 @@
11
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o %t.ll
22
// RUN: FileCheck < %t.ll --enable-var-scope %s
33
//
4-
// CHECK: define {{.*}}spir_kernel void @"{{.*}}StreamTester"(%"{{.*}}cl::sycl::stream"* byval(%"{{.*}}cl::sycl::stream") {{.*}}){{.*}}
5-
// CHECK: call spir_func void @{{.*}}__init{{.*}}(%{{.*}}cl::sycl::stream{{.*}} addrspace(4)* {{[^,]*}} %{{[0-9]+}})
6-
// CHECK: call spir_func void @{{.*}}__finalize{{.*}}(%{{.*}}cl::sycl::stream{{.*}} addrspace(4)* {{[^,]*}} %{{[0-9]+}})
7-
//
4+
// CHECK: %[[RANGE_TYPE:"struct.*cl::sycl::range"]]
5+
// CHECK: %[[ID_TYPE:"struct.*cl::sycl::id"]]
6+
7+
// CHECK: define dso_local spir_kernel void @{{.*}}StreamTester
8+
// CHECK-SAME: i8 addrspace(1)* [[ACC_DATA:%[a-zA-Z0-9_]+]],
9+
// CHECK-SAME: %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+]],
10+
// CHECK-SAME: %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+]],
11+
// CHECK-SAME: %[[ID_TYPE]]* byval(%[[ID_TYPE]]) align 4 [[ACC_ID:%[a-zA-Z0-9_]+]],
12+
// CHECK-SAME: i32 [[ACC_INT:%[a-zA-Z0-9_]+]])
13+
14+
// Alloca and addrspace casts for kernel parameters
15+
// CHECK: [[ARG:%[a-zA-Z0-9_]+]].addr = alloca i8 addrspace(1)*, align 8
16+
// CHECK: [[ARG:%[a-zA-Z0-9_]+]].addr.ascast = addrspacecast i8 addrspace(1)** [[ARG]].addr to i8 addrspace(1)* addrspace(4)*
17+
// CHECK: [[ARG_LOAD:%[a-zA-Z0-9_]+]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[ARG]].addr.ascast, align 8,
18+
19+
// Check __init and __finalize method calls
20+
// CHECK: call spir_func void @{{.*}}__init{{.*}}(%{{.*}}cl::sycl::stream" addrspace(4)* align 4 dereferenceable_or_null(16) %4, i8 addrspace(1)* [[ARG_LOAD]], %[[RANGE_TYPE]]* byval(%[[RANGE_TYPE]]) {{.*}}%{{.*}}
21+
// CHECK: call spir_func void @_ZN2cl4sycl6stream10__finalizeEv(%{{.*}}cl::sycl::stream" addrspace(4)* align 4 dereferenceable_or_null(16) %{{[0-9]+}})
822

923
#include "Inputs/sycl.hpp"
1024

clang/test/SemaSYCL/Inputs/sycl.hpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ class accessor {
114114
using PtrType = typename DeviceValueType<dataT, accessTarget>::type *;
115115
void __init(PtrType Ptr, range<dimensions> AccessRange,
116116
range<dimensions> MemRange, id<dimensions> Offset) {}
117+
friend class stream;
117118
};
118119

119120
template <int dimensions, access::mode accessmode, access::target accesstarget>
@@ -291,11 +292,24 @@ class stream {
291292
public:
292293
stream(unsigned long BufferSize, unsigned long MaxStatementSize,
293294
handler &CGH) {}
295+
#ifdef __SYCL_DEVICE_ONLY__
296+
// Default constructor for objects later initialized with __init member.
297+
stream() = default;
298+
#endif
299+
300+
void __init(__attribute((opencl_global)) char *Ptr, range<1> AccessRange,
301+
range<1> MemRange, id<1> Offset, int _FlushBufferSize) {
302+
Acc.__init(Ptr, AccessRange, MemRange, Offset);
303+
FlushBufferSize = _FlushBufferSize;
304+
}
294305

295-
void __init() {}
296306
void use() const {}
297307

298308
void __finalize() {}
309+
310+
private:
311+
cl::sycl::accessor<char, 1, cl::sycl::access::mode::read_write> Acc;
312+
int FlushBufferSize;
299313
};
300314

301315
namespace ONEAPI {

clang/test/SemaSYCL/decomposition.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,12 @@ int main() {
131131
myQueue.submit([&](sycl::handler &h) {
132132
h.single_task<class Stream1>([=]() { return t1.i; });
133133
});
134-
// CHECK: FunctionDecl {{.*}}Stream1{{.*}} 'void (sycl::stream, __global int *, sycl::range<1>, sycl::range<1>, sycl::id<1>, sycl::stream, __global int *, sycl::range<1>, sycl::range<1>, sycl::id<1>, sycl::stream, __global int *, sycl::range<1>, sycl::range<1>, sycl::id<1>, StructNonDecomposed, int)'
135-
134+
// CHECK: FunctionDecl {{.*}}Stream1{{.*}} 'void (__global char *, sycl::range<1>, sycl::range<1>, sycl::id<1>, int, __global char *, sycl::range<1>, sycl::range<1>, sycl::id<1>, int, __global char *, sycl::range<1>, sycl::range<1>, sycl::id<1>, int, StructNonDecomposed, int)'
136135
DerivedStruct<StructWithStream> t2;
137136
myQueue.submit([&](sycl::handler &h) {
138137
h.single_task<class Stream2>([=]() { return t2.i; });
139138
});
140-
// CHECK: FunctionDecl {{.*}}Stream2{{.*}} 'void (sycl::stream, __global int *, sycl::range<1>, sycl::range<1>, sycl::id<1>, StructNonDecomposed, int)'
139+
// CHECK: FunctionDecl {{.*}}Stream2{{.*}} 'void (__global char *, sycl::range<1>, sycl::range<1>, sycl::id<1>, int, StructNonDecomposed, int)'
141140
}
142141

143142
{

0 commit comments

Comments
 (0)