Skip to content

Commit 6af2069

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (#3)
2 parents dcd1f12 + 73e957f commit 6af2069

File tree

7 files changed

+280
-77
lines changed

7 files changed

+280
-77
lines changed

llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp

Lines changed: 5 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,6 @@ class ESIMDLowerVecArgPass {
8989
Function *rewriteFunc(Function &F);
9090
Type *getSimdArgPtrTyOrNull(Value *arg);
9191
void fixGlobals(Module &M);
92-
void replaceConstExprWithGlobals(Module &M);
93-
ConstantExpr *createNewConstantExpr(GlobalVariable *newGlobalVar,
94-
Type *oldGlobalType, Value *old);
9592
void removeOldGlobals();
9693
};
9794

@@ -229,41 +226,6 @@ Function *ESIMDLowerVecArgPass::rewriteFunc(Function &F) {
229226
return NF;
230227
}
231228

232-
// Replace ConstantExpr if it contains old global variable.
233-
ConstantExpr *
234-
ESIMDLowerVecArgPass::createNewConstantExpr(GlobalVariable *NewGlobalVar,
235-
Type *OldGlobalType, Value *Old) {
236-
ConstantExpr *NewConstantExpr = nullptr;
237-
238-
if (isa<GlobalVariable>(Old)) {
239-
NewConstantExpr = cast<ConstantExpr>(
240-
ConstantExpr::getBitCast(NewGlobalVar, OldGlobalType));
241-
return NewConstantExpr;
242-
}
243-
244-
auto InnerMost = createNewConstantExpr(
245-
NewGlobalVar, OldGlobalType, cast<ConstantExpr>(Old)->getOperand(0));
246-
247-
NewConstantExpr = cast<ConstantExpr>(
248-
cast<ConstantExpr>(Old)->getWithOperandReplaced(0, InnerMost));
249-
250-
return NewConstantExpr;
251-
}
252-
253-
// Globals are part of ConstantExpr. This loop iterates over
254-
// all such instances and replaces them with a new ConstantExpr
255-
// consisting of new global vector* variable.
256-
void ESIMDLowerVecArgPass::replaceConstExprWithGlobals(Module &M) {
257-
for (auto &GlobalVars : OldNewGlobal) {
258-
auto &G = *GlobalVars.first;
259-
for (auto UseOfG : G.users()) {
260-
auto NewGlobal = GlobalVars.second;
261-
auto NewConstExpr = createNewConstantExpr(NewGlobal, G.getType(), UseOfG);
262-
UseOfG->replaceAllUsesWith(NewConstExpr);
263-
}
264-
}
265-
}
266-
267229
// This function creates new global variables of vector* type
268230
// when the old one is of simd* type.
269231
void ESIMDLowerVecArgPass::fixGlobals(Module &M) {
@@ -288,16 +250,17 @@ void ESIMDLowerVecArgPass::fixGlobals(Module &M) {
288250
}
289251
}
290252

291-
replaceConstExprWithGlobals(M);
292-
293253
removeOldGlobals();
294254
}
295255

296256
// Remove old global variables from the program.
297257
void ESIMDLowerVecArgPass::removeOldGlobals() {
298258
for (auto &G : OldNewGlobal) {
299-
G.first->removeDeadConstantUsers();
300-
G.first->eraseFromParent();
259+
auto OldGlob = G.first;
260+
auto NewGlobal = G.second;
261+
OldGlob->replaceAllUsesWith(
262+
ConstantExpr::getBitCast(NewGlobal, OldGlob->getType()));
263+
OldGlob->eraseFromParent();
301264
}
302265
}
303266

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -ESIMDLowerVecArg -S | FileCheck %s
3+
4+
; This test checks that there is no compiler crash when a Global
5+
; is used in a simple instruction, not directly in a ConstantExpr.
6+
7+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
8+
target triple = "spir64-unknown-unknown-sycldevice"
9+
10+
%"class.cl::sycl::INTEL::gpu::simd" = type { <2512 x i32> }
11+
12+
; CHECK: @Global = dso_local global <2512 x i32> undef, align 16384
13+
@Global = dso_local global %"class.cl::sycl::INTEL::gpu::simd" undef, align 16384
14+
15+
define void @no_crash(<2512 x i32> %simd_val) {
16+
; CHECK-LABEL: @no_crash(
17+
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast %"class.cl::sycl::INTEL::gpu::simd"* bitcast (<2512 x i32>* @Global to %"class.cl::sycl::INTEL::gpu::simd"*) to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*
18+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr %"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* [[CAST]], i64 0, i32 0
19+
; CHECK-NEXT: store <2512 x i32> [[SIMD_VAL:%.*]], <2512 x i32> addrspace(4)* [[GEP]], align 16384
20+
; CHECK-NEXT: ret void
21+
;
22+
%cast = addrspacecast %"class.cl::sycl::INTEL::gpu::simd"* @Global to %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)*
23+
%gep = getelementptr %"class.cl::sycl::INTEL::gpu::simd", %"class.cl::sycl::INTEL::gpu::simd" addrspace(4)* %cast, i64 0, i32 0
24+
store <2512 x i32> %simd_val, <2512 x i32> addrspace(4)* %gep, align 16384
25+
ret void
26+
}

sycl/include/CL/sycl/detail/type_traits.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,19 @@ template <typename T>
207207
struct is_vector_arithmetic
208208
: bool_constant<is_vec<T>::value && is_arithmetic<T>::value> {};
209209

210+
// is_bool
211+
template <typename T>
212+
struct is_scalar_bool
213+
: bool_constant<std::is_same<remove_cv_t<T>, bool>::value> {};
214+
215+
template <typename T>
216+
struct is_vector_bool
217+
: bool_constant<is_vec<T>::value &&
218+
is_scalar_bool<vector_element_t<T>>::value> {};
219+
220+
template <typename T>
221+
struct is_bool : bool_constant<is_scalar_bool<vector_element_t<T>>::value> {};
222+
210223
// is_pointer
211224
template <typename T> struct is_pointer_impl : std::false_type {};
212225

sycl/include/CL/sycl/group.hpp

Lines changed: 71 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -274,58 +274,99 @@ template <int Dimensions = 1> class group {
274274
__spirv_MemoryBarrier(__spv::Scope::Workgroup, flags);
275275
}
276276

277+
/// Asynchronously copies a number of elements specified by \p numElements
278+
/// from the source pointed by \p src to destination pointed by \p dest
279+
/// with a source stride specified by \p srcStride, and returns a SYCL
280+
/// device_event which can be used to wait on the completion of the copy.
281+
/// Permitted types for dataT are all scalar and vector types, except boolean.
277282
template <typename dataT>
278-
device_event async_work_group_copy(local_ptr<dataT> dest,
279-
global_ptr<dataT> src,
280-
size_t numElements) const {
283+
detail::enable_if_t<!detail::is_bool<dataT>::value, device_event>
284+
async_work_group_copy(local_ptr<dataT> dest, global_ptr<dataT> src,
285+
size_t numElements, size_t srcStride) const {
281286
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
282287
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
283288

284-
__ocl_event_t e = OpGroupAsyncCopyGlobalToLocal(
289+
__ocl_event_t E = OpGroupAsyncCopyGlobalToLocal(
285290
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
286-
numElements, 1, 0);
287-
return device_event(&e);
291+
numElements, srcStride, 0);
292+
return device_event(&E);
288293
}
289294

295+
/// Asynchronously copies a number of elements specified by \p numElements
296+
/// from the source pointed by \p src to destination pointed by \p dest with
297+
/// the destination stride specified by \p destStride, and returns a SYCL
298+
/// device_event which can be used to wait on the completion of the copy.
299+
/// Permitted types for dataT are all scalar and vector types, except boolean.
290300
template <typename dataT>
291-
device_event async_work_group_copy(global_ptr<dataT> dest,
292-
local_ptr<dataT> src,
293-
size_t numElements) const {
301+
detail::enable_if_t<!detail::is_bool<dataT>::value, device_event>
302+
async_work_group_copy(global_ptr<dataT> dest, local_ptr<dataT> src,
303+
size_t numElements, size_t destStride) const {
294304
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
295305
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
296306

297-
__ocl_event_t e = OpGroupAsyncCopyLocalToGlobal(
307+
__ocl_event_t E = OpGroupAsyncCopyLocalToGlobal(
298308
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
299-
numElements, 1, 0);
300-
return device_event(&e);
309+
numElements, destStride, 0);
310+
return device_event(&E);
311+
}
312+
313+
/// Overload for scalar bool type.
314+
/// Asynchronously copies a number of elements specified by \p NumElements
315+
/// from the source pointed by \p Src to destination pointed by \p Dest
316+
/// with a stride specified by \p Stride, and returns a SYCL device_event
317+
/// which can be used to wait on the completion of the copy.
318+
template <typename T, access::address_space DestS, access::address_space SrcS>
319+
detail::enable_if_t<detail::is_scalar_bool<T>::value, device_event>
320+
async_work_group_copy(multi_ptr<T, DestS> Dest, multi_ptr<T, SrcS> Src,
321+
size_t NumElements, size_t Stride) const {
322+
static_assert(sizeof(bool) == sizeof(uint8_t),
323+
"Async copy to/from bool memory is not supported.");
324+
auto DestP =
325+
multi_ptr<uint8_t, DestS>(reinterpret_cast<uint8_t *>(Dest.get()));
326+
auto SrcP =
327+
multi_ptr<uint8_t, SrcS>(reinterpret_cast<uint8_t *>(Src.get()));
328+
return async_work_group_copy(DestP, SrcP, NumElements, Stride);
329+
}
330+
331+
/// Overload for vector bool type.
332+
/// Asynchronously copies a number of elements specified by \p NumElements
333+
/// from the source pointed by \p Src to destination pointed by \p Dest
334+
/// with a stride specified by \p Stride, and returns a SYCL device_event
335+
/// which can be used to wait on the completion of the copy.
336+
template <typename T, access::address_space DestS, access::address_space SrcS>
337+
detail::enable_if_t<detail::is_vector_bool<T>::value, device_event>
338+
async_work_group_copy(multi_ptr<T, DestS> Dest, multi_ptr<T, SrcS> Src,
339+
size_t NumElements, size_t Stride) const {
340+
static_assert(sizeof(bool) == sizeof(uint8_t),
341+
"Async copy to/from bool memory is not supported.");
342+
using VecT = detail::change_base_type_t<T, uint8_t>;
343+
auto DestP = multi_ptr<VecT, DestS>(reinterpret_cast<VecT *>(Dest.get()));
344+
auto SrcP = multi_ptr<VecT, SrcS>(reinterpret_cast<VecT *>(Src.get()));
345+
return async_work_group_copy(DestP, SrcP, NumElements, Stride);
301346
}
302347

348+
/// Asynchronously copies a number of elements specified by \p numElements
349+
/// from the source pointed by \p src to destination pointed by \p dest and
350+
/// returns a SYCL device_event which can be used to wait on the completion
351+
/// of the copy.
352+
/// Permitted types for dataT are all scalar and vector types.
303353
template <typename dataT>
304354
device_event async_work_group_copy(local_ptr<dataT> dest,
305355
global_ptr<dataT> src,
306-
size_t numElements,
307-
size_t srcStride) const {
308-
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
309-
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
310-
311-
__ocl_event_t e = OpGroupAsyncCopyGlobalToLocal(
312-
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
313-
numElements, srcStride, 0);
314-
return device_event(&e);
356+
size_t numElements) const {
357+
return async_work_group_copy(dest, src, numElements, 1);
315358
}
316359

360+
/// Asynchronously copies a number of elements specified by \p numElements
361+
/// from the source pointed by \p src to destination pointed by \p dest and
362+
/// returns a SYCL device_event which can be used to wait on the completion
363+
/// of the copy.
364+
/// Permitted types for dataT are all scalar and vector types.
317365
template <typename dataT>
318366
device_event async_work_group_copy(global_ptr<dataT> dest,
319367
local_ptr<dataT> src,
320-
size_t numElements,
321-
size_t destStride) const {
322-
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
323-
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
324-
325-
__ocl_event_t e = OpGroupAsyncCopyLocalToGlobal(
326-
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
327-
numElements, destStride, 0);
328-
return device_event(&e);
368+
size_t numElements) const {
369+
return async_work_group_copy(dest, src, numElements, 1);
329370
}
330371

331372
template <typename... eventTN>

sycl/include/CL/sycl/handler.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,7 +1589,7 @@ class __SYCL_EXPORT handler {
15891589
// Make sure the data the shared_ptr points to is not released until we
15901590
// finish working with it.
15911591
MSharedPtrStorage.push_back(Dst);
1592-
T_Dst *RawDstPtr = Dst.get();
1592+
typename shared_ptr_class<T_Dst>::element_type *RawDstPtr = Dst.get();
15931593
copy(Src, RawDstPtr);
15941594
}
15951595

@@ -1612,7 +1612,7 @@ class __SYCL_EXPORT handler {
16121612
// Make sure the data the shared_ptr points to is not released until we
16131613
// finish working with it.
16141614
MSharedPtrStorage.push_back(Src);
1615-
T_Src *RawSrcPtr = Src.get();
1615+
typename shared_ptr_class<T_Src>::element_type *RawSrcPtr = Src.get();
16161616
copy(RawSrcPtr, Dst);
16171617
}
16181618

0 commit comments

Comments
 (0)