Skip to content

Commit c13cb8d

Browse files
v-klochkovromanovvlad
authored andcommitted
[SYCL] Fix issues on Windows: aligned alloc, prefetch, __builtin_expect, /EHsc
Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent 7af8c96 commit c13cb8d

File tree

8 files changed

+62
-17
lines changed

8 files changed

+62
-17
lines changed

sycl/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
99

1010
if(MSVC)
1111
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
12+
# Skip asynchronous C++ exceptions catching and assume "extern C" functions
13+
# never throw C++ exceptions.
14+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
1215
endif()
1316

1417
# Get clang's version

sycl/include/CL/sycl/detail/aligned_allocator.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include <CL/cl.h>
1212
#include <CL/sycl/detail/cnri.h>
13+
#include <CL/sycl/detail/os_util.hpp>
1314
#include <CL/sycl/range.hpp>
1415

1516
#include <cstring>
@@ -49,7 +50,7 @@ class aligned_allocator {
4950
pointer allocate(size_t Size) {
5051
Size += Alignment - Size % Alignment;
5152
pointer Result = reinterpret_cast<pointer>(
52-
aligned_alloc(Alignment, Size * sizeof(value_type)));
53+
detail::OSUtil::alignedAlloc(Alignment, Size * sizeof(value_type)));
5354
if (!Result)
5455
throw std::bad_alloc();
5556
return Result;
@@ -58,7 +59,7 @@ class aligned_allocator {
5859
// Release allocated memory
5960
void deallocate(pointer Ptr, size_t size) {
6061
if (Ptr)
61-
free(Ptr);
62+
detail::OSUtil::alignedFree(Ptr);
6263
}
6364

6465
bool operator==(const aligned_allocator&) { return true; }

sycl/include/CL/sycl/detail/os_util.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,13 @@ class OSUtil {
5353

5454
/// Returns the amount of RAM available for the operating system.
5555
static size_t getOSMemSize();
56+
57+
/// Allocates \p NumBytes bytes of uninitialized storage whose alignment
58+
/// is specified by \p Alignment.
59+
static void *alignedAlloc(size_t Alignment, size_t NumBytes);
60+
61+
/// Deallocates the memory referenced by \p Ptr.
62+
static void alignedFree(void *Ptr);
5663
};
5764

5865
} // namespace detail

sycl/include/CL/sycl/detail/platform_util.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010

1111
#include <cstdint>
1212

13+
#ifdef _MSC_VER
14+
// This feature is not supported in MSVC.
15+
#define __builtin_expect(a, b) (a)
16+
#endif
17+
1318
namespace cl {
1419
namespace sycl {
1520
namespace detail {
@@ -33,6 +38,8 @@ struct PlatformUtil {
3338
static uint32_t getMemCacheLineSize();
3439

3540
static uint64_t getMemCacheSize();
41+
42+
static void prefetch(const char *Ptr, size_t NumBytes);
3643
};
3744

3845
} // namespace detail

sycl/include/CL/sycl/multi_ptr.hpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -254,12 +254,8 @@ template <typename ElementType, access::address_space Space> class multi_ptr {
254254
Space == access::address_space::global_space>::type>
255255
void prefetch(size_t NumElements) const {
256256
size_t NumBytes = NumElements * sizeof(ElementType);
257-
#ifdef __SYCL_DEVICE_ONLY__
258-
auto PrefetchPtr = reinterpret_cast<const __global char *>(m_Pointer);
259-
#else
260-
auto PrefetchPtr = reinterpret_cast<const char *>(m_Pointer);
261-
#endif
262-
__spirv_ocl_prefetch(PrefetchPtr, NumBytes);
257+
using ptr_t = typename detail::PtrValueType<char, Space>::type const *;
258+
__spirv_ocl_prefetch(reinterpret_cast<ptr_t>(m_Pointer), NumBytes);
263259
}
264260

265261
private:

sycl/source/detail/os_util.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
#elif defined(SYCL_RT_OS_WINDOWS)
2323

2424
#include <Windows.h>
25-
25+
#include <malloc.h>
2626
#endif
2727

2828
namespace cl {
@@ -90,6 +90,22 @@ size_t OSUtil::getOSMemSize() {
9090
#endif
9191
}
9292

93+
void *OSUtil::alignedAlloc(size_t Alignment, size_t NumBytes) {
94+
#if defined(SYCL_RT_OS_LINUX)
95+
return aligned_alloc(Alignment, NumBytes);
96+
#elif defined(SYCL_RT_OS_WINDOWS)
97+
return _aligned_malloc(NumBytes, Alignment);
98+
#endif
99+
}
100+
101+
void OSUtil::alignedFree(void *Ptr) {
102+
#if defined(SYCL_RT_OS_LINUX)
103+
free(Ptr);
104+
#elif defined(SYCL_RT_OS_WINDOWS)
105+
_aligned_free(Ptr);
106+
#endif
107+
}
108+
93109
} // namespace detail
94110
} // namespace sycl
95111
} // namespace cl

sycl/source/detail/platform_util.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,27 @@ uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) {
120120
return VECTOR_WIDTH_SSE42[Index];
121121
}
122122

123+
void PlatformUtil::prefetch(const char *Ptr, size_t NumBytes) {
124+
if (!Ptr)
125+
return;
126+
127+
// The current implementation assumes 64-byte x86 cache lines.
128+
const size_t CacheLineSize = 64;
129+
const size_t CacheLineMask = ~(CacheLineSize - 1);
130+
const char *PtrEnd = Ptr + NumBytes;
131+
132+
// Set the pointer to the beginning of the current cache line.
133+
Ptr = reinterpret_cast<const char *>(
134+
reinterpret_cast<size_t>(Ptr) & CacheLineMask);
135+
for (; Ptr < PtrEnd; Ptr += CacheLineSize) {
136+
#if defined(SYCL_RT_OS_LINUX)
137+
__builtin_prefetch(Ptr);
138+
#elif defined(SYCL_RT_OS_WINDOWS)
139+
_mm_prefetch(Ptr, _MM_HINT_T0);
140+
#endif
141+
}
142+
}
143+
123144
} // namespace detail
124145
} // namespace sycl
125146
} // namespace cl

sycl/source/spirv_ops.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include <CL/__spirv/spirv_ops.hpp>
1010
#include <CL/sycl/exception.hpp>
11+
#include <CL/sycl/detail/platform_util.hpp>
1112
#include <atomic>
1213

1314
// This operation is NOP on HOST as all operations there are blocking and
@@ -32,12 +33,5 @@ void __spirv_MemoryBarrier(Scope Memory, uint32_t Semantics) noexcept {
3233
}
3334

3435
void __spirv_ocl_prefetch(const char *Ptr, size_t NumBytes) noexcept {
35-
// TODO: the cache line size may be different.
36-
const size_t CacheLineSize = 64;
37-
size_t NumCacheLines =
38-
(NumBytes / CacheLineSize) + ((NumBytes % CacheLineSize) ? 1 : 0);
39-
for (; NumCacheLines != 0; NumCacheLines--) {
40-
__builtin_prefetch(reinterpret_cast<const void *>(Ptr));
41-
Ptr += 64;
42-
}
36+
cl::sycl::detail::PlatformUtil::prefetch(Ptr, NumBytes);
4337
}

0 commit comments

Comments
 (0)