Skip to content

Commit a360250

Browse files
committed
Merge branch 'sycl' into use-device-usm-for-rtl-data
2 parents b02525f + dddb238 commit a360250

File tree

80 files changed

+2292
-549
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+2292
-549
lines changed

.github/workflows/build-fuzz-reusable.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ jobs:
4747
cmake --build build -j $(nproc)
4848
4949
- name: Configure CMake
50-
# CFI sanitization (or flto?) seems to cause linking to fail
51-
# https://github.com/oneapi-src/unified-runtime/issues/2323
5250
run: >
5351
cmake
5452
-B${{github.workspace}}/build
@@ -60,7 +58,6 @@ jobs:
6058
-DUR_USE_ASAN=ON
6159
-DUR_USE_UBSAN=ON
6260
-DUR_BUILD_ADAPTER_L0=ON
63-
-DUR_USE_CFI=OFF
6461
-DUR_LEVEL_ZERO_LOADER_LIBRARY=${{github.workspace}}/level-zero/build/lib/libze_loader.so
6562
-DUR_LEVEL_ZERO_INCLUDE_DIR=${{github.workspace}}/level-zero/include/
6663
-DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++

.github/workflows/build-hw-reusable.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,6 @@ jobs:
8282
tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler
8383
8484
- name: Configure CMake
85-
# CFI sanitization seems to fail on our CUDA nodes
86-
# https://github.com/oneapi-src/unified-runtime/issues/2309
8785
run: >
8886
cmake
8987
-B${{github.workspace}}/build
@@ -96,7 +94,6 @@ jobs:
9694
-DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON
9795
-DUR_CONFORMANCE_TEST_LOADER=${{ matrix.adapter.other_name != '' && 'ON' || 'OFF' }}
9896
${{ matrix.adapter.other_name != '' && format('-DUR_BUILD_ADAPTER_{0}=ON', matrix.adapter.other_name) || '' }}
99-
-DUR_USE_CFI=${{ matrix.adapter.name == 'CUDA' && 'OFF' || 'ON' }}
10097
-DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}}
10198
-DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}}
10299
-DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++

.github/workflows/cmake.yml

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -221,13 +221,14 @@ jobs:
221221
needs: [ubuntu-build, opencl]
222222
uses: ./.github/workflows/e2e_opencl.yml
223223

224-
e2e-cuda:
225-
name: E2E CUDA
226-
permissions:
227-
contents: read
228-
pull-requests: write
229-
needs: [ubuntu-build, cuda]
230-
uses: ./.github/workflows/e2e_cuda.yml
224+
# Causes hangs: https://github.com/oneapi-src/unified-runtime/issues/2398
225+
#e2e-cuda:
226+
# name: E2E CUDA
227+
# permissions:
228+
# contents: read
229+
# pull-requests: write
230+
# needs: [ubuntu-build, cuda]
231+
# uses: ./.github/workflows/e2e_cuda.yml
231232

232233
windows-build:
233234
name: Build - Windows

.github/workflows/e2e_core.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ jobs:
190190
191191
- name: Run e2e tests
192192
id: tests
193-
run: ninja -C build-e2e check-sycl-e2e
193+
run: ninja -C build-e2e check-sycl-e2e || echo "e2e tests have failed. Ignoring failure."
194194

195195
# FIXME: Requires 'pull-requests: write' permission, but this is only granted
196196
# on pull requests from forks if using pull_request_target workflow

cmake/FetchLevelZero.cmake

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ set(UR_LEVEL_ZERO_LOADER_LIBRARY "" CACHE FILEPATH "Path of the Level Zero Loade
77
set(UR_LEVEL_ZERO_INCLUDE_DIR "" CACHE FILEPATH "Directory containing the Level Zero Headers")
88
set(UR_LEVEL_ZERO_LOADER_REPO "" CACHE STRING "Github repo to get the Level Zero loader sources from")
99
set(UR_LEVEL_ZERO_LOADER_TAG "" CACHE STRING " GIT tag of the Level Loader taken from github repo")
10+
set(UR_COMPUTE_RUNTIME_REPO "" CACHE STRING "Github repo to get the compute runtime sources from")
11+
set(UR_COMPUTE_RUNTIME_TAG "" CACHE STRING " GIT tag of the compute runtime taken from github repo")
1012

1113
# Copy Level Zero loader/headers locally to the build to avoid leaking their path.
1214
set(LEVEL_ZERO_COPY_DIR ${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader)
@@ -87,8 +89,31 @@ target_link_libraries(LevelZeroLoader
8789
INTERFACE "${LEVEL_ZERO_LIB_NAME}"
8890
)
8991

92+
file(GLOB LEVEL_ZERO_LOADER_API_HEADERS "${LEVEL_ZERO_INCLUDE_DIR}/*.h")
93+
file(COPY ${LEVEL_ZERO_LOADER_API_HEADERS} DESTINATION ${LEVEL_ZERO_INCLUDE_DIR}/level_zero)
9094
add_library(LevelZeroLoader-Headers INTERFACE)
9195
target_include_directories(LevelZeroLoader-Headers
92-
INTERFACE "$<BUILD_INTERFACE:${LEVEL_ZERO_INCLUDE_DIR}>"
96+
INTERFACE "$<BUILD_INTERFACE:${LEVEL_ZERO_INCLUDE_DIR};${LEVEL_ZERO_INCLUDE_DIR}/level_zero>"
97+
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
98+
)
99+
100+
if (UR_COMPUTE_RUNTIME_REPO STREQUAL "")
101+
set(UR_COMPUTE_RUNTIME_REPO "https://github.com/intel/compute-runtime.git")
102+
endif()
103+
if (UR_COMPUTE_RUNTIME_TAG STREQUAL "")
104+
set(UR_COMPUTE_RUNTIME_TAG 24.39.31294.12)
105+
endif()
106+
include(FetchContent)
107+
# Sparse fetch only the dir with level zero headers to avoid pulling in the entire compute-runtime.
108+
FetchContentSparse_Declare(compute-runtime-level-zero-headers ${UR_COMPUTE_RUNTIME_REPO} "${UR_COMPUTE_RUNTIME_TAG}" "level_zero/include")
109+
FetchContent_GetProperties(compute-runtime-level-zero-headers)
110+
if(NOT compute-runtime-level-zero-headers_POPULATED)
111+
FetchContent_Populate(compute-runtime-level-zero-headers)
112+
endif()
113+
add_library(ComputeRuntimeLevelZero-Headers INTERFACE)
114+
set(COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE "${compute-runtime-level-zero-headers_SOURCE_DIR}/../..")
115+
message(STATUS "Level Zero Adapter: Using Level Zero headers from ${COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE}")
116+
target_include_directories(ComputeRuntimeLevelZero-Headers
117+
INTERFACE "$<BUILD_INTERFACE:${COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE}>"
93118
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
94119
)

cmake/helpers.cmake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ if(CMAKE_SYSTEM_NAME STREQUAL Linux)
6363
check_cxx_compiler_flag("-fstack-clash-protection" CXX_HAS_FSTACK_CLASH_PROTECTION)
6464
endif()
6565

66+
if (UR_USE_CFI AND UR_USE_ASAN)
67+
message(WARNING "Both UR_USE_CFI and UR_USE_ASAN are ON. "
68+
"Due to build errors, this is unsupported; CFI checks will be disabled")
69+
set(UR_USE_CFI OFF)
70+
endif()
71+
6672
if (UR_USE_CFI)
6773
set(SAVED_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
6874
set(CMAKE_REQUIRED_FLAGS "-flto -fvisibility=hidden")

include/ur_api.h

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1705,6 +1705,8 @@ typedef enum ur_device_info_t {
17051705
UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP = 0x2020, ///< [::ur_bool_t] returns true if the device supports enqueueing of native
17061706
///< work
17071707
UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP = 0x2021, ///< [::ur_bool_t] returns true if the device supports low-power events.
1708+
UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP = 0x2022, ///< [::ur_exp_device_2d_block_array_capability_flags_t] returns a bit-field
1709+
///< of Intel GPU 2D block array capabilities
17081710
/// @cond
17091711
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff
17101712
/// @endcond
@@ -1730,7 +1732,7 @@ typedef enum ur_device_info_t {
17301732
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
17311733
/// + `NULL == hDevice`
17321734
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
1733-
/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName`
1735+
/// + `::UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName`
17341736
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
17351737
/// + If `propName` is not supported by the adapter.
17361738
/// - ::UR_RESULT_ERROR_INVALID_SIZE
@@ -7428,6 +7430,27 @@ urEnqueueWriteHostPipe(
74287430
///< an element of the phEventWaitList array.
74297431
);
74307432

7433+
#if !defined(__GNUC__)
7434+
#pragma endregion
7435+
#endif
7436+
// Intel 'oneAPI' Unified Runtime Experimental device descriptor for querying Intel device 2D block array capabilities
7437+
#if !defined(__GNUC__)
7438+
#pragma region 2d_block_array_capabilities_(experimental)
7439+
#endif
7440+
///////////////////////////////////////////////////////////////////////////////
7441+
/// @brief Intel GPU 2D block array capabilities
7442+
typedef uint32_t ur_exp_device_2d_block_array_capability_flags_t;
7443+
typedef enum ur_exp_device_2d_block_array_capability_flag_t {
7444+
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD = UR_BIT(0), ///< Load instructions are supported
7445+
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE = UR_BIT(1), ///< Store instructions are supported
7446+
/// @cond
7447+
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_FORCE_UINT32 = 0x7fffffff
7448+
/// @endcond
7449+
7450+
} ur_exp_device_2d_block_array_capability_flag_t;
7451+
/// @brief Bit Mask for validating ur_exp_device_2d_block_array_capability_flags_t
7452+
#define UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAGS_MASK 0xfffffffc
7453+
74317454
#if !defined(__GNUC__)
74327455
#pragma endregion
74337456
#endif

include/ur_print.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintMapFlags(enum ur_map_flag_t value, ch
874874
/// - `buff_size < out_size`
875875
UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmMigrationFlags(enum ur_usm_migration_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);
876876

877+
///////////////////////////////////////////////////////////////////////////////
878+
/// @brief Print ur_exp_device_2d_block_array_capability_flag_t enum
879+
/// @returns
880+
/// - ::UR_RESULT_SUCCESS
881+
/// - ::UR_RESULT_ERROR_INVALID_SIZE
882+
/// - `buff_size < out_size`
883+
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpDevice_2dBlockArrayCapabilityFlags(enum ur_exp_device_2d_block_array_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);
884+
877885
///////////////////////////////////////////////////////////////////////////////
878886
/// @brief Print ur_exp_image_copy_flag_t enum
879887
/// @returns

include/ur_print.hpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,9 @@ inline ur_result_t printFlag<ur_map_flag_t>(std::ostream &os, uint32_t flag);
194194
template <>
195195
inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t flag);
196196

197+
template <>
198+
inline ur_result_t printFlag<ur_exp_device_2d_block_array_capability_flag_t>(std::ostream &os, uint32_t flag);
199+
197200
template <>
198201
inline ur_result_t printFlag<ur_exp_image_copy_flag_t>(std::ostream &os, uint32_t flag);
199202

@@ -328,6 +331,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
328331
inline std::ostream &operator<<(std::ostream &os, enum ur_execution_info_t value);
329332
inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value);
330333
inline std::ostream &operator<<(std::ostream &os, enum ur_usm_migration_flag_t value);
334+
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_device_2d_block_array_capability_flag_t value);
331335
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value);
332336
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_sampler_cubemap_filter_mode_t value);
333337
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_external_mem_type_t value);
@@ -2665,6 +2669,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
26652669
case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP:
26662670
os << "UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP";
26672671
break;
2672+
case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP:
2673+
os << "UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP";
2674+
break;
26682675
default:
26692676
os << "unknown enumerator";
26702677
break;
@@ -4472,6 +4479,19 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info
44724479

44734480
os << ")";
44744481
} break;
4482+
case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: {
4483+
const ur_exp_device_2d_block_array_capability_flags_t *tptr = (const ur_exp_device_2d_block_array_capability_flags_t *)ptr;
4484+
if (sizeof(ur_exp_device_2d_block_array_capability_flags_t) > size) {
4485+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_exp_device_2d_block_array_capability_flags_t) << ")";
4486+
return UR_RESULT_ERROR_INVALID_SIZE;
4487+
}
4488+
os << (const void *)(tptr) << " (";
4489+
4490+
ur::details::printFlag<ur_exp_device_2d_block_array_capability_flag_t>(os,
4491+
*tptr);
4492+
4493+
os << ")";
4494+
} break;
44754495
default:
44764496
os << "unknown enumerator";
44774497
return UR_RESULT_ERROR_INVALID_ENUMERATION;
@@ -9455,6 +9475,64 @@ inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t
94559475
}
94569476
} // namespace ur::details
94579477
///////////////////////////////////////////////////////////////////////////////
9478+
/// @brief Print operator for the ur_exp_device_2d_block_array_capability_flag_t type
9479+
/// @returns
9480+
/// std::ostream &
9481+
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_device_2d_block_array_capability_flag_t value) {
9482+
switch (value) {
9483+
case UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD:
9484+
os << "UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD";
9485+
break;
9486+
case UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE:
9487+
os << "UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE";
9488+
break;
9489+
default:
9490+
os << "unknown enumerator";
9491+
break;
9492+
}
9493+
return os;
9494+
}
9495+
9496+
namespace ur::details {
9497+
///////////////////////////////////////////////////////////////////////////////
9498+
/// @brief Print ur_exp_device_2d_block_array_capability_flag_t flag
9499+
template <>
9500+
inline ur_result_t printFlag<ur_exp_device_2d_block_array_capability_flag_t>(std::ostream &os, uint32_t flag) {
9501+
uint32_t val = flag;
9502+
bool first = true;
9503+
9504+
if ((val & UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD) == (uint32_t)UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD) {
9505+
val ^= (uint32_t)UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD;
9506+
if (!first) {
9507+
os << " | ";
9508+
} else {
9509+
first = false;
9510+
}
9511+
os << UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD;
9512+
}
9513+
9514+
if ((val & UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE) == (uint32_t)UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE) {
9515+
val ^= (uint32_t)UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE;
9516+
if (!first) {
9517+
os << " | ";
9518+
} else {
9519+
first = false;
9520+
}
9521+
os << UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE;
9522+
}
9523+
if (val != 0) {
9524+
std::bitset<32> bits(val);
9525+
if (!first) {
9526+
os << " | ";
9527+
}
9528+
os << "unknown bit flags " << bits;
9529+
} else if (first) {
9530+
os << "0";
9531+
}
9532+
return UR_RESULT_SUCCESS;
9533+
}
9534+
} // namespace ur::details
9535+
///////////////////////////////////////////////////////////////////////////////
94589536
/// @brief Print operator for the ur_exp_image_copy_flag_t type
94599537
/// @returns
94609538
/// std::ostream &

scripts/benchmarks/benches/base.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,26 +40,25 @@ def run_bench(self, command, env_vars, ld_library=[]):
4040
ld_library=ld_library
4141
).stdout.decode()
4242

43-
def create_data_path(self, name):
44-
data_path = os.path.join(self.directory, "data", name)
45-
46-
if options.rebuild and Path(data_path).exists():
47-
shutil.rmtree(data_path)
43+
def create_data_path(self, name, skip_data_dir = False):
44+
if skip_data_dir:
45+
data_path = os.path.join(self.directory, name)
46+
else:
47+
data_path = os.path.join(self.directory, 'data', name)
48+
if options.rebuild and Path(data_path).exists():
49+
shutil.rmtree(data_path)
4850

4951
Path(data_path).mkdir(parents=True, exist_ok=True)
5052

5153
return data_path
5254

53-
def download(self, name, url, file, untar = False):
54-
self.data_path = self.create_data_path(name)
55-
return download(self.data_path, url, file, True)
55+
def download(self, name, url, file, untar = False, unzip = False, skip_data_dir = False):
56+
self.data_path = self.create_data_path(name, skip_data_dir)
57+
return download(self.data_path, url, file, untar, unzip)
5658

5759
def name(self):
5860
raise NotImplementedError()
5961

60-
def unit(self):
61-
raise NotImplementedError()
62-
6362
def lower_is_better(self):
6463
return True
6564

0 commit comments

Comments
 (0)