Skip to content

Commit b19d44d

Browse files
authored
Merge branch 'main' into usm_rebased
2 parents b00c00e + 675dd29 commit b19d44d

File tree

129 files changed

+2805
-830
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

129 files changed

+2805
-830
lines changed

.github/docker/install_dpcpp.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,5 @@ if [ "${SKIP_DPCPP_BUILD}" ]; then
1616
fi
1717

1818
mkdir -p ${DPCPP_PATH}/dpcpp_compiler
19-
wget -O ${DPCPP_PATH}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-01-29/sycl_linux.tar.gz
19+
wget -O ${DPCPP_PATH}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-09-27/sycl_linux.tar.gz
2020
tar -xvf ${DPCPP_PATH}/dpcpp_compiler.tar.gz -C ${DPCPP_PATH}/dpcpp_compiler

.github/workflows/cmake.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ jobs:
7979
if: matrix.os == 'ubuntu-22.04'
8080
run: |
8181
sudo apt install libncurses5
82-
wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-01-29/sycl_linux.tar.gz
82+
wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-09-27/sycl_linux.tar.gz
8383
mkdir -p ${{github.workspace}}/dpcpp_compiler
8484
tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C ${{github.workspace}}/dpcpp_compiler
8585

.github/workflows/e2e_level_zero.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
config: ""
2222
unit: "gpu"
2323
# Failing tests
24-
xfail: "DeviceCodeSplit/grf.cpp;ESIMD/mask_expand_load.cpp;KernelAndProgram/target_register_alloc_mode.cpp;Matrix/SG32/get_coord_int8_matB.cpp;Matrix/get_coord_int8_matB.cpp;Matrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_rowmajorA_rowmajorB.cpp;ESIMD/mask_expand_load.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_out_bounds.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_prefetch.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_rowmajorA_rowmajorB.cpp;Matrix/element_wise_all_ops_1d.cpp;Matrix/element_wise_all_ops_1d_cont.cpp;Matrix/element_wise_all_ops_scalar.cpp;Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp;Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp;Matrix/joint_matrix_out_bounds.cpp;Matrix/joint_matrix_unaligned_k.cpp;Matrix/SPVCooperativeMatrix/SG32/get_coord_int8_matB.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_1d.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_1d_cont.cpp;Matrix/SPVCooperativeMatrix/element_wise_all_ops_scalar.cpp;Matrix/SPVCooperativeMatrix/element_wise_ops.cpp;Matrix/SPVCooperativeMatrix/get_coord_int8_matB.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_SLM.cpp;Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp"
24+
xfail: "InvokeSimd/Regression/call_vadd_1d_spill.cpp;InvokeSimd/Regression/ImplicitSubgroup/call_vadd_1d_spill.cpp;DeviceCodeSplit/grf.cpp;ESIMD/mask_expand_load.cpp;KernelAndProgram/target_register_alloc_mode.cpp;Matrix/joint_matrix_prefetch.cpp;ESIMD/mask_expand_load.cpp;Matrix/SPVCooperativeMatrix/joint_matrix_prefetch.cpp;Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp;Matrix/SPVCooperativeMatrix/element_wise_ops.cpp;"
2525
# Unexpectedly Passed Tests
2626
xfail_not: ""
2727
# Flaky tests

.github/workflows/multi_device.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
# TODO: enable once test failures are fixed/ignored
3434
# - name: Download DPC++
3535
# run: |
36-
# wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-01-29/sycl_linux.tar.gz
36+
# wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/nightly-2024-09-27/sycl_linux.tar.gz
3737
# mkdir dpcpp_compiler
3838
# tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler
3939

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ List of options provided by CMake:
139139
| UR_BUILD_ADAPTER_HIP | Build the HIP adapter | ON/OFF | OFF |
140140
| UR_BUILD_ADAPTER_NATIVE_CPU | Build the Native-CPU adapter | ON/OFF | OFF |
141141
| UR_BUILD_ADAPTER_ALL | Build all currently supported adapters | ON/OFF | OFF |
142+
| UR_BUILD_ADAPTER_L0_V2 | Build the (experimental) Level-Zero v2 adapter | ON/OFF | OFF |
143+
| UR_STATIC_ADAPTER_L0 | Build the Level-Zero adapter as a static library and embed it in the loader | ON/OFF | OFF |
142144
| UR_HIP_PLATFORM | Build HIP adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD |
143145
| UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD |
144146
| UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` |

include/ur_api.h

Lines changed: 69 additions & 19 deletions
Large diffs are not rendered by default.

include/ur_ddi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1932,6 +1932,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendKernelLaunchExp_t)(
19321932
const size_t *,
19331933
const size_t *,
19341934
uint32_t,
1935+
ur_kernel_handle_t *,
1936+
uint32_t,
19351937
const ur_exp_command_buffer_sync_point_t *,
19361938
ur_exp_command_buffer_sync_point_t *,
19371939
ur_exp_command_buffer_command_handle_t *);

include/ur_print.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpExternalSemaphoreDesc(const struct
970970
/// - `buff_size < out_size`
971971
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyRegion(const struct ur_exp_image_copy_region_t params, char *buffer, const size_t buff_size, size_t *out_size);
972972

973+
///////////////////////////////////////////////////////////////////////////////
974+
/// @brief Print ur_device_command_buffer_update_capability_flag_t enum
975+
/// @returns
976+
/// - ::UR_RESULT_SUCCESS
977+
/// - ::UR_RESULT_ERROR_INVALID_SIZE
978+
/// - `buff_size < out_size`
979+
UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceCommandBufferUpdateCapabilityFlags(enum ur_device_command_buffer_update_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);
980+
973981
///////////////////////////////////////////////////////////////////////////////
974982
/// @brief Print ur_exp_command_buffer_info_t enum
975983
/// @returns

include/ur_print.hpp

Lines changed: 131 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,8 @@ inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t
197197
template <>
198198
inline ur_result_t printFlag<ur_exp_image_copy_flag_t>(std::ostream &os, uint32_t flag);
199199

200+
template <>
201+
inline ur_result_t printFlag<ur_device_command_buffer_update_capability_flag_t>(std::ostream &os, uint32_t flag);
200202
template <>
201203
inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_info_t value, size_t size);
202204

@@ -335,6 +337,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
335337
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_mem_desc_t params);
336338
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_semaphore_desc_t params);
337339
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_image_copy_region_t params);
340+
inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value);
338341
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_info_t value);
339342
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_command_info_t value);
340343
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params);
@@ -2541,8 +2544,8 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
25412544
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
25422545
os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP";
25432546
break;
2544-
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:
2545-
os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP";
2547+
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP:
2548+
os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP";
25462549
break;
25472550
case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP:
25482551
os << "UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP";
@@ -4049,15 +4052,16 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info
40494052

40504053
os << ")";
40514054
} break;
4052-
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
4053-
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4054-
if (sizeof(ur_bool_t) > size) {
4055-
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4055+
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
4056+
const ur_device_command_buffer_update_capability_flags_t *tptr = (const ur_device_command_buffer_update_capability_flags_t *)ptr;
4057+
if (sizeof(ur_device_command_buffer_update_capability_flags_t) > size) {
4058+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_command_buffer_update_capability_flags_t) << ")";
40564059
return UR_RESULT_ERROR_INVALID_SIZE;
40574060
}
40584061
os << (const void *)(tptr) << " (";
40594062

4060-
os << *tptr;
4063+
ur::details::printFlag<ur_device_command_buffer_update_capability_flag_t>(os,
4064+
*tptr);
40614065

40624066
os << ")";
40634067
} break;
@@ -9701,6 +9705,103 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_image_copy
97019705
return os;
97029706
}
97039707
///////////////////////////////////////////////////////////////////////////////
9708+
/// @brief Print operator for the ur_device_command_buffer_update_capability_flag_t type
9709+
/// @returns
9710+
/// std::ostream &
9711+
inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value) {
9712+
switch (value) {
9713+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS:
9714+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS";
9715+
break;
9716+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE:
9717+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE";
9718+
break;
9719+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE:
9720+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE";
9721+
break;
9722+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET:
9723+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET";
9724+
break;
9725+
case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE:
9726+
os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE";
9727+
break;
9728+
default:
9729+
os << "unknown enumerator";
9730+
break;
9731+
}
9732+
return os;
9733+
}
9734+
9735+
namespace ur::details {
9736+
///////////////////////////////////////////////////////////////////////////////
9737+
/// @brief Print ur_device_command_buffer_update_capability_flag_t flag
9738+
template <>
9739+
inline ur_result_t printFlag<ur_device_command_buffer_update_capability_flag_t>(std::ostream &os, uint32_t flag) {
9740+
uint32_t val = flag;
9741+
bool first = true;
9742+
9743+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS) {
9744+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS;
9745+
if (!first) {
9746+
os << " | ";
9747+
} else {
9748+
first = false;
9749+
}
9750+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS;
9751+
}
9752+
9753+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE) {
9754+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE;
9755+
if (!first) {
9756+
os << " | ";
9757+
} else {
9758+
first = false;
9759+
}
9760+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE;
9761+
}
9762+
9763+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE) {
9764+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE;
9765+
if (!first) {
9766+
os << " | ";
9767+
} else {
9768+
first = false;
9769+
}
9770+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE;
9771+
}
9772+
9773+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET) {
9774+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET;
9775+
if (!first) {
9776+
os << " | ";
9777+
} else {
9778+
first = false;
9779+
}
9780+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET;
9781+
}
9782+
9783+
if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE) {
9784+
val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
9785+
if (!first) {
9786+
os << " | ";
9787+
} else {
9788+
first = false;
9789+
}
9790+
os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
9791+
}
9792+
if (val != 0) {
9793+
std::bitset<32> bits(val);
9794+
if (!first) {
9795+
os << " | ";
9796+
}
9797+
os << "unknown bit flags " << bits;
9798+
} else if (first) {
9799+
os << "0";
9800+
}
9801+
return UR_RESULT_SUCCESS;
9802+
}
9803+
} // namespace ur::details
9804+
///////////////////////////////////////////////////////////////////////////////
97049805
/// @brief Print operator for the ur_exp_command_buffer_info_t type
97059806
/// @returns
97069807
/// std::ostream &
@@ -9953,6 +10054,12 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_command_bu
995310054
ur::details::printStruct(os,
995410055
(params.pNext));
995510056

10057+
os << ", ";
10058+
os << ".hNewKernel = ";
10059+
10060+
ur::details::printPtr(os,
10061+
(params.hNewKernel));
10062+
995610063
os << ", ";
995710064
os << ".numNewMemObjArgs = ";
995810065

@@ -15951,6 +16058,23 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
1595116058
ur::details::printPtr(os,
1595216059
*(params->ppLocalWorkSize));
1595316060

16061+
os << ", ";
16062+
os << ".numKernelAlternatives = ";
16063+
16064+
os << *(params->pnumKernelAlternatives);
16065+
16066+
os << ", ";
16067+
os << ".phKernelAlternatives = {";
16068+
for (size_t i = 0; *(params->pphKernelAlternatives) != NULL && i < *params->pnumKernelAlternatives; ++i) {
16069+
if (i != 0) {
16070+
os << ", ";
16071+
}
16072+
16073+
ur::details::printPtr(os,
16074+
(*(params->pphKernelAlternatives))[i]);
16075+
}
16076+
os << "}";
16077+
1595416078
os << ", ";
1595516079
os << ".numSyncPointsInWaitList = ";
1595616080

scripts/benchmarks/benches/base.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import os
77
import shutil
88
from pathlib import Path
9-
import subprocess # nosec B404
109
from .result import Result
1110
from .options import options
1211
from utils.utils import run
@@ -57,7 +56,7 @@ def lower_is_better(self):
5756
def setup(self):
5857
raise NotImplementedError()
5958

60-
def run(self, env_vars) -> Result:
59+
def run(self, env_vars) -> list[Result]:
6160
raise NotImplementedError()
6261

6362
def teardown(self):

scripts/benchmarks/benches/compute.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def setup(self):
6565
self.bench.setup()
6666
self.benchmark_bin = os.path.join(self.bench.bins, self.bench_name)
6767

68-
def run(self, env_vars) -> Result:
68+
def run(self, env_vars) -> list[Result]:
6969
command = [
7070
f"{self.benchmark_bin}",
7171
f"--test={self.test}",
@@ -78,7 +78,7 @@ def run(self, env_vars) -> Result:
7878

7979
result = self.run_bench(command, env_vars)
8080
(label, mean) = self.parse_output(result)
81-
return Result(label=label, value=mean, command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better())
81+
return [ Result(label=self.name(), value=mean, command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) ]
8282

8383
def parse_output(self, output):
8484
csv_file = io.StringIO(output)

scripts/benchmarks/benches/quicksilver.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def __init__(self, vb: VelocityBench):
1515
super().__init__("QuickSilver", "qs", vb)
1616
self.data_path = os.path.join(vb.repo_path, "QuickSilver", "Examples", "AllScattering")
1717

18-
def run(self, env_vars) -> Result:
18+
def run(self, env_vars) -> list[Result]:
1919
# TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0
2020
if 'UR_L0_USE_IMMEDIATE_COMMANDLISTS' in env_vars and env_vars['UR_L0_USE_IMMEDIATE_COMMANDLISTS'] == '0':
2121
return None

0 commit comments

Comments
 (0)