Skip to content

Commit 5fb8292

Browse files
authored
Merge branch 'adapters' into l0_usm_error_checking_2
2 parents fe469d7 + d70bae4 commit 5fb8292

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+283
-140
lines changed

.github/workflows/cmake.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,12 +196,16 @@ jobs:
196196
-DUR_BUILD_TESTS=ON
197197
-DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON
198198
-DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++
199+
-DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib
199200
-DUR_CONFORMANCE_TARGET_TRIPLES=${{matrix.adapter.triplet}}
200201
201202
- name: Build
202203
# This is so that device binaries can find the sycl runtime library
203-
run: LD_LIBRARY_PATH=${{github.workspace}}/dpcpp_compiler/lib
204-
cmake --build ${{github.workspace}}/build -j $(nproc)
204+
run: cmake --build ${{github.workspace}}/build -j $(nproc)
205+
206+
- name: Test adapter specific
207+
working-directory: ${{github.workspace}}/build
208+
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" --timeout 180
205209

206210
# Temporarily disabling platform test for L0, because of hang
207211
# See issue: #824

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ option(UR_BUILD_ADAPTER_CUDA "build cuda adapter from SYCL" OFF)
4141
option(UR_BUILD_ADAPTER_HIP "build hip adapter from SYCL" OFF)
4242
option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
4343
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
44+
set(UR_DPCXX "" CACHE FILEPATH "Path of the DPC++ compiler executable")
45+
set(UR_SYCL_LIBRARY_DIR "" CACHE PATH
46+
"Path of the SYCL runtime library directory")
4447

4548
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
4649
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ List of options provided by CMake:
133133
| UR_BUILD_ADAPTER_HIP | Fetch and use hip adapter from SYCL | ON/OFF | OFF |
134134
| UR_HIP_PLATFORM | Build hip adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD |
135135
| UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD |
136+
| UR_DPCXX | Path of the DPC++ compiler executable used to build CTS device binaries | File path | `""` |
137+
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory used when building CTS device binaries | Directory path | `""` |
136138

137139
### Additional make targets
138140

examples/codegen/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,5 @@ if(LLVM_FOUND AND PkgConfig_FOUND AND LLVMSPIRVLib_FOUND)
4343
)
4444
endif()
4545
else()
46-
message(STATUS "The environment did not satisfy dependency requirements (LLVM, PkgConfig, LLVMSPIRVLib) for codegen example (skipping target).")
46+
message(FATAL_ERROR "The environment did not satisfy dependency requirements (LLVM, PkgConfig, LLVMSPIRVLib) for codegen example (skipping target).")
4747
endif()

scripts/generate_code.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,25 @@ def _mako_info_hpp(path, namespace, tags, version, specs, meta):
315315
specs=specs,
316316
meta=meta)
317317

318+
"""
319+
Entry-point:
320+
generates linker version scripts
321+
"""
322+
def _mako_linker_scripts(path, ext, namespace, tags, version, specs, meta):
323+
name = "adapter"
324+
filename = f"{name}.{ext}.in"
325+
fin = os.path.join(templates_dir, f"{filename}.mako")
326+
fout = os.path.join(path, filename)
327+
print("Generating %s..." % fout)
328+
return util.makoWrite(
329+
fin, fout,
330+
name=name,
331+
ver=version,
332+
namespace=namespace,
333+
tags=tags,
334+
specs=specs,
335+
meta=meta)
336+
318337
"""
319338
Entry-point:
320339
generates lib code
@@ -349,6 +368,8 @@ def generate_adapters(path, section, namespace, tags, version, specs, meta):
349368

350369
loc = 0
351370
loc += _mako_null_adapter_cpp(dstpath, namespace, tags, version, specs, meta)
371+
loc += _mako_linker_scripts(dstpath, "map", namespace, tags, version, specs, meta)
372+
loc += _mako_linker_scripts(dstpath, "def", namespace, tags, version, specs, meta)
352373
print("Generated %s lines of code.\n"%loc)
353374

354375
"""

scripts/templates/adapter.def.in.mako

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<%!
2+
import re
3+
from templates import helper as th
4+
%><%
5+
n=namespace
6+
%>\
7+
LIBRARY @TARGET_LIBNAME@
8+
EXPORTS
9+
%for tbl in th.get_pfntables(specs, meta, n, tags):
10+
${tbl['export']['name']}
11+
%endfor

scripts/templates/adapter.map.in.mako

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<%!
2+
import re
3+
from templates import helper as th
4+
%><%
5+
n=namespace
6+
%>\
7+
@TARGET_LIBNAME@ {
8+
global:
9+
%for tbl in th.get_pfntables(specs, meta, n, tags):
10+
${tbl['export']['name']};
11+
%endfor
12+
local:
13+
*;
14+
};

source/adapters/adapter.def.in

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
LIBRARY @TARGET_LIBNAME@
22
EXPORTS
3+
urGetGlobalProcAddrTable
34
urGetBindlessImagesExpProcAddrTable
45
urGetCommandBufferExpProcAddrTable
56
urGetContextProcAddrTable
6-
urGetDeviceProcAddrTable
77
urGetEnqueueProcAddrTable
88
urGetEventProcAddrTable
9-
urGetGlobalProcAddrTable
109
urGetKernelProcAddrTable
1110
urGetMemProcAddrTable
1211
urGetPhysicalMemProcAddrTable
1312
urGetPlatformProcAddrTable
1413
urGetProgramProcAddrTable
1514
urGetQueueProcAddrTable
1615
urGetSamplerProcAddrTable
16+
urGetUSMProcAddrTable
1717
urGetUSMExpProcAddrTable
1818
urGetUsmP2PExpProcAddrTable
19-
urGetUSMProcAddrTable
2019
urGetVirtualMemProcAddrTable
20+
urGetDeviceProcAddrTable

source/adapters/adapter.map.in

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
11
@TARGET_LIBNAME@ {
22
global:
3+
urGetGlobalProcAddrTable;
34
urGetBindlessImagesExpProcAddrTable;
45
urGetCommandBufferExpProcAddrTable;
56
urGetContextProcAddrTable;
6-
urGetDeviceProcAddrTable;
77
urGetEnqueueProcAddrTable;
88
urGetEventProcAddrTable;
9-
urGetGlobalProcAddrTable;
109
urGetKernelProcAddrTable;
1110
urGetMemProcAddrTable;
1211
urGetPhysicalMemProcAddrTable;
1312
urGetPlatformProcAddrTable;
1413
urGetProgramProcAddrTable;
1514
urGetQueueProcAddrTable;
1615
urGetSamplerProcAddrTable;
16+
urGetUSMProcAddrTable;
1717
urGetUSMExpProcAddrTable;
1818
urGetUsmP2PExpProcAddrTable;
19-
urGetUSMProcAddrTable;
2019
urGetVirtualMemProcAddrTable;
20+
urGetDeviceProcAddrTable;
2121
local:
2222
*;
2323
};

source/adapters/cuda/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6-
set(CUDA_DIR "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH "CUDA adapter directory")
7-
86
set(TARGET_NAME ur_adapter_cuda)
97

108
add_ur_adapter(${TARGET_NAME}

source/adapters/cuda/device.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -501,12 +501,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
501501
return ReturnValue(
502502
static_cast<uint64_t>(hDevice->getMaxChosenLocalMem()));
503503
} else {
504-
int LocalMemSize = 0;
505-
UR_CHECK_ERROR(cuDeviceGetAttribute(
506-
&LocalMemSize, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
507-
hDevice->get()));
508-
detail::ur::assertion(LocalMemSize >= 0);
509-
return ReturnValue(static_cast<uint64_t>(LocalMemSize));
504+
return ReturnValue(
505+
static_cast<uint64_t>(hDevice->getMaxCapacityLocalMem()));
510506
}
511507
}
512508
case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: {

source/adapters/cuda/device.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ struct ur_device_handle_t_ {
4545
UR_CHECK_ERROR(cuDeviceGetAttribute(
4646
&MaxRegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
4747
cuDevice));
48+
UR_CHECK_ERROR(cuDeviceGetAttribute(
49+
&MaxCapacityLocalMem,
50+
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice));
4851

4952
// Set local mem max size if env var is present
5053
static const char *LocalMemSizePtrUR =
@@ -56,9 +59,6 @@ struct ur_device_handle_t_ {
5659
: (LocalMemSizePtrPI ? LocalMemSizePtrPI : nullptr);
5760

5861
if (LocalMemSizePtr) {
59-
cuDeviceGetAttribute(
60-
&MaxCapacityLocalMem,
61-
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice);
6262
MaxChosenLocalMem = std::atoi(LocalMemSizePtr);
6363
MaxLocalMemSizeChosen = true;
6464
}

source/adapters/cuda/enqueue.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -284,9 +284,15 @@ setKernelParams(const ur_context_handle_t Context,
284284
CudaImplicitOffset);
285285
}
286286

287-
if (Context->getDevice()->maxLocalMemSizeChosen()) {
287+
auto Device = Context->getDevice();
288+
if (LocalSize > static_cast<uint32_t>(Device->getMaxCapacityLocalMem())) {
289+
setErrorMessage("Excessive allocation of local memory on the device",
290+
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
291+
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
292+
}
293+
294+
if (Device->maxLocalMemSizeChosen()) {
288295
// Set up local memory requirements for kernel.
289-
auto Device = Context->getDevice();
290296
if (Device->getMaxChosenLocalMem() < 0) {
291297
bool EnvVarHasURPrefix =
292298
std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr;
@@ -297,11 +303,6 @@ setKernelParams(const ur_context_handle_t Context,
297303
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
298304
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
299305
}
300-
if (LocalSize > static_cast<uint32_t>(Device->getMaxCapacityLocalMem())) {
301-
setErrorMessage("Too much local memory allocated for device",
302-
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
303-
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
304-
}
305306
if (LocalSize > static_cast<uint32_t>(Device->getMaxChosenLocalMem())) {
306307
bool EnvVarHasURPrefix =
307308
std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr;
@@ -319,6 +320,10 @@ setKernelParams(const ur_context_handle_t Context,
319320
UR_CHECK_ERROR(cuFuncSetAttribute(
320321
CuFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
321322
Device->getMaxChosenLocalMem()));
323+
324+
} else {
325+
UR_CHECK_ERROR(cuFuncSetAttribute(
326+
CuFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, LocalSize));
322327
}
323328

324329
} catch (ur_result_t Err) {

source/adapters/hip/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6-
set(HIP_DIR "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH "HIP adapter directory")
7-
86
set(TARGET_NAME ur_adapter_hip)
97

108
# Set default UR HIP platform to AMD

source/adapters/opencl/adapter.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,28 +12,29 @@
1212

1313
struct ur_adapter_handle_t_ {
1414
std::atomic<uint32_t> RefCount = 0;
15+
std::mutex Mutex;
1516
};
1617

1718
ur_adapter_handle_t_ adapter{};
1819

1920
UR_APIEXPORT ur_result_t UR_APICALL urInit(ur_device_init_flags_t,
2021
ur_loader_config_handle_t) {
21-
cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT();
2222
return UR_RESULT_SUCCESS;
2323
}
2424

2525
UR_APIEXPORT ur_result_t UR_APICALL urTearDown(void *) {
26-
if (cl_ext::ExtFuncPtrCache) {
27-
delete cl_ext::ExtFuncPtrCache;
28-
cl_ext::ExtFuncPtrCache = nullptr;
29-
}
3026
return UR_RESULT_SUCCESS;
3127
}
3228

3329
UR_APIEXPORT ur_result_t UR_APICALL
3430
urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters,
3531
uint32_t *pNumAdapters) {
3632
if (NumEntries > 0 && phAdapters) {
33+
std::lock_guard<std::mutex> Lock{adapter.Mutex};
34+
if (adapter.RefCount++ == 0) {
35+
cl_ext::ExtFuncPtrCache = std::make_unique<cl_ext::ExtFuncPtrCacheT>();
36+
}
37+
3738
*phAdapters = &adapter;
3839
}
3940

@@ -50,7 +51,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) {
5051
}
5152

5253
UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) {
53-
--adapter.RefCount;
54+
std::lock_guard<std::mutex> Lock{adapter.Mutex};
55+
if (--adapter.RefCount == 0) {
56+
cl_ext::ExtFuncPtrCache.reset();
57+
}
5458
return UR_RESULT_SUCCESS;
5559
}
5660

source/adapters/opencl/common.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ struct ExtFuncPtrCacheT {
260260
// piTeardown to avoid issues with static destruction order (a user application
261261
// might have static objects that indirectly access this cache in their
262262
// destructor).
263-
inline ExtFuncPtrCacheT *ExtFuncPtrCache;
263+
inline std::unique_ptr<ExtFuncPtrCacheT> ExtFuncPtrCache;
264264

265265
// USM helper function to get an extension function pointer
266266
template <typename T>

source/adapters/opencl/enqueue.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -350,9 +350,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe(
350350
return mapCLErrorToUR(CLErr);
351351
}
352352

353-
clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr;
353+
cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr;
354354
ur_result_t RetVal =
355-
cl_ext::getExtFuncFromContext<clEnqueueReadHostPipeINTEL_fn>(
355+
cl_ext::getExtFuncFromContext<cl_ext::clEnqueueReadHostPipeINTEL_fn>(
356356
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache,
357357
cl_ext::EnqueueReadHostPipeName, &FuncPtr);
358358

@@ -382,9 +382,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe(
382382
return mapCLErrorToUR(CLErr);
383383
}
384384

385-
clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr;
385+
cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr;
386386
ur_result_t RetVal =
387-
cl_ext::getExtFuncFromContext<clEnqueueWriteHostPipeINTEL_fn>(
387+
cl_ext::getExtFuncFromContext<cl_ext::clEnqueueWriteHostPipeINTEL_fn>(
388388
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache,
389389
cl_ext::EnqueueWriteHostPipeName, &FuncPtr);
390390

0 commit comments

Comments
 (0)