Skip to content

Commit 1309731

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents cd90ecf + aadd1e7 commit 1309731

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+844
-373
lines changed

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/Support/CommandLine.h"
1818
#include "llvm/Support/FileSystem.h"
1919
#include "llvm/Support/Path.h"
20+
#include <algorithm>
2021
#include <sstream>
2122

2223
using namespace clang::driver;
@@ -311,6 +312,21 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
311312
if (SanitizeVal == "address")
312313
addLibraries(SYCLDeviceSanitizerLibs);
313314
}
315+
} else {
316+
// User can pass -fsanitize=address to device compiler via
317+
// -Xsycl-target-frontend, sanitize device library must be
318+
// linked with user's device image if so.
319+
bool IsDeviceAsanEnabled = false;
320+
auto SyclFEArg = Args.getAllArgValues(options::OPT_Xsycl_frontend);
321+
IsDeviceAsanEnabled = (std::count(SyclFEArg.begin(), SyclFEArg.end(),
322+
"-fsanitize=address") > 0);
323+
if (!IsDeviceAsanEnabled) {
324+
auto SyclFEArgEq = Args.getAllArgValues(options::OPT_Xsycl_frontend_EQ);
325+
IsDeviceAsanEnabled = (std::count(SyclFEArgEq.begin(), SyclFEArgEq.end(),
326+
"-fsanitize=address") > 0);
327+
}
328+
if (IsDeviceAsanEnabled)
329+
addLibraries(SYCLDeviceSanitizerLibs);
314330
}
315331
#endif
316332
return LibraryList;

clang/test/Driver/sycl-device-lib.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,10 @@
216216
/// test behavior of libsycl-sanitizer.o linking when -fsanitize=address is available
217217
// RUN: %clangxx -fsycl %s --sysroot=%S/Inputs/SYCL -fsanitize=address -### 2>&1 \
218218
// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_SANITIZER
219+
// RUN: %clangxx -fsycl %s --sysroot=%S/Inputs/SYCL -Xsycl-target-frontend -fsanitize=address -### 2>&1 \
220+
// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_SANITIZER
221+
// RUN: %clangxx -fsycl %s --sysroot=%S/Inputs/SYCL -Xsycl-target-frontend=spir64 -fsanitize=address -### 2>&1 \
222+
// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_SANITIZER
219223
// SYCL_DEVICE_LIB_SANITIZER: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle"
220224
// SYCL_DEVICE_LIB_SANITIZER-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle"
221225
// SYCL_DEVICE_LIB_SANITIZER-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle"

llvm/test/CodeGen/Generic/fp-builtin-intrinsics-svml.ll

Lines changed: 216 additions & 216 deletions
Large diffs are not rendered by default.

sycl/doc/design/CommandGraph.md

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -241,11 +241,22 @@ created on UR command-buffer enqueue.
241241

242242
There is also a *WaitEvent* used by the `ur_exp_command_buffer_handle_t` class
243243
in the prefix to wait on any dependencies passed in the enqueue wait-list.
244-
This WaitEvent is reset at the end of the suffix, along with reset commands
245-
to reset the L0 events used to implement the UR sync-points back to the
246-
non-signaled state.
247-
248-
![L0 command-buffer diagram](images/L0_UR_command-buffer.svg)
244+
This WaitEvent is reset in the suffix.
245+
246+
A command-buffer is expected to be submitted multiple times. Consequently,
247+
we need to ensure that L0 events associated with graph commands have not
248+
been signaled by a previous execution. These events are therefore reset to the
249+
non-signaled state before running the actual graph associated commands. Note
250+
that this reset is performed in the prefix and not in the suffix to avoid
251+
additional synchronization w.r.t profiling data extraction.
252+
253+
If a command-buffer is about to be submitted to a queue with the profiling
254+
property enabled, an extra command that copies timestamps of L0 events
255+
associated with graph commands into a dedicated memory which is attached to the
256+
returned UR event. This memory stores the profiling information that
257+
corresponds to the current submission of the command-buffer.
258+
259+
![L0 command-buffer diagram](images/L0_UR_command-buffer-v3.jpg)
249260

250261
For a call to `urCommandBufferEnqueueExp` with an `event_list` *EL*,
251262
command-buffer *CB*, and return event *RE* our implementation has to submit two
Loading

sycl/doc/design/images/L0_UR_command-buffer.svg

Lines changed: 0 additions & 1 deletion
This file was deleted.

sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_compiler_spirv.asciidoc renamed to sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler_spirv.asciidoc

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,12 @@ This extension also depends on the following other SYCL extensions:
5252

5353
== Status
5454

55-
This is a proposed extension specification, intended to gather community
56-
feedback.
57-
Interfaces defined in this specification may not be implemented yet or may be
58-
in a preliminary state.
59-
The specification itself may also change in incompatible ways before it is
60-
finalized.
61-
*Shipping software products should not rely on APIs defined in this
62-
specification.*
55+
This is an experimental extension specification, intended to provide early
56+
access to features and gather community feedback. Interfaces defined in
57+
this specification are implemented in DPC++, but they are not finalized
58+
and may change incompatibly in future versions of DPC++ without prior notice.
59+
*Shipping software products should not rely on APIs defined in
60+
this specification.*
6361

6462

6563
== Overview

sycl/include/sycl/kernel_bundle.hpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <sycl/ext/oneapi/properties/property_value.hpp> // and log
2626

2727
#include <array> // for array
28+
#include <cstddef> // for std::byte
2829
#include <cstring> // for size_t, memcpy
2930
#include <functional> // for function
3031
#include <iterator> // for distance
@@ -46,7 +47,7 @@ auto get_native(const kernel_bundle<State> &Obj)
4647
namespace detail {
4748
class kernel_id_impl;
4849
class kernel_impl;
49-
}
50+
} // namespace detail
5051

5152
template <typename KernelName> kernel_id get_kernel_id();
5253

@@ -886,11 +887,18 @@ __SYCL_EXPORT bool is_source_kernel_bundle_supported(backend BE,
886887
/////////////////////////
887888
// syclex::create_kernel_bundle_from_source
888889
/////////////////////////
890+
891+
__SYCL_EXPORT kernel_bundle<bundle_state::ext_oneapi_source>
892+
create_kernel_bundle_from_source(const context &SyclContext,
893+
source_language Language,
894+
const std::string &Source);
895+
896+
#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
889897
__SYCL_EXPORT kernel_bundle<bundle_state::ext_oneapi_source>
890-
create_kernel_bundle_from_source(
891-
const context &SyclContext,
892-
sycl::ext::oneapi::experimental::source_language Language,
893-
const std::string &Source);
898+
create_kernel_bundle_from_source(const context &SyclContext,
899+
source_language Language,
900+
const std::vector<std::byte> &Bytes);
901+
#endif
894902

895903
/////////////////////////
896904
// syclex::build(source_kb) => exe_kb

sycl/include/sycl/kernel_bundle_enums.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ enum class bundle_state : char {
2020

2121
namespace ext::oneapi::experimental {
2222

23-
enum class source_language : int { opencl = 0 /* sycl , spir-v, cuda */ };
23+
enum class source_language : int { opencl = 0, spirv = 1 /* sycl, cuda */ };
2424

2525
} // namespace ext::oneapi::experimental
2626

sycl/plugins/unified_runtime/pi2ur.hpp

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -794,11 +794,7 @@ inline pi_result piTearDown(void *PluginParameter) {
794794
return PI_SUCCESS;
795795
}
796796

797-
///////////////////////////////////////////////////////////////////////////////
798-
// Platform
799-
inline pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
800-
pi_uint32 *NumPlatforms) {
801-
797+
inline pi_result PiGetAdapter(ur_adapter_handle_t &adapter) {
802798
// We're not going through the UR loader so we're guaranteed to have exactly
803799
// one adapter (whichever is statically linked). The PI plugin for UR has its
804800
// own implementation of piPlatformsGet.
@@ -809,9 +805,23 @@ inline pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
809805
[&Ret]() { Ret = urAdapterGet(1, &Adapter, nullptr); });
810806
HANDLE_ERRORS(Ret);
811807

808+
adapter = Adapter;
809+
810+
return PI_SUCCESS;
811+
}
812+
813+
///////////////////////////////////////////////////////////////////////////////
814+
// Platform
815+
inline pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
816+
pi_uint32 *NumPlatforms) {
817+
ur_adapter_handle_t adapter = nullptr;
818+
if (auto res = PiGetAdapter(adapter); res != PI_SUCCESS) {
819+
return res;
820+
}
821+
812822
auto phPlatforms = reinterpret_cast<ur_platform_handle_t *>(Platforms);
813823
HANDLE_ERRORS(
814-
urPlatformGet(&Adapter, 1, NumEntries, phPlatforms, NumPlatforms));
824+
urPlatformGet(&adapter, 1, NumEntries, phPlatforms, NumPlatforms));
815825
return PI_SUCCESS;
816826
}
817827

@@ -838,6 +848,12 @@ piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle,
838848
PI_ASSERT(Platform, PI_ERROR_INVALID_PLATFORM);
839849
PI_ASSERT(NativeHandle, PI_ERROR_INVALID_VALUE);
840850

851+
ur_adapter_handle_t adapter = nullptr;
852+
if (auto res = PiGetAdapter(adapter); res != PI_SUCCESS) {
853+
return res;
854+
}
855+
(void)adapter;
856+
841857
ur_platform_handle_t UrPlatform{};
842858
ur_native_handle_t UrNativeHandle =
843859
reinterpret_cast<ur_native_handle_t>(NativeHandle);

sycl/source/detail/event_impl.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -278,17 +278,33 @@ void event_impl::checkProfilingPreconditions() const {
278278
"Profiling information is unavailable as the queue associated with "
279279
"the event does not have the 'enable_profiling' property.");
280280
}
281-
if (MEventFromSubmitedExecCommandBuffer) {
282-
throw sycl::exception(make_error_code(sycl::errc::invalid),
283-
"Profiling information is unavailable for events "
284-
"returned by a graph submission.");
285-
}
286281
}
287282

288283
template <>
289284
uint64_t
290285
event_impl::get_profiling_info<info::event_profiling::command_submit>() {
291286
checkProfilingPreconditions();
287+
// The delay between the submission and the actual start of a CommandBuffer
288+
// can be short. Consequently, the submission time, which is based on
289+
// an estimated clock and not on the real device clock, may be ahead of the
290+
// start time, which is based on the actual device clock.
291+
// MSubmitTime is set in a critical performance path.
292+
// Force reading the device clock when setting MSubmitTime may deteriorate
293+
// the performance.
294+
// Since submit time is an estimated time, we implement this little hack
295+
// that allows all profiled time to be meaningful.
296+
// (Note that the observed time deviation between the estimated clock and
297+
// the real device clock is typically less than 0.5ms. The approximation we
298+
// made by forcing the re-sync of submit time to start time is less than
299+
// 0.5ms. These timing values were obtained empirically using an integrated
300+
// Intel GPU).
301+
if (MEventFromSubmittedExecCommandBuffer && !MHostEvent && MEvent) {
302+
uint64_t StartTime =
303+
get_event_profiling_info<info::event_profiling::command_start>(
304+
this->getHandleRef(), this->getPlugin());
305+
if (StartTime < MSubmitTime)
306+
MSubmitTime = StartTime;
307+
}
292308
return MSubmitTime;
293309
}
294310

sycl/source/detail/event_impl.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -282,12 +282,12 @@ class event_impl {
282282
return MGraph.lock();
283283
}
284284

285-
void setEventFromSubmitedExecCommandBuffer(bool value) {
286-
MEventFromSubmitedExecCommandBuffer = value;
285+
void setEventFromSubmittedExecCommandBuffer(bool value) {
286+
MEventFromSubmittedExecCommandBuffer = value;
287287
}
288288

289-
bool isEventFromSubmitedExecCommandBuffer() const {
290-
return MEventFromSubmitedExecCommandBuffer;
289+
bool isEventFromSubmittedExecCommandBuffer() const {
290+
return MEventFromSubmittedExecCommandBuffer;
291291
}
292292

293293
protected:
@@ -340,8 +340,8 @@ class event_impl {
340340
/// Store the command graph associated with this event, if any.
341341
/// This event is also be stored in the graph so a weak_ptr is used.
342342
std::weak_ptr<ext::oneapi::experimental::detail::graph_impl> MGraph;
343-
/// Indicates that the event results from a command graph submission
344-
bool MEventFromSubmitedExecCommandBuffer = false;
343+
/// Indicates that the event results from a command graph submission.
344+
bool MEventFromSubmittedExecCommandBuffer = false;
345345

346346
// If this event represents a submission to a
347347
// sycl::detail::pi::PiExtCommandBuffer the sync point for that submission is

sycl/source/detail/graph_impl.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,6 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
757757
auto NewEvent = std::make_shared<sycl::detail::event_impl>(Queue);
758758
NewEvent->setContextImpl(Queue->getContextImplPtr());
759759
NewEvent->setStateIncomplete();
760-
NewEvent->setEventFromSubmitedExecCommandBuffer(true);
761760
return NewEvent;
762761
});
763762

@@ -840,7 +839,7 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
840839
NewEvent = sycl::detail::Scheduler::getInstance().addCG(
841840
std::move(CommandGroup), Queue);
842841
}
843-
842+
NewEvent->setEventFromSubmittedExecCommandBuffer(true);
844843
} else if ((CurrentPartition->MSchedule.size() > 0) &&
845844
(CurrentPartition->MSchedule.front()->MCGType ==
846845
sycl::detail::CG::CGTYPE::CodeplayHostTask)) {

0 commit comments

Comments
 (0)