Skip to content

Commit 9498fc2

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (6 commits)
2 parents 5e174f8 + e880f95 commit 9498fc2

File tree

9 files changed

+137
-24
lines changed

9 files changed

+137
-24
lines changed

libclc/libspirv/lib/native_cpu-unknown-linux/cl_khr_int64_extended_atomics/minmax_helpers.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@ entry:
2828
ret i64 %0
2929
}
3030

31-
define i64 @__clc__sync_fetch_and_min_generic_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
31+
define i64 @__clc__sync_fetch_and_min_generic_8(i64* nocapture %ptr, i64 %value) nounwind alwaysinline {
3232
entry:
33-
%0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst
33+
%0 = atomicrmw volatile min i64* %ptr, i64 %value seq_cst
3434
ret i64 %0
3535
}
3636

37-
define i64 @__clc__sync_fetch_and_umin_generic_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
37+
define i64 @__clc__sync_fetch_and_umin_generic_8(i64* nocapture %ptr, i64 %value) nounwind alwaysinline {
3838
entry:
39-
%0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst
39+
%0 = atomicrmw volatile umin i64* %ptr, i64 %value seq_cst
4040
ret i64 %0
4141
}
4242

@@ -64,14 +64,14 @@ entry:
6464
ret i64 %0
6565
}
6666

67-
define i64 @__clc__sync_fetch_and_max_generic_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
67+
define i64 @__clc__sync_fetch_and_max_generic_8(i64* nocapture %ptr, i64 %value) nounwind alwaysinline {
6868
entry:
69-
%0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst
69+
%0 = atomicrmw volatile max i64* %ptr, i64 %value seq_cst
7070
ret i64 %0
7171
}
7272

73-
define i64 @__clc__sync_fetch_and_umax_generic_8(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
73+
define i64 @__clc__sync_fetch_and_umax_generic_8(i64* nocapture %ptr, i64 %value) nounwind alwaysinline {
7474
entry:
75-
%0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst
75+
%0 = atomicrmw volatile umax i64* %ptr, i64 %value seq_cst
7676
ret i64 %0
7777
}

llvm/test/SYCLLowerIR/work_group_static.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,5 +38,5 @@ attributes #1 = { convergent norecurse }
3838
!2 = !{i32 4, i32 100000}
3939
!3 = !{!"clang version 13.0.0"}
4040
!4 = !{}
41-
; ![[ADDR_SPACE_MD]] = !{i32 1, i32 3}
41+
; CHECK: ![[ADDR_SPACE_MD]] = !{i32 1, i32 3}
4242
!5 = !{i32 1}

llvm/test/SYCLLowerIR/work_group_static_nv.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ target triple = "nvptx64-nvidia-cuda"
88
; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = external local_unnamed_addr addrspace(3) global [0 x i8], align 128
99

1010
; Function Attrs: convergent norecurse
11-
; CHECK: @_ZTS7KernelA(ptr addrspace(1) %0)
11+
; CHECK: @_ZTS7KernelA(ptr addrspace(1) %0){{.*}} !kernel_arg_addr_space ![[ADDR_SPACE_MD:[0-9]+]]
1212
define void @_ZTS7KernelA(ptr addrspace(1) %0) local_unnamed_addr #0 !kernel_arg_addr_space !5 {
1313
entry:
1414
; CHECK: getelementptr inbounds i8, ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder_GV
@@ -34,5 +34,5 @@ attributes #1 = { convergent norecurse }
3434
!2 = !{i32 4, i32 100000}
3535
!3 = !{!"clang version 13.0.0"}
3636
!4 = !{}
37-
; ![[ADDR_SPACE_MD]] = !{i32 1, i32 3}
37+
; CHECK: ![[ADDR_SPACE_MD]] = !{i32 1}
3838
!5 = !{i32 1}

sycl-jit/jit-compiler/lib/KernelFusion.cpp

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,13 @@
1717
#include "rtc/DeviceCompilation.h"
1818
#include "translation/KernelTranslation.h"
1919
#include "translation/SPIRVLLVMTranslation.h"
20+
21+
#include <llvm/ADT/StringExtras.h>
2022
#include <llvm/Support/Error.h>
23+
#include <llvm/Support/TimeProfiler.h>
24+
25+
#include <clang/Driver/Options.h>
26+
2127
#include <sstream>
2228

2329
using namespace jit_compiler;
@@ -237,14 +243,34 @@ fuseKernels(View<SYCLKernelInfo> KernelInformation, const char *FusedKernelName,
237243
extern "C" KF_EXPORT_SYMBOL RTCResult
238244
compileSYCL(InMemoryFile SourceFile, View<InMemoryFile> IncludeFiles,
239245
View<const char *> UserArgs) {
246+
std::string BuildLog;
247+
240248
auto UserArgListOrErr = parseUserArgs(UserArgs);
241249
if (!UserArgListOrErr) {
242250
return errorTo<RTCResult>(UserArgListOrErr.takeError(),
243251
"Parsing of user arguments failed");
244252
}
245253
llvm::opt::InputArgList UserArgList = std::move(*UserArgListOrErr);
246254

247-
std::string BuildLog;
255+
llvm::StringRef TraceFileName;
256+
if (auto *Arg =
257+
UserArgList.getLastArg(clang::driver::options::OPT_ftime_trace_EQ)) {
258+
TraceFileName = Arg->getValue();
259+
int Granularity =
260+
500; // microseconds. Same default as in `clang::FrontendOptions`.
261+
if (auto *Arg = UserArgList.getLastArg(
262+
clang::driver::options::OPT_ftime_trace_granularity_EQ)) {
263+
if (!llvm::to_integer(Arg->getValue(), Granularity)) {
264+
BuildLog += "warning: ignoring malformed argument: '" +
265+
Arg->getAsString(UserArgList) + "'\n";
266+
}
267+
}
268+
bool Verbose =
269+
UserArgList.hasArg(clang::driver::options::OPT_ftime_trace_verbose);
270+
271+
llvm::timeTraceProfilerInitialize(Granularity, /*ProcName=*/"sycl-rtc",
272+
Verbose);
273+
}
248274

249275
auto ModuleOrErr =
250276
compileDeviceCode(SourceFile, IncludeFiles, UserArgList, BuildLog);
@@ -279,6 +305,15 @@ compileSYCL(InMemoryFile SourceFile, View<InMemoryFile> IncludeFiles,
279305
DevImgInfo.BinaryInfo = std::move(*BinaryInfoOrError);
280306
}
281307

308+
if (llvm::timeTraceProfilerEnabled()) {
309+
auto Error = llvm::timeTraceProfilerWrite(
310+
TraceFileName, /*FallbackFileName=*/"trace.json");
311+
llvm::timeTraceProfilerCleanup();
312+
if (Error) {
313+
return errorTo<RTCResult>(std::move(Error), "Trace file writing failed");
314+
}
315+
}
316+
282317
return RTCResult{std::move(BundleInfo), BuildLog.c_str()};
283318
}
284319

sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <llvm/SYCLLowerIR/ModuleSplitter.h>
3434
#include <llvm/SYCLLowerIR/SYCLJointMatrixTransform.h>
3535
#include <llvm/Support/PropertySetIO.h>
36+
#include <llvm/Support/TimeProfiler.h>
3637

3738
#include <algorithm>
3839
#include <array>
@@ -225,6 +226,8 @@ class LLVMDiagnosticWrapper : public llvm::DiagnosticHandler {
225226
Expected<std::unique_ptr<llvm::Module>> jit_compiler::compileDeviceCode(
226227
InMemoryFile SourceFile, View<InMemoryFile> IncludeFiles,
227228
const InputArgList &UserArgList, std::string &BuildLog) {
229+
TimeTraceScope TTS{"compileDeviceCode"};
230+
228231
const std::string &DPCPPRoot = getDPCPPRoot();
229232
if (DPCPPRoot == InvalidDPCPPRoot) {
230233
return createStringError("Could not locate DPCPP root directory");
@@ -244,6 +247,9 @@ Expected<std::unique_ptr<llvm::Module>> jit_compiler::compileDeviceCode(
244247
// linking).
245248
DAL.eraseArg(OPT_fsycl_device_lib_EQ);
246249
DAL.eraseArg(OPT_fno_sycl_device_lib_EQ);
250+
DAL.eraseArg(OPT_ftime_trace_EQ);
251+
DAL.eraseArg(OPT_ftime_trace_granularity_EQ);
252+
DAL.eraseArg(OPT_ftime_trace_verbose);
247253

248254
SmallVector<std::string> CommandLine;
249255
for (auto *Arg : DAL) {
@@ -382,6 +388,8 @@ static bool getDeviceLibraries(const ArgList &Args,
382388
Error jit_compiler::linkDeviceLibraries(llvm::Module &Module,
383389
const InputArgList &UserArgList,
384390
std::string &BuildLog) {
391+
TimeTraceScope TTS{"linkDeviceLibraries"};
392+
385393
const std::string &DPCPPRoot = getDPCPPRoot();
386394
if (DPCPPRoot == InvalidDPCPPRoot) {
387395
return createStringError("Could not locate DPCPP root directory");
@@ -458,6 +466,8 @@ static IRSplitMode getDeviceCodeSplitMode(const InputArgList &UserArgList) {
458466
Expected<PostLinkResult>
459467
jit_compiler::performPostLink(std::unique_ptr<llvm::Module> Module,
460468
const InputArgList &UserArgList) {
469+
TimeTraceScope TTS{"performPostLink"};
470+
461471
// This is a simplified version of `processInputModule` in
462472
// `llvm/tools/sycl-post-link.cpp`. Assertions/TODOs point to functionality
463473
// left out of the algorithm for now.

sycl-jit/jit-compiler/lib/translation/KernelTranslation.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/MC/TargetRegistry.h"
1818
#include "llvm/Support/MemoryBuffer.h"
1919
#include "llvm/Support/TargetSelect.h"
20+
#include "llvm/Support/TimeProfiler.h"
2021
#include "llvm/Target/TargetMachine.h"
2122
#include "llvm/Target/TargetOptions.h"
2223

@@ -225,6 +226,8 @@ llvm::Error KernelTranslator::translateKernel(SYCLKernelInfo &Kernel,
225226
llvm::Expected<RTCDevImgBinaryInfo>
226227
KernelTranslator::translateDevImgToSPIRV(llvm::Module &Mod,
227228
JITContext &JITCtx) {
229+
llvm::TimeTraceScope TTS{"translateDevImgToSPIRV"};
230+
228231
llvm::Expected<KernelBinary *> BinaryOrError = translateToSPIRV(Mod, JITCtx);
229232
if (auto Error = BinaryOrError.takeError()) {
230233
return Error;
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# commit f058cb230c65fe8094f74043d0c9afd5ba0e8325
2-
# Merge: 871061f1 3bf76246
3-
# Author: Ross Brunton <ross@codeplay.com>
4-
# Date: Tue Jan 21 15:49:41 2025 +0000
5-
# Merge pull request #2593 from RossBrunton/ross/formatall
6-
# Ensure all files are clang formatted
7-
set(UNIFIED_RUNTIME_TAG f058cb230c65fe8094f74043d0c9afd5ba0e8325)
1+
# commit d3e97040b67ee6b45655eccdfca19185bd9115b6
2+
# Merge: 2ef69591 37ad03a7
3+
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
4+
# Date: Thu Jan 23 10:23:08 2025 +0000
5+
# Merge pull request #2605 from yingcong-wu/yc/0123-ur-quick-fix
6+
# [DeviceASAN] Bugfix for GetDeviceType
7+
set(UNIFIED_RUNTIME_TAG d3e97040b67ee6b45655eccdfca19185bd9115b6)

sycl/doc/extensions/proposed/sycl_ext_oneapi_device_image_backend_content.asciidoc

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,7 @@ class device_image {
101101
backend ext_oneapi_get_backend() const noexcept;
102102
std::vector<std::byte> ext_oneapi_get_backend_content() const;
103103
104-
#if defined(__cpp_lib_span)
105-
std::span<std::byte> ext_oneapi_get_backend_content_view() const;
106-
#endif
104+
std::span<std::byte> ext_oneapi_get_backend_content_view() const; // Requires C++20
107105
108106
/*...*/
109107
};
@@ -154,8 +152,7 @@ std::span<std::byte> ext_oneapi_get_content_backend_view() const;
154152
----
155153
!====
156154

157-
Available only when the compiler defines the `__cpp_lib_span` feature-test macro
158-
(which is defined in {cpp}20 and higher).
155+
Minimum C++ Version: {cpp}20
159156

160157
_Constraints:_ Available only when `State` is `bundle_state::executable`.
161158

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
//==----- kernel_compiler_sycl_jit_time_trace.cpp --- time-tracing test ----==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// REQUIRES: (opencl || level_zero)
10+
// UNSUPPORTED: accelerator
11+
// UNSUPPORTED-INTENDED: SYCL-RTC is not available for accelerator devices
12+
13+
// RUN: %{build} -o %t.out
14+
// RUN: %{run} %t.out | FileCheck %s
15+
16+
#include <sycl/detail/core.hpp>
17+
#include <sycl/kernel_bundle.hpp>
18+
19+
int test_tracing() {
20+
namespace syclex = sycl::ext::oneapi::experimental;
21+
using source_kb = sycl::kernel_bundle<sycl::bundle_state::ext_oneapi_source>;
22+
23+
sycl::queue q;
24+
sycl::context ctx = q.get_context();
25+
26+
bool ok =
27+
q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl_jit);
28+
if (!ok) {
29+
std::cout << "Apparently this device does not support `sycl_jit` source "
30+
"kernel bundle extension: "
31+
<< q.get_device().get_info<sycl::info::device::name>()
32+
<< std::endl;
33+
return -1;
34+
}
35+
36+
source_kb kbSrc = syclex::create_kernel_bundle_from_source(
37+
ctx, syclex::source_language::sycl_jit, "");
38+
39+
auto props = syclex::properties{
40+
syclex::build_options{std::vector<std::string>{
41+
"-ftime-trace=-", "-ftime-trace-granularity=1000" /* us */,
42+
"-ftime-trace-verbose"}},
43+
};
44+
45+
syclex::build(kbSrc, props);
46+
// CHECK: {"traceEvents":
47+
48+
std::string log;
49+
auto props2 = syclex::properties{
50+
syclex::build_options{std::vector<std::string>{
51+
"-ftime-trace=-", "-ftime-trace-granularity=invalid_int"}},
52+
syclex::save_log{&log}};
53+
syclex::build(kbSrc, props2);
54+
std::cout << log << std::endl;
55+
// CHECK: {"traceEvents":
56+
// CHECK: warning: ignoring malformed argument: '-ftime-trace-granularity=invalid_int'
57+
58+
return 0;
59+
}
60+
61+
int main() {
62+
#ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER
63+
return test_tracing();
64+
#else
65+
static_assert(false, "Kernel Compiler feature test macro undefined");
66+
#endif
67+
return 0;
68+
}

0 commit comments

Comments
 (0)