Skip to content

Commit e18f25a

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents 919959d + a418e1c commit e18f25a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+57
-4106
lines changed

CONTRIBUTING.md

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,6 @@ There are 3 types of tests which are used for DPC++ toolchain validation:
6666
should not have hardware (e.g. GPU, FPGA, etc.) or external software
6767
dependencies (e.g. OpenCL, Level Zero, CUDA runtimes). All tests not following
6868
this approach should be moved to DPC++ end-to-end or SYCL-CTS tests.
69-
However, the tests for a feature under active development requiring atomic
70-
change for tests and product can be put to
71-
[sycl/test/on-device](../../sycl/test/on-device) temporarily. It is developer
72-
responsibility to move the tests to DPC++ E2E test suite or SYCL-CTS once
73-
the feature is stabilized.
7469

7570
**Guidelines for adding DPC++ in-tree LIT tests (DPC++ Clang FE tests)**:
7671
- Use `sycl::` namespace instead of `cl::sycl::`

clang/include/clang/Driver/Options.td

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2628,14 +2628,6 @@ def fsycl_device_code_split_EQ : Joined<["-"], "fsycl-device-code-split=">,
26282628
def fsycl_device_code_split : Flag<["-"], "fsycl-device-code-split">, Alias<fsycl_device_code_split_EQ>,
26292629
AliasArgs<["auto"]>, Flags<[CC1Option, CoreOption]>,
26302630
HelpText<"Perform SYCL device code split in the 'auto' mode, i.e. use heuristic to distribute device code across modules">;
2631-
def fsycl_device_code_split_esimd : Flag<["-"], "fsycl-device-code-split-esimd">,
2632-
Flags<[CC1Option, CoreOption]>, HelpText<"Split SYCL and ESIMD kernels into separate modules">;
2633-
def fno_sycl_device_code_split_esimd : Flag<["-"], "fno-sycl-device-code-split-esimd">,
2634-
Flags<[CC1Option, CoreOption]>, HelpText<"Don't split SYCL and ESIMD kernels into separate modules">;
2635-
def fsycl_device_code_lower_esimd : Flag<["-"], "fsycl-device-code-lower-esimd">,
2636-
Flags<[CC1Option, CoreOption]>, HelpText<"Lower ESIMD-specific constructs">;
2637-
def fno_sycl_device_code_lower_esimd : Flag<["-"], "fno-sycl-device-code-lower-esimd">,
2638-
Flags<[CC1Option, CoreOption]>, HelpText<"Do not lower ESIMD-specific constructs">;
26392631
def fsycl_instrument_device_code : Flag<["-"], "fsycl-instrument-device-code">,
26402632
Group<sycl_Group>, Flags<[CC1Option, CoreOption]>,
26412633
HelpText<"Add ITT instrumentation intrinsics calls">,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8774,7 +8774,7 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA,
87748774
ExtArg += ",+SPV_INTEL_usm_storage_classes";
87758775
else
87768776
// Don't enable several freshly added extensions on FPGA H/W
8777-
ExtArg += ",+SPV_INTEL_token_type";
8777+
ExtArg += ",+SPV_INTEL_token_type,+SPV_INTEL_bfloat16_conversion";
87788778
TranslatorArgs.push_back(TCArgs.MakeArgString(ExtArg));
87798779
}
87808780
for (auto I : Inputs) {
@@ -8945,13 +8945,8 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
89458945
// Symbol file and specialization constant info generation is mandatory -
89468946
// add options unconditionally
89478947
addArgs(CmdArgs, TCArgs, {"-symbols"});
8948-
// By default we split SYCL and ESIMD kernels into separate modules
8949-
if (TCArgs.hasFlag(options::OPT_fsycl_device_code_split_esimd,
8950-
options::OPT_fno_sycl_device_code_split_esimd, true))
8951-
addArgs(CmdArgs, TCArgs, {"-split-esimd"});
8952-
if (TCArgs.hasFlag(options::OPT_fsycl_device_code_lower_esimd,
8953-
options::OPT_fno_sycl_device_code_lower_esimd, true))
8954-
addArgs(CmdArgs, TCArgs, {"-lower-esimd"});
8948+
addArgs(CmdArgs, TCArgs, {"-split-esimd"});
8949+
addArgs(CmdArgs, TCArgs, {"-lower-esimd"});
89558950
}
89568951
addArgs(CmdArgs, TCArgs,
89578952
{StringRef(getSYCLPostLinkOptimizationLevel(TCArgs))});

clang/test/Driver/sycl-offload-with-split.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -299,25 +299,15 @@
299299
// Check ESIMD device code split.
300300
// RUN: %clang -### -fsycl %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-SPLIT
301301
// RUN: %clang_cl -### -fsycl %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-SPLIT
302-
// RUN: %clang -### -fsycl -fsycl-device-code-split-esimd %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-SPLIT
303-
// RUN: %clang_cl -### -fsycl -fsycl-device-code-split-esimd %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-SPLIT
304-
// RUN: %clang -### -fsycl -fno-sycl-device-code-split-esimd %s 2>&1 | FileCheck %s -check-prefixes=CHK-NO-ESIMD-SPLIT
305-
// RUN: %clang_cl -### -fsycl -fno-sycl-device-code-split-esimd %s 2>&1 | FileCheck %s -check-prefixes=CHK-NO-ESIMD-SPLIT
306302
// RUN: %clang -### -fsycl -fintelfpga %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-SPLIT
307303
// RUN: %clang -### -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-SPLIT
308304
// RUN: %clang_cl -### -fsycl -fintelfpga %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-SPLIT
309305
// CHK-ESIMD-SPLIT: sycl-post-link{{.*}} "-split-esimd"
310-
// CHK-NO-ESIMD-SPLIT-NOT: sycl-post-link{{.*}} "-split-esimd"
311306

312307
// Check lowering of ESIMD device code.
313308
// RUN: %clang -### -fsycl %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-LOWER
314309
// RUN: %clang_cl -### -fsycl %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-LOWER
315-
// RUN: %clang -### -fsycl -fsycl-device-code-lower-esimd %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-LOWER
316-
// RUN: %clang_cl -### -fsycl -fsycl-device-code-lower-esimd %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-LOWER
317-
// RUN: %clang -### -fsycl -fno-sycl-device-code-lower-esimd %s 2>&1 | FileCheck %s -check-prefixes=CHK-NO-ESIMD-LOWER
318-
// RUN: %clang_cl -### -fsycl -fno-sycl-device-code-lower-esimd %s 2>&1 | FileCheck %s -check-prefixes=CHK-NO-ESIMD-LOWER
319310
// RUN: %clang -### -fsycl -fintelfpga %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-LOWER
320311
// RUN: %clang -### -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-LOWER
321312
// RUN: %clang_cl -### -fsycl -fintelfpga %s 2>&1 | FileCheck %s -check-prefixes=CHK-ESIMD-LOWER
322313
// CHK-ESIMD-LOWER: sycl-post-link{{.*}} "-lower-esimd"
323-
// CHK-NO-ESIMD-LOWER-NOT: sycl-post-link{{.*}} "-lower-esimd"

clang/test/Driver/sycl-spirv-ext.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@
5050
// CHECK-DEFAULT-SAME:,+SPV_INTEL_fpga_dsp_control
5151
// CHECK-DEFAULT-SAME:,+SPV_INTEL_arithmetic_fence
5252
// CHECK-DEFAULT-SAME:,+SPV_INTEL_runtime_aligned
53-
// CHECK-DEFAULT-SAME:,+SPV_INTEL_token_type"
53+
// CHECK-DEFAULT-SAME:,+SPV_INTEL_token_type
54+
// CHECK-DEFAULT-SAME:,+SPV_INTEL_bfloat16_conversion"
5455
// CHECK-FPGA-HW: llvm-spirv{{.*}}"-spirv-ext=-all
5556
// CHECK-FPGA-HW-SAME:,+SPV_EXT_shader_atomic_float_add
5657
// CHECK-FPGA-HW-SAME:,+SPV_EXT_shader_atomic_float_min_max

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3654,12 +3654,18 @@ static pi_result commonEnqueueMemBufferCopyRect(
36543654
assert(src_type == CU_MEMORYTYPE_DEVICE || src_type == CU_MEMORYTYPE_HOST);
36553655
assert(dst_type == CU_MEMORYTYPE_DEVICE || dst_type == CU_MEMORYTYPE_HOST);
36563656

3657-
src_row_pitch = (!src_row_pitch) ? region->width_bytes : src_row_pitch;
3658-
src_slice_pitch = (!src_slice_pitch) ? (region->height_scalar * src_row_pitch)
3659-
: src_slice_pitch;
3660-
dst_row_pitch = (!dst_row_pitch) ? region->width_bytes : dst_row_pitch;
3661-
dst_slice_pitch = (!dst_slice_pitch) ? (region->height_scalar * dst_row_pitch)
3662-
: dst_slice_pitch;
3657+
src_row_pitch = (!src_row_pitch) ? region->width_bytes + src_offset->x_bytes
3658+
: src_row_pitch;
3659+
src_slice_pitch =
3660+
(!src_slice_pitch)
3661+
? ((region->height_scalar + src_offset->y_scalar) * src_row_pitch)
3662+
: src_slice_pitch;
3663+
dst_row_pitch = (!dst_row_pitch) ? region->width_bytes + dst_offset->x_bytes
3664+
: dst_row_pitch;
3665+
dst_slice_pitch =
3666+
(!dst_slice_pitch)
3667+
? ((region->height_scalar + dst_offset->y_scalar) * dst_row_pitch)
3668+
: dst_slice_pitch;
36633669

36643670
CUDA_MEMCPY3D params = {};
36653671

sycl/source/detail/event_impl.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -210,16 +210,21 @@ void event_impl::wait(
210210

211211
void event_impl::wait_and_throw(
212212
std::shared_ptr<cl::sycl::detail::event_impl> Self) {
213+
Command *Cmd = static_cast<Command *>(Self->getCommand());
214+
QueueImplPtr submittedQueue = nullptr;
215+
if (Cmd)
216+
submittedQueue = Cmd->getSubmittedQueue();
217+
213218
wait(Self);
219+
214220
for (auto &EventImpl :
215221
detail::Scheduler::getInstance().getWaitList(std::move(Self))) {
216222
Command *Cmd = (Command *)EventImpl->getCommand();
217223
if (Cmd)
218-
Cmd->getQueue()->throw_asynchronous();
224+
Cmd->getSubmittedQueue()->throw_asynchronous();
219225
}
220-
Command *Cmd = (Command *)getCommand();
221-
if (Cmd)
222-
Cmd->getQueue()->throw_asynchronous();
226+
if (submittedQueue)
227+
submittedQueue->throw_asynchronous();
223228
}
224229

225230
void event_impl::cleanupCommand(

sycl/source/detail/scheduler/commands.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ void Command::waitForEvents(QueueImplPtr Queue,
311311

312312
Command::Command(CommandType Type, QueueImplPtr Queue)
313313
: MQueue(std::move(Queue)), MType(Type) {
314+
MSubmittedQueue = MQueue;
314315
MEvent.reset(new detail::event_impl(MQueue));
315316
MEvent->setCommand(this);
316317
MEvent->setContextImpl(MQueue->getContextImplPtr());
@@ -1535,7 +1536,9 @@ ExecCGCommand::ExecCGCommand(std::unique_ptr<detail::CG> CommandGroup,
15351536
QueueImplPtr Queue)
15361537
: Command(CommandType::RUN_CG, std::move(Queue)),
15371538
MCommandGroup(std::move(CommandGroup)) {
1538-
1539+
if (MCommandGroup->getType() == detail::CG::CodeplayHostTask)
1540+
MSubmittedQueue =
1541+
static_cast<detail::CGHostTask *>(MCommandGroup.get())->MQueue;
15391542
emitInstrumentationDataProxy();
15401543
}
15411544

sycl/source/detail/scheduler/commands.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ class Command {
138138

139139
const QueueImplPtr &getQueue() const { return MQueue; }
140140

141+
const QueueImplPtr &getSubmittedQueue() const { return MSubmittedQueue; }
142+
141143
const EventImplPtr &getEvent() const { return MEvent; }
142144

143145
// Methods needed to support SYCL instrumentation
@@ -195,6 +197,7 @@ class Command {
195197
protected:
196198
EventImplPtr MEvent;
197199
QueueImplPtr MQueue;
200+
QueueImplPtr MSubmittedQueue;
198201

199202
/// Dependency events prepared for waiting by backend.
200203
/// See processDepEvent for details.

sycl/test/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,4 +105,3 @@ if(SYCL_BUILD_PI_ROCM)
105105

106106
add_dependencies(check-sycl check-sycl-rocm)
107107
endif()
108-
add_subdirectory(on-device)

sycl/test/extensions/bfloat16.cpp

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,40 @@
1-
// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clangxx -fsycl-device-only -fsycl-targets=%sycl_triple -S %s -o - | FileCheck %s
22

3-
#include <sycl/sycl.hpp>
43
#include <sycl/ext/intel/experimental/bfloat16.hpp>
4+
#include <sycl/sycl.hpp>
55

66
using sycl::ext::intel::experimental::bfloat16;
77

88
SYCL_EXTERNAL uint16_t some_bf16_intrinsic(uint16_t x, uint16_t y);
99

10-
__attribute__((noinline))
11-
float op(float a, float b) {
12-
bfloat16 A {a};
13-
// CHECK: [[A:%.*]] = tail call spir_func zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float %a)
14-
// CHECK-NOT: fptoui
10+
__attribute__((noinline)) float op(float a, float b) {
11+
// CHECK: define {{.*}} spir_func float @_Z2opff(float [[a:%.*]], float [[b:%.*]])
12+
bfloat16 A{a};
13+
// CHECK: [[A:%.*]] = tail call spir_func zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float [[a]])
14+
// CHECK-NOT: fptoui
1515

16-
bfloat16 B {b};
17-
// CHECK: [[B:%.*]] = tail call spir_func zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float %b)
18-
// CHECK-NOT: fptoui
16+
bfloat16 B{b};
17+
// CHECK: [[B:%.*]] = tail call spir_func zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float [[b]])
18+
// CHECK-NOT: fptoui
1919

2020
bfloat16 C = A + B;
21-
// CHECK: [[A_float:%.*]] = tail call spir_func float @_Z27__spirv_ConvertBF16ToFINTELt(i16 zeroext [[A]])
22-
// CHECK: [[B_float:%.*]] = tail call spir_func float @_Z27__spirv_ConvertBF16ToFINTELt(i16 zeroext [[B]])
23-
// CHECK: [[Add:%.*]] = fadd float [[A_float]], [[B_float]]
24-
// CHECK: [[C:%.*]] = tail call spir_func zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float [[Add]])
25-
// CHECK-NOT: uitofp
26-
// CHECK-NOT: fptoui
21+
// CHECK: [[A_float:%.*]] = tail call spir_func float @_Z27__spirv_ConvertBF16ToFINTELt(i16 zeroext [[A]])
22+
// CHECK: [[B_float:%.*]] = tail call spir_func float @_Z27__spirv_ConvertBF16ToFINTELt(i16 zeroext [[B]])
23+
// CHECK: [[Add:%.*]] = fadd float [[A_float]], [[B_float]]
24+
// CHECK: [[C:%.*]] = tail call spir_func zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float [[Add]])
25+
// CHECK-NOT: uitofp
26+
// CHECK-NOT: fptoui
2727

2828
bfloat16 D = some_bf16_intrinsic(A, C);
29-
// CHECK: [[D:%.*]] = tail call spir_func zeroext i16 @_Z19some_bf16_intrinsictt(i16 zeroext [[A]], i16 zeroext [[C]])
30-
// CHECK-NOT: uitofp
31-
// CHECK-NOT: fptoui
29+
// CHECK: [[D:%.*]] = tail call spir_func zeroext i16 @_Z19some_bf16_intrinsictt(i16 zeroext [[A]], i16 zeroext [[C]])
30+
// CHECK-NOT: uitofp
31+
// CHECK-NOT: fptoui
3232

3333
return D;
34-
// CHECK: [[RetVal:%.*]] = tail call spir_func float @_Z27__spirv_ConvertBF16ToFINTELt(i16 zeroext [[D]])
35-
// CHECK: ret float [[RetVal]]
36-
// CHECK-NOT: uitofp
37-
// CHECK-NOT: fptoui
34+
// CHECK: [[RetVal:%.*]] = tail call spir_func float @_Z27__spirv_ConvertBF16ToFINTELt(i16 zeroext [[D]])
35+
// CHECK: ret float [[RetVal]]
36+
// CHECK-NOT: uitofp
37+
// CHECK-NOT: fptoui
3838
}
3939

4040
int main(int argc, char *argv[]) {

sycl/test/lit.cfg.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
config.suffixes = ['.c', '.cpp', '.dump'] #add .spv. Currently not clear what to do with those
2828

2929
# feature tests are considered not so lightweight, so, they are excluded by default
30-
config.excludes = ['Inputs', 'feature-tests', 'on-device']
30+
config.excludes = ['Inputs', 'feature-tests']
3131

3232
# test_source_root: The root path where tests are located.
3333
config.test_source_root = os.path.dirname(__file__)

sycl/test/on-device/CMakeLists.txt

Lines changed: 0 additions & 56 deletions
This file was deleted.

sycl/test/on-device/README.md

Lines changed: 0 additions & 70 deletions
This file was deleted.

0 commit comments

Comments
 (0)