Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit aa97101

Browse files
committed
Merge remote-tracking branch 'intel_llvm/intel' into simd_view_from_simd
2 parents d948db1 + ee4653d commit aa97101

36 files changed

+1084
-26
lines changed

.github/CODEOWNERS

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ SYCL/AtomicRef @AGindinson
1010
SYCL/Assert @intel/llvm-reviewers-runtime
1111
SYCL/Basic @intel/llvm-reviewers-runtime
1212
SYCL/Config @intel/llvm-reviewers-runtime
13+
SYCL/DiscardEvents @intel/llvm-reviewers-runtime
1314
SYCL/FilterSelector @intel/llvm-reviewers-runtime
1415
SYCL/HostInteropTask @intel/llvm-reviewers-runtime
1516
SYCL/InorderQueue @intel/llvm-reviewers-runtime
@@ -29,7 +30,7 @@ SYCL/DeviceLib/ITTAnnotations @vzakhari @MrSidims @AGindinson
2930
SYCL/DotProduct @rdeodhar
3031

3132
# Explicit SIMD
32-
SYCL/ESIMD @kbobrovs @v-klochkov
33+
SYCL/ESIMD @kbobrovs @v-klochkov @sndmitriev
3334

3435
# Functor
3536
SYCL/Functor @AlexeySachkov

SYCL/Basic/barrier_order.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// UNSUPPORTED: hip
12
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
23
// RUN: %HOST_RUN_PLACEHOLDER %t.out
34
// RUN: %CPU_RUN_PLACEHOLDER %t.out

SYCL/Basic/enqueue_barrier.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
// The test is failing sporadically on Windows OpenCL RTs
77
// Disabling on windows until fixed
8-
// UNSUPPORTED: windows
8+
// UNSUPPORTED: hip_amd, windows
99

1010
#include <CL/sycl.hpp>
1111
#include <sycl/ext/intel/fpga_device_selector.hpp>
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %HOST_RUN_PLACEHOLDER %t.out
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
// This test performs basic check of supporting user defined class that are
8+
// implicitly converted from sycl::item/sycl::nd_item in parallel_for.
9+
10+
#include <CL/sycl.hpp>
11+
#include <iostream>
12+
13+
template <int Dimensions> class item_wrapper {
14+
public:
15+
item_wrapper(sycl::item<Dimensions> it) : m_item(it) {}
16+
17+
size_t get() { return m_item; }
18+
19+
private:
20+
sycl::item<Dimensions> m_item;
21+
};
22+
23+
template <int Dimensions> class nd_item_wrapper {
24+
public:
25+
nd_item_wrapper(sycl::nd_item<Dimensions> it) : m_item(it) {}
26+
27+
size_t get() { return m_item.get_global_linear_id(); }
28+
29+
private:
30+
sycl::nd_item<Dimensions> m_item;
31+
};
32+
33+
int main() {
34+
sycl::queue q;
35+
36+
// Initialize data array
37+
const int sz = 16;
38+
int data[sz] = {0};
39+
for (int i = 0; i < sz; ++i) {
40+
data[i] = i;
41+
}
42+
43+
// Check user defined sycl::item wrapper
44+
sycl::buffer<int> data_buf(data, sz);
45+
q.submit([&](sycl::handler &h) {
46+
auto buf_acc = data_buf.get_access<sycl::access::mode::read_write>(h);
47+
h.parallel_for(sycl::range<1>{sz},
48+
[=](item_wrapper<1> item) { buf_acc[item.get()] += 1; });
49+
});
50+
q.wait();
51+
bool failed = false;
52+
53+
{
54+
auto buf_acc = data_buf.get_access<sycl::access::mode::read>();
55+
for (int i = 0; i < sz; ++i) {
56+
failed |= (buf_acc[i] != i + 1);
57+
}
58+
if (failed) {
59+
std::cout << "item_wrapper check failed" << std::endl;
60+
return 1;
61+
}
62+
}
63+
64+
// Check user defined sycl::nd_item wrapper
65+
q.submit([&](sycl::handler &h) {
66+
auto buf_acc = data_buf.get_access<sycl::access::mode::read_write>(h);
67+
h.parallel_for(sycl::nd_range<1>{sz, 2},
68+
[=](nd_item_wrapper<1> item) { buf_acc[item.get()] += 1; });
69+
});
70+
q.wait();
71+
72+
{
73+
auto buf_acc = data_buf.get_access<sycl::access::mode::read>();
74+
for (int i = 0; i < sz; ++i) {
75+
failed |= (buf_acc[i] != i + 2);
76+
}
77+
if (failed) {
78+
std::cout << "nd_item_wrapper check failed" << std::endl;
79+
return 1;
80+
}
81+
}
82+
83+
std::cout << "Test passed" << std::endl;
84+
return 0;
85+
}

SYCL/Basic/stream/stream.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER
55
// RUN: %GPU_RUN_ON_LINUX_PLACEHOLDER %t.out %GPU_CHECK_ON_LINUX_PLACEHOLDER
66
// RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER
7-
//
8-
// Missing built-ins on AMD
9-
// XFAIL: hip_amd
107

118
//==------------------ stream.cpp - SYCL stream basic test -----------------==//
129
//

SYCL/Basic/submit_barrier.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// UNSUPPORTED: hip_amd
12
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
23
// RUN: %HOST_RUN_PLACEHOLDER %t.out
34
// RUN: %CPU_RUN_PLACEHOLDER %t.out

SYCL/DeviceLib/built-ins/nan.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
// RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
66
// RUN: %ACC_RUN_PLACEHOLDER %t.out
77

8-
// XFAIL: cuda
9-
108
#include <CL/sycl.hpp>
119

1210
#include <cassert>

SYCL/DeviceLib/built-ins/scalar_math.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@
33
// RUN: %CPU_RUN_PLACEHOLDER %t.out
44
// RUN: %GPU_RUN_PLACEHOLDER %t.out
55
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6-
//
7-
// Incorrect results with hip on AMD
8-
// XFAIL: hip_amd
96

107
#include <CL/sycl.hpp>
118

SYCL/DeviceLib/built-ins/vector_relational.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// FIXME unsupported on windows (opencl and level-zero) until fix of libdevice
2+
// UNSUPPORTED: windows && (opencl || level_zero)
13
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
24
// RUN: %HOST_RUN_PLACEHOLDER %t.out
35
// RUN: %CPU_RUN_PLACEHOLDER %t.out

SYCL/DeviceLib/cmath_fp64_test.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// FIXME unsupported on windows (opencl) until fix of libdevice fails
2+
// UNSUPPORTED: windows && opencl
13
// RUN: %clangxx -fsycl %s -o %t.out
24
// RUN: %HOST_RUN_PLACEHOLDER %t.out
35
// RUN: %CPU_RUN_PLACEHOLDER %t.out

SYCL/DeviceLib/cmath_test.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// FIXME unsupported on windows (opencl) until fix of libdevice fails
2+
// UNSUPPORTED: windows && opencl
13
// RUN: %clangxx -fsycl -fno-builtin %s -o %t.out
24
// RUN: %HOST_RUN_PLACEHOLDER %t.out
35
// RUN: %CPU_RUN_PLACEHOLDER %t.out

SYCL/DeviceLib/math_fp64_test.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// FIXME unsupported on windows (opencl) until fix of libdevice fails
2+
// UNSUPPORTED: windows && opencl
13
// RUN: %clangxx -fsycl %s -o %t.out
24
// RUN: %HOST_RUN_PLACEHOLDER %t.out
35
// RUN: %CPU_RUN_PLACEHOLDER %t.out

SYCL/DeviceLib/math_test.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// FIXME unsupported on windows (opencl) until fix of libdevice fails
2+
// UNSUPPORTED: windows && opencl
13
// RUN: %clangxx -fsycl %s -o %t.out
24
// RUN: %HOST_RUN_PLACEHOLDER %t.out
35
// RUN: %CPU_RUN_PLACEHOLDER %t.out
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
// FIXME unsupported on level_zero until L0 Plugin support becomes available for
2+
// discard_queue_events
3+
// UNSUPPORTED: level_zero
4+
//
5+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
6+
//
7+
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true
8+
// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
9+
// RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true
10+
// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
11+
// RUN: env SYCL_PI_TRACE=2 %ACC_RUN_PLACEHOLDER %t.out &> %t.txt || true
12+
// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
13+
//
14+
// The test checks that the last parameter is `nullptr` for
15+
// piEnqueueKernelLaunch for USM kernel using local accessor, but
16+
// is not `nullptr` for kernel using buffer accessor.
17+
// {{0|0000000000000000}} is required for various output on Linux and Windows.
18+
//
19+
// CHECK: ---> piEnqueueKernelLaunch(
20+
// CHECK: pi_event * :
21+
// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ]
22+
//
23+
// CHECK: ---> piEnqueueKernelLaunch(
24+
// CHECK: pi_event * :
25+
// CHECK-NOT: pi_event * : {{0|0000000000000000}}[ nullptr ]
26+
// CHECK: ---> pi_result : PI_SUCCESS
27+
//
28+
// CHECK: The test passed.
29+
30+
#include <CL/sycl.hpp>
31+
#include <cassert>
32+
#include <iostream>
33+
34+
using namespace cl::sycl;
35+
static constexpr int MAGIC_NUM = -1;
36+
static constexpr size_t BUFFER_SIZE = 16;
37+
38+
void RunKernelHelper(sycl::queue Q,
39+
const std::function<void(int *Harray)> &TestFunction) {
40+
int *Harray = sycl::malloc_host<int>(BUFFER_SIZE, Q);
41+
assert(Harray != nullptr);
42+
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
43+
Harray[i] = MAGIC_NUM;
44+
}
45+
46+
TestFunction(Harray);
47+
48+
// Checks result
49+
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
50+
size_t expected = i + 10;
51+
assert(Harray[i] == expected);
52+
}
53+
free(Harray, Q);
54+
}
55+
56+
int main(int Argc, const char *Argv[]) {
57+
58+
sycl::property_list props{
59+
sycl::property::queue::in_order{},
60+
sycl::ext::oneapi::property::queue::discard_events{}};
61+
sycl::queue Q(props);
62+
sycl::range<1> Range(BUFFER_SIZE);
63+
64+
RunKernelHelper(Q, [&](int *Harray) {
65+
Q.submit([&](sycl::handler &CGH) {
66+
const size_t LocalMemSize = BUFFER_SIZE;
67+
using LocalAccessor =
68+
sycl::accessor<int, 1, sycl::access::mode::read_write,
69+
sycl::access::target::local>;
70+
LocalAccessor LocalAcc(LocalMemSize, CGH);
71+
72+
CGH.parallel_for<class kernel_using_local_memory>(
73+
Range, [=](sycl::item<1> itemID) {
74+
size_t i = itemID.get_id(0);
75+
int *Ptr = LocalAcc.get_pointer();
76+
Ptr[i] = i + 5;
77+
Harray[i] = Ptr[i] + 5;
78+
});
79+
});
80+
Q.wait();
81+
});
82+
83+
RunKernelHelper(Q, [&](int *Harray) {
84+
sycl::buffer<int, 1> Buf(Range);
85+
Q.submit([&](sycl::handler &CGH) {
86+
auto Acc = Buf.get_access<sycl::access::mode::read_write>(CGH);
87+
CGH.parallel_for<class kernel_using_buffer_accessor>(
88+
Range, [=](sycl::item<1> itemID) {
89+
size_t i = itemID.get_id(0);
90+
Harray[i] = i + 10;
91+
Acc[i] = i + 20;
92+
});
93+
});
94+
Q.wait();
95+
96+
// Checks result
97+
auto HostAcc = Buf.get_access<sycl::access::mode::read>();
98+
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
99+
size_t expected = i + 20;
100+
assert(HostAcc[i] == expected);
101+
}
102+
});
103+
104+
std::cout << "The test passed." << std::endl;
105+
return 0;
106+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// If necessary, the test can be removed as run_on_host_intel() is deprecated
2+
// and host_task() which should be used instead does not use the PI call
3+
// piEnqueueNativeKernel
4+
//
5+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
6+
//
7+
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out &> %t.txt
8+
// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt
9+
//
10+
// The test checks that the last parameter is `nullptr` for
11+
// piEnqueueNativeKernel.
12+
// {{0|0000000000000000}} is required for various output on Linux and Windows.
13+
//
14+
// CHECK: ---> piEnqueueNativeKernel(
15+
// CHECK: pi_event * :
16+
// CHECK-NEXT: pi_event * : {{0|0000000000000000}}[ nullptr ]
17+
//
18+
// CHECK: The test passed.
19+
20+
#include <CL/sycl.hpp>
21+
#include <cassert>
22+
#include <iostream>
23+
24+
using namespace cl::sycl;
25+
26+
void CheckArray(sycl::queue Q, int *x, size_t buffer_size, int expected) {
27+
Q.wait();
28+
for (size_t i = 0; i < buffer_size; ++i)
29+
assert(x[i] == expected);
30+
}
31+
32+
static constexpr size_t BUFFER_SIZE = 16;
33+
34+
int main(int Argc, const char *Argv[]) {
35+
36+
sycl::property_list Props{
37+
sycl::property::queue::in_order{},
38+
sycl::ext::oneapi::property::queue::discard_events{}};
39+
sycl::queue Q(Props);
40+
41+
int *x = sycl::malloc_shared<int>(BUFFER_SIZE, Q);
42+
assert(x != nullptr);
43+
44+
Q.submit([&](sycl::handler &CGH) {
45+
CGH.run_on_host_intel([=]() {
46+
for (size_t i = 0; i < BUFFER_SIZE; ++i)
47+
x[i] = 8;
48+
});
49+
});
50+
CheckArray(Q, x, BUFFER_SIZE, 8);
51+
52+
Q.wait();
53+
free(x, Q);
54+
55+
std::cout << "The test passed." << std::endl;
56+
return 0;
57+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#include <CL/sycl.hpp>
2+
#include <cassert>
3+
#include <iostream>
4+
5+
using namespace cl::sycl;
6+
static constexpr int MAGIC_NUM = -1;
7+
static constexpr size_t BUFFER_SIZE = 16;
8+
9+
int main(int Argc, const char *Argv[]) {
10+
11+
sycl::property_list Props{
12+
sycl::property::queue::in_order{},
13+
sycl::ext::oneapi::property::queue::discard_events{}};
14+
sycl::queue Q(Props);
15+
16+
sycl::range<1> Range(BUFFER_SIZE);
17+
int *Harray = sycl::malloc_host<int>(BUFFER_SIZE, Q);
18+
if (Harray == nullptr) {
19+
return -1;
20+
}
21+
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
22+
Harray[i] = MAGIC_NUM;
23+
}
24+
25+
Q.submit([&](sycl::handler &CGH) {
26+
CGH.parallel_for<class kernel_using_assert>(
27+
Range, [=](sycl::item<1> itemID) {
28+
size_t i = itemID.get_id(0);
29+
Harray[i] = i + 10;
30+
assert(Harray[i] == i + 10 && "assert message");
31+
});
32+
});
33+
Q.wait();
34+
35+
// Checks result
36+
for (size_t i = 0; i < BUFFER_SIZE; ++i) {
37+
size_t expected = i + 10;
38+
if (Harray[i] != expected)
39+
return -1;
40+
}
41+
free(Harray, Q);
42+
43+
std::cout << "The test passed." << std::endl;
44+
return 0;
45+
}

0 commit comments

Comments
 (0)