Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 458d034

Browse files
authored
Merge branch 'intel:intel' into raaiq
2 parents bb66a8a + 67f591c commit 458d034

File tree

11 files changed

+398
-41
lines changed

11 files changed

+398
-41
lines changed

SYCL/Basic/kernel_info.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,14 @@ int main() {
4646

4747
device dev = q.get_device();
4848
const size_t wgSize =
49-
krn.get_work_group_info<info::kernel_work_group::work_group_size>(dev);
49+
krn.get_info<info::kernel_device_specific::work_group_size>(dev);
5050
assert(wgSize > 0);
5151
const size_t wgSizeNew =
5252
krn.get_info<info::kernel_device_specific::work_group_size>(dev);
5353
assert(wgSizeNew > 0);
5454
assert(wgSize == wgSizeNew);
55-
const size_t prefWGSizeMult = krn.get_work_group_info<
56-
info::kernel_work_group::preferred_work_group_size_multiple>(dev);
55+
const size_t prefWGSizeMult = krn.get_info<
56+
info::kernel_device_specific::preferred_work_group_size_multiple>(dev);
5757
assert(prefWGSizeMult > 0);
5858
const size_t prefWGSizeMultNew = krn.get_info<
5959
info::kernel_device_specific::preferred_work_group_size_multiple>(dev);

SYCL/DeprecatedFeatures/kernel_info.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,14 @@ int main() {
5151

5252
device dev = q.get_device();
5353
const size_t wgSize =
54-
krn.get_work_group_info<info::kernel_work_group::work_group_size>(dev);
54+
krn.get_info<info::kernel_device_specific::work_group_size>(dev);
5555
assert(wgSize > 0);
5656
const size_t wgSizeNew =
5757
krn.get_info<info::kernel_device_specific::work_group_size>(dev);
5858
assert(wgSizeNew > 0);
5959
assert(wgSize == wgSizeNew);
60-
const size_t prefWGSizeMult = krn.get_work_group_info<
61-
info::kernel_work_group::preferred_work_group_size_multiple>(dev);
60+
const size_t prefWGSizeMult = krn.get_info<
61+
info::kernel_device_specific::preferred_work_group_size_multiple>(dev);
6262
assert(prefWGSizeMult > 0);
6363
const size_t prefWGSizeMultNew = krn.get_info<
6464
info::kernel_device_specific::preferred_work_group_size_multiple>(dev);

SYCL/DeprecatedFeatures/parallel_for_range.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ int main() {
322322

323323
kernel K = P.get_kernel<class OpenCL2XNegativeA>();
324324
size_t MaxKernelWGSize =
325-
K.get_work_group_info<info::kernel_work_group::work_group_size>(
325+
K.get_info<info::kernel_device_specific::work_group_size>(
326326
Q.get_device());
327327
try {
328328
Q.submit([&](handler &CGH) {
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// TODO: device_global without the device_image_scope property is not currently
2+
// initialized on device. Enable the following test cases when it is
3+
// supported.
4+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
5+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
6+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
7+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
8+
//
9+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_source -DUSE_DEVICE_IMAGE_SCOPE %s -o %t_dev_img_scope.out
10+
// RUN: %CPU_RUN_PLACEHOLDER %t_dev_img_scope.out
11+
// RUN: %GPU_RUN_PLACEHOLDER %t_dev_img_scope.out
12+
// RUN: %ACC_RUN_PLACEHOLDER %t_dev_img_scope.out
13+
//
14+
// Currently fails for CPUs due to missing support for the SPIR-V extension.
15+
// Currently crashes on accelerators.
16+
// XFAIL: cpu, accelerator
17+
//
18+
// Tests operator-> on device_global.
19+
// NOTE: USE_DEVICE_IMAGE_SCOPE needs both kernels to be in the same image so
20+
// we set -fsycl-device-code-split=per_source.
21+
22+
#include <sycl/sycl.hpp>
23+
24+
using namespace sycl;
25+
using namespace sycl::ext::oneapi::experimental;
26+
27+
struct StructWithMember {
28+
int x;
29+
int getX() { return x; }
30+
};
31+
32+
struct StructWithDeref {
33+
StructWithMember y[1];
34+
StructWithMember *operator->() { return y; }
35+
};
36+
37+
#ifdef USE_DEVICE_IMAGE_SCOPE
38+
device_global<StructWithMember *, decltype(properties{device_image_scope})>
39+
DeviceGlobalVar1;
40+
device_global<StructWithDeref, decltype(properties{device_image_scope})>
41+
DeviceGlobalVar2;
42+
#else
43+
device_global<StructWithMember *> DeviceGlobalVar1;
44+
device_global<StructWithDeref> DeviceGlobalVar2;
45+
#endif
46+
47+
int main() {
48+
queue Q;
49+
if (Q.is_host()) {
50+
std::cout << "Skipping test\n";
51+
return 0;
52+
}
53+
54+
StructWithMember *DGMem = malloc_device<StructWithMember>(1, Q);
55+
56+
Q.single_task([=]() {
57+
DeviceGlobalVar1 = DGMem;
58+
DeviceGlobalVar1->x = 1234;
59+
DeviceGlobalVar2->x = 4321;
60+
}).wait();
61+
62+
int Out[2] = {0, 0};
63+
{
64+
buffer<int, 1> OutBuf{Out, 2};
65+
Q.submit([&](handler &CGH) {
66+
auto OutAcc = OutBuf.get_access<access::mode::write>(CGH);
67+
CGH.single_task([=]() {
68+
OutAcc[0] = DeviceGlobalVar1->getX();
69+
OutAcc[1] = DeviceGlobalVar2->getX();
70+
});
71+
});
72+
}
73+
free(DGMem, Q);
74+
75+
assert(Out[0] == 1234 && "First value does not match.");
76+
assert(Out[1] == 4321 && "Second value does not match.");
77+
return 0;
78+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// TODO: device_global without the device_image_scope property is not currently
2+
// initialized on device. Enable the following test cases when it is
3+
// supported.
4+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
5+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
6+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
7+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
8+
//
9+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_source -DUSE_DEVICE_IMAGE_SCOPE %s -o %t_dev_img_scope.out
10+
// RUN: %CPU_RUN_PLACEHOLDER %t_dev_img_scope.out
11+
// RUN: %GPU_RUN_PLACEHOLDER %t_dev_img_scope.out
12+
// RUN: %ACC_RUN_PLACEHOLDER %t_dev_img_scope.out
13+
//
14+
// Currently fails for CPUs due to missing support for the SPIR-V extension.
15+
// Currently crashes on accelerators.
16+
// XFAIL: cpu, accelerator
17+
//
18+
// Tests basic device_global access through device kernels.
19+
// NOTE: USE_DEVICE_IMAGE_SCOPE needs both kernels to be in the same image so
20+
// we set -fsycl-device-code-split=per_source.
21+
22+
#include <sycl/sycl.hpp>
23+
24+
using namespace sycl;
25+
using namespace sycl::ext::oneapi::experimental;
26+
27+
#ifdef USE_DEVICE_IMAGE_SCOPE
28+
device_global<int[4], decltype(properties{device_image_scope})> DeviceGlobalVar;
29+
#else
30+
device_global<int[4]> DeviceGlobalVar;
31+
#endif
32+
33+
int main() {
34+
queue Q;
35+
if (Q.is_host()) {
36+
std::cout << "Skipping test\n";
37+
return 0;
38+
}
39+
40+
Q.single_task([=]() { DeviceGlobalVar.get()[0] = 42; });
41+
42+
int OutVal = 0;
43+
{
44+
buffer<int, 1> OutBuf(&OutVal, 1);
45+
Q.submit([&](handler &CGH) {
46+
auto OutAcc = OutBuf.get_access<access::mode::write>(CGH);
47+
CGH.single_task([=]() { OutAcc[0] = DeviceGlobalVar.get()[0]; });
48+
});
49+
}
50+
assert(OutVal == 42 && "Read value does not match.");
51+
return 0;
52+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// TODO: device_global without the device_image_scope property is not currently
2+
// initialized on device. Enable the following test cases when it is
3+
// supported.
4+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
5+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
6+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
7+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
8+
//
9+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_source -DUSE_DEVICE_IMAGE_SCOPE %s -o %t_dev_img_scope.out
10+
// RUN: %CPU_RUN_PLACEHOLDER %t_dev_img_scope.out
11+
// RUN: %GPU_RUN_PLACEHOLDER %t_dev_img_scope.out
12+
// RUN: %ACC_RUN_PLACEHOLDER %t_dev_img_scope.out
13+
//
14+
// Currently fails for CPUs due to missing support for the SPIR-V extension.
15+
// Currently crashes on accelerators.
16+
// XFAIL: cpu, accelerator
17+
//
18+
// Tests the passthrough of operators on device_global.
19+
// NOTE: USE_DEVICE_IMAGE_SCOPE needs both kernels to be in the same image so
20+
// we set -fsycl-device-code-split=per_source.
21+
22+
#include <sycl/sycl.hpp>
23+
24+
using namespace sycl;
25+
using namespace sycl::ext::oneapi::experimental;
26+
27+
#ifdef USE_DEVICE_IMAGE_SCOPE
28+
device_global<int, decltype(properties{device_image_scope})> DeviceGlobalVar;
29+
#else
30+
device_global<int> DeviceGlobalVar;
31+
#endif
32+
33+
int main() {
34+
queue Q;
35+
if (Q.is_host()) {
36+
std::cout << "Skipping test\n";
37+
return 0;
38+
}
39+
40+
Q.single_task([]() {
41+
DeviceGlobalVar = 2;
42+
DeviceGlobalVar += 3;
43+
DeviceGlobalVar = DeviceGlobalVar * DeviceGlobalVar;
44+
DeviceGlobalVar = DeviceGlobalVar - 3;
45+
DeviceGlobalVar = 25 - DeviceGlobalVar;
46+
}).wait();
47+
48+
int Out = 0;
49+
{
50+
buffer<int, 1> OutBuf{&Out, 1};
51+
Q.submit([&](handler &CGH) {
52+
auto OutAcc = OutBuf.get_access<access::mode::write>(CGH);
53+
CGH.single_task([=]() { OutAcc[0] = DeviceGlobalVar; });
54+
});
55+
}
56+
assert(Out == 3 && "Read value does not match.");
57+
return 0;
58+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// TODO: device_global without the device_image_scope property is not currently
2+
// initialized on device. Enable the following test cases when it is
3+
// supported.
4+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
5+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
6+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
7+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
8+
//
9+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_source -DUSE_DEVICE_IMAGE_SCOPE %s -o %t_dev_img_scope.out
10+
// RUN: %CPU_RUN_PLACEHOLDER %t_dev_img_scope.out
11+
// RUN: %GPU_RUN_PLACEHOLDER %t_dev_img_scope.out
12+
// RUN: %ACC_RUN_PLACEHOLDER %t_dev_img_scope.out
13+
//
14+
// Currently fails for CPUs due to missing support for the SPIR-V extension.
15+
// Currently crashes on accelerators.
16+
// XFAIL: cpu, accelerator
17+
//
18+
// Tests operator[] on device_global.
19+
// NOTE: USE_DEVICE_IMAGE_SCOPE needs both kernels to be in the same image so
20+
// we set -fsycl-device-code-split=per_source.
21+
22+
#include <sycl/sycl.hpp>
23+
24+
using namespace sycl;
25+
using namespace sycl::ext::oneapi::experimental;
26+
27+
struct StructWithSubscript {
28+
int x[4];
29+
int &operator[](std::ptrdiff_t index) { return x[index]; }
30+
};
31+
32+
#ifdef USE_DEVICE_IMAGE_SCOPE
33+
device_global<int[4], decltype(properties{device_image_scope})>
34+
DeviceGlobalVar1;
35+
device_global<StructWithSubscript, decltype(properties{device_image_scope})>
36+
DeviceGlobalVar2;
37+
#else
38+
device_global<int[4]> DeviceGlobalVar1;
39+
device_global<StructWithSubscript> DeviceGlobalVar2;
40+
#endif
41+
42+
int main() {
43+
queue Q;
44+
if (Q.is_host()) {
45+
std::cout << "Skipping test\n";
46+
return 0;
47+
}
48+
49+
Q.single_task([]() {
50+
DeviceGlobalVar1[2] = 1234;
51+
DeviceGlobalVar2[1] = 4321;
52+
}).wait();
53+
54+
int Out[2] = {0, 0};
55+
{
56+
buffer<int, 1> OutBuf{Out, 2};
57+
Q.submit([&](handler &CGH) {
58+
auto OutAcc = OutBuf.get_access<access::mode::write>(CGH);
59+
CGH.single_task([=]() {
60+
OutAcc[0] = DeviceGlobalVar1[2];
61+
OutAcc[1] = DeviceGlobalVar2[1];
62+
});
63+
});
64+
}
65+
assert(Out[0] == 1234 && "First value does not match.");
66+
assert(Out[1] == 4321 && "Second value does not match.");
67+
return 0;
68+
}

SYCL/ESIMD/printf.cpp

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,6 @@ static const CONSTANT char format_hello_world[] = "Hello, World!\n";
4343
// Static isn't really needed if you define it in global scope
4444
const CONSTANT char format_int[] = "%d\n";
4545

46-
static const CONSTANT char format_vec[] = "%d,%d,%d,%d\n";
47-
48-
const CONSTANT char format_hello_world_2[] = "%lu: Hello, World!\n";
49-
5046
int main() {
5147
{
5248
queue Queue(esimd_test::ESIMDSelector{},
@@ -56,17 +52,12 @@ int main() {
5652
CGH.single_task([=]() SYCL_ESIMD_KERNEL {
5753
// String
5854
oneapi::experimental::printf(format_hello_world);
59-
// Due to a bug in Intel CPU Runtime for OpenCL on Windows, information
60-
// printed using such format strings (without %-specifiers) might
61-
// appear in different order if output is redirected to a file or
62-
// another app
63-
// FIXME: strictly check output order once the bug is fixed
64-
// CHECK: {{(Hello, World!)?}}
55+
// CHECK: Hello, World!
6556

6657
// Integral types
6758
oneapi::experimental::printf(format_int, (int32_t)123);
6859
oneapi::experimental::printf(format_int, (int32_t)-123);
69-
// CHECK: 123
60+
// CHECK-NEXT: 123
7061
// CHECK-NEXT: -123
7162

7263
// Floating point types
@@ -93,30 +84,39 @@ int main() {
9384
}
9485

9586
{
96-
queue Queue(esimd_test::ESIMDSelector{},
97-
esimd_test::createExceptionHandler());
87+
queue Q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler());
9888
// printf in parallel_for
99-
Queue.submit([&](handler &CGH) {
100-
CGH.parallel_for(range<1>(10), [=](id<1> i) SYCL_ESIMD_KERNEL {
101-
// cast to uint64_t to be sure that we pass 64-bit unsigned value
102-
oneapi::experimental::printf(format_hello_world_2, (uint64_t)i.get(0));
103-
});
104-
});
105-
Queue.wait();
106-
// CHECK-NEXT: {{[0-9]+}}: Hello, World!
107-
// CHECK-NEXT: {{[0-9]+}}: Hello, World!
108-
// CHECK-NEXT: {{[0-9]+}}: Hello, World!
109-
// CHECK-NEXT: {{[0-9]+}}: Hello, World!
110-
// CHECK-NEXT: {{[0-9]+}}: Hello, World!
111-
// CHECK-NEXT: {{[0-9]+}}: Hello, World!
112-
// CHECK-NEXT: {{[0-9]+}}: Hello, World!
113-
// CHECK-NEXT: {{[0-9]+}}: Hello, World!
114-
// CHECK-NEXT: {{[0-9]+}}: Hello, World!
115-
// CHECK-NEXT: {{[0-9]+}}: Hello, World!
89+
constexpr int SIMD_SIZE = 16;
90+
constexpr int WORK_SIZE = SIMD_SIZE;
91+
int *Mem = malloc_shared<int>(WORK_SIZE * SIMD_SIZE, Q);
92+
for (int I = 0; I < WORK_SIZE * SIMD_SIZE; I++)
93+
Mem[I] = I;
94+
std::cout << "Start parallel_for:" << std::endl;
95+
Q.parallel_for(range<1>(WORK_SIZE), [=](id<1> i) SYCL_ESIMD_KERNEL {
96+
static const CONSTANT char STR_LU_D[] = "Thread-id: %d, Value: %d\n";
97+
ext::intel::esimd::simd<int, SIMD_SIZE> Vec(Mem + i * SIMD_SIZE);
98+
// cast to uint64_t to be sure that we pass 64-bit unsigned value
99+
oneapi::experimental::printf(STR_LU_D, (uint64_t)i[0], (int)Vec[i]);
100+
}).wait();
101+
free(Mem, Q);
102+
// CHECK-LABEL: Start parallel_for
103+
// CHECK-DAG: Thread-id: 0, Value: 0
104+
// CHECK-DAG: Thread-id: 1, Value: 17
105+
// CHECK-DAG: Thread-id: 2, Value: 34
106+
// CHECK-DAG: Thread-id: 3, Value: 51
107+
// CHECK-DAG: Thread-id: 4, Value: 68
108+
// CHECK-DAG: Thread-id: 5, Value: 85
109+
// CHECK-DAG: Thread-id: 6, Value: 102
110+
// CHECK-DAG: Thread-id: 7, Value: 119
111+
// CHECK-DAG: Thread-id: 8, Value: 136
112+
// CHECK-DAG: Thread-id: 9, Value: 153
113+
// CHECK-DAG: Thread-id: 10, Value: 170
114+
// CHECK-DAG: Thread-id: 11, Value: 187
115+
// CHECK-DAG: Thread-id: 12, Value: 204
116+
// CHECK-DAG: Thread-id: 13, Value: 221
117+
// CHECK-DAG: Thread-id: 14, Value: 238
118+
// CHECK-DAG: Thread-id: 15, Value: 255
116119
}
117120

118-
// FIXME: strictly check output order once the bug mentioned above is fixed
119-
// CHECK: {{(Hello, World!)?}}
120-
121121
return 0;
122122
}

0 commit comments

Comments
 (0)