Skip to content

Commit 75a77fd

Browse files
committed
Migrate test changes from intel/llvm-test-suite
Signed-off-by: Lukas Sommer <[email protected]>
1 parent 8fcd4c7 commit 75a77fd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+276
-80
lines changed

sycl/test-e2e/KernelFusion/abort_fusion.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out
22
// RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\
33
// RUN: %CPU_CHECK_PLACEHOLDER
44
// RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\
55
// RUN: %GPU_CHECK_PLACEHOLDER
6-
// UNSUPPORTED: cuda || hip
6+
// UNSUPPORTED: hip
77
// REQUIRES: fusion
88

99
// Test fusion being aborted: Different scenarios causing the JIT compiler

sycl/test-e2e/KernelFusion/abort_internalization.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O2 -o %t.out
2-
// RUN: env SYCL_ENABLE_FUSION_CACHING=0 SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -O2 -fsycl-embed-ir %s -o %t.out
2+
// RUN: env SYCL_RT_WARNING_LEVEL=1 SYCL_ENABLE_FUSION_CACHING=0\
3+
// RUN: %CPU_RUN_PLACEHOLDER %t.out 2>&1\
34
// RUN: %CPU_CHECK_PLACEHOLDER
4-
// RUN: env SYCL_ENABLE_FUSION_CACHING=0 SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\
5+
// RUN: env SYCL_RT_WARNING_LEVEL=1 SYCL_ENABLE_FUSION_CACHING=0\
6+
// RUN: %GPU_RUN_PLACEHOLDER %t.out 2>&1\
57
// RUN: %GPU_CHECK_PLACEHOLDER
6-
// UNSUPPORTED: cuda || hip
8+
// UNSUPPORTED: hip
79
// REQUIRES: fusion
810

911
// Test incomplete internalization: Different scenarios causing the JIT compiler

sycl/test-e2e/KernelFusion/abort_internalization_stored_ptr.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out
22
// RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\
33
// RUN: %CPU_CHECK_PLACEHOLDER --implicit-check-not "Computation error" --implicit-check-not "Internalized"
44
// RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\
55
// RUN: %GPU_CHECK_PLACEHOLDER --implicit-check-not "Computation error" --implicit-check-not "Internalized"
6-
// UNSUPPORTED: cuda || hip
6+
// UNSUPPORTED: hip
77
// REQUIRES: fusion
88

99
// Test pointers being stored are not internalized.

sycl/test-e2e/KernelFusion/barrier_local_internalization.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O2 -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion
66

77
// Test complete fusion with local internalization and a combination of kernels

sycl/test-e2e/KernelFusion/buffer_internalization.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O2 -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion
66

77
// Test complete fusion with private internalization specified on the

sycl/test-e2e/KernelFusion/cancel_fusion.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion
66

77
// Test cancel fusion

sycl/test-e2e/KernelFusion/complete_fusion.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion
66

77
// Test complete fusion without any internalization

sycl/test-e2e/KernelFusion/device_info_descriptor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// XFAIL: cuda || hip
4+
// XFAIL: hip
55
// REQUIRES: fusion
66

77
// Test correct return from device information descriptor.

sycl/test-e2e/KernelFusion/diamond_shape.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O2 -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion
66

77
// Test complete fusion with private internalization specified on the
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
// UNSUPPORTED: hip
5+
// REQUIRES: fusion
6+
7+
// Test complete fusion with local internalization specified on the
8+
// accessors for a combination of four kernels, forming a diamond-like shape and
9+
// repeating one of the kernels.
10+
11+
#include <sycl/sycl.hpp>
12+
13+
using namespace sycl;
14+
15+
// Named kernel functor: for each index i, stores accIn1[i] + accIn2[i] into
// accOut[i]. It is submitted twice in main() below (first and last kernel).
struct AddKernel {
16+
accessor<int, 1> accIn1;
17+
accessor<int, 1> accIn2;
18+
accessor<int, 1> accOut;
19+
20+
// Element-wise addition of the two input accessors into the output accessor.
void operator()(id<1> i) const { accOut[i] = accIn1[i] + accIn2[i]; }
21+
};
22+
23+
// Test driver: submits four kernels (AddKernel, KernelOne, KernelTwo,
// AddKernel) to a fusion-enabled queue between start_fusion() and
// complete_fusion(), then asserts the final output values and that the host
// arrays behind the three temporary buffers still hold their initial -1.
// Returns 0 when all assertions pass.
int main() {
24+
constexpr size_t dataSize = 512;
25+
int in1[dataSize], in2[dataSize], in3[dataSize], tmp1[dataSize],
26+
tmp2[dataSize], tmp3[dataSize], out[dataSize];
27+
28+
// Inputs are fixed multiples of the index; temporaries and output are seeded
// with -1 so the checks at the end can tell whether they were written.
for (size_t i = 0; i < dataSize; ++i) {
29+
in1[i] = i * 2;
30+
in2[i] = i * 3;
31+
in3[i] = i * 4;
32+
tmp1[i] = -1;
33+
tmp2[i] = -1;
34+
tmp3[i] = -1;
35+
out[i] = -1;
36+
}
37+
38+
// Queue is created with the enable_fusion property.
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};
39+
40+
{
41+
buffer<int> bIn1{in1, range{dataSize}};
42+
buffer<int> bIn2{in2, range{dataSize}};
43+
buffer<int> bIn3{in3, range{dataSize}};
44+
// The three temporary buffers carry the promote_local property; per the
// file's header comment this is what requests local internalization.
buffer<int> bTmp1{
45+
tmp1,
46+
range{dataSize},
47+
{sycl::ext::codeplay::experimental::property::promote_local{}}};
48+
buffer<int> bTmp2{
49+
tmp2,
50+
range{dataSize},
51+
{sycl::ext::codeplay::experimental::property::promote_local{}}};
52+
buffer<int> bTmp3{
53+
tmp3,
54+
range{dataSize},
55+
{sycl::ext::codeplay::experimental::property::promote_local{}}};
56+
buffer<int> bOut{out, range{dataSize}};
57+
58+
ext::codeplay::experimental::fusion_wrapper fw{q};
59+
fw.start_fusion();
60+
61+
assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode");
62+
63+
// Kernel 1 (top of the diamond): tmp1 = in1 + in2, i.e. 2*i + 3*i = 5*i.
q.submit([&](handler &cgh) {
64+
auto accIn1 = bIn1.get_access(cgh);
65+
auto accIn2 = bIn2.get_access(cgh);
66+
auto accTmp1 = bTmp1.get_access(cgh);
67+
cgh.parallel_for<AddKernel>(nd_range<1>{{dataSize}, {16}},
68+
AddKernel{accIn1, accIn2, accTmp1});
69+
});
70+
71+
// Kernel 2 (first branch): tmp2 = tmp1 * in3, i.e. 5*i * 4*i = 20*i*i.
q.submit([&](handler &cgh) {
72+
auto accTmp1 = bTmp1.get_access(cgh);
73+
auto accIn3 = bIn3.get_access(cgh);
74+
auto accTmp2 = bTmp2.get_access(cgh);
75+
cgh.parallel_for<class KernelOne>(
76+
nd_range<1>{{dataSize}, {16}},
77+
[=](id<1> i) { accTmp2[i] = accTmp1[i] * accIn3[i]; });
78+
});
79+
80+
// Kernel 3 (second branch): tmp3 = tmp1 * 5, i.e. 25*i.
q.submit([&](handler &cgh) {
81+
auto accTmp1 = bTmp1.get_access(cgh);
82+
auto accTmp3 = bTmp3.get_access(cgh);
83+
cgh.parallel_for<class KernelTwo>(
84+
nd_range<1>{{dataSize}, {16}},
85+
[=](id<1> i) { accTmp3[i] = accTmp1[i] * 5; });
86+
});
87+
88+
// Kernel 4 (bottom of the diamond): out = tmp2 + tmp3, reusing AddKernel.
q.submit([&](handler &cgh) {
89+
auto accTmp2 = bTmp2.get_access(cgh);
90+
auto accTmp3 = bTmp3.get_access(cgh);
91+
auto accOut = bOut.get_access(cgh);
92+
cgh.parallel_for<AddKernel>(nd_range<1>{{dataSize}, {16}},
93+
AddKernel{accTmp2, accTmp3, accOut});
94+
});
95+
96+
// Fusion is completed with the no_barriers property. Each work-item only
// reads the temporary elements at its own index i in the kernels above.
fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}});
97+
98+
assert(!fw.is_in_fusion_mode() &&
99+
"Queue should not be in fusion mode anymore");
100+
}
101+
102+
// Check the results
// Expected output is tmp2 + tmp3 = 20*i*i + 25*i. The tmp host arrays are
// expected to still contain -1, which the assert messages treat as proof of
// internalization.
103+
for (size_t i = 0; i < dataSize; ++i) {
104+
assert(out[i] == (20 * i * i + i * 25) && "Computation error");
105+
assert(tmp1[i] == -1 && "tmp1 not internalized");
106+
assert(tmp2[i] == -1 && "tmp2 not internalized");
107+
assert(tmp3[i] == -1 && "tmp3 not internalized");
108+
}
109+
110+
return 0;
111+
}

sycl/test-e2e/KernelFusion/event_wait_cancel.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion, aspect-usm_shared_allocations
66

77
// Test validity of events after cancel_fusion.

sycl/test-e2e/KernelFusion/event_wait_complete.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion, aspect-usm_shared_allocations
66

77
// Test validity of events after complete_fusion.
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
// UNSUPPORTED: hip
5+
// REQUIRES: fusion
6+
7+
// Test complete fusion with local internalization and a local accessor that
8+
// already exists in one of the input kernels.
9+
10+
#include <sycl/sycl.hpp>
11+
12+
using namespace sycl;
13+
14+
// Test driver: fuses two kernels on a fusion-enabled queue, where the first
// kernel already uses its own local_accessor and the shared temporary buffer
// is accessed with the promote_local property in both kernels. Asserts the
// final output values and that the host tmp array still holds its initial -1.
// Returns 0 when all assertions pass.
int main() {
15+
constexpr size_t dataSize = 512;
16+
int in1[dataSize], in2[dataSize], in3[dataSize], tmp[dataSize], out[dataSize];
17+
18+
// Inputs are fixed multiples of the index; tmp and out are seeded with -1 so
// the checks at the end can tell whether they were written.
for (size_t i = 0; i < dataSize; ++i) {
19+
in1[i] = i * 2;
20+
in2[i] = i * 3;
21+
in3[i] = i * 4;
22+
tmp[i] = -1;
23+
out[i] = -1;
24+
}
25+
26+
// Queue is created with the enable_fusion property.
queue q{ext::codeplay::experimental::property::queue::enable_fusion{}};
27+
28+
{
29+
buffer<int> bIn1{in1, range{dataSize}};
30+
buffer<int> bIn2{in2, range{dataSize}};
31+
buffer<int> bIn3{in3, range{dataSize}};
32+
// Unlike the accessors below, the tmp buffer itself carries no property.
buffer<int> bTmp{tmp, range{dataSize}};
33+
buffer<int> bOut{out, range{dataSize}};
34+
35+
ext::codeplay::experimental::fusion_wrapper fw{q};
36+
fw.start_fusion();
37+
38+
assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode");
39+
40+
// Kernel 1: copies in2 into a 16-element local_accessor (matching the
// work-group size of 16 in the nd_range), then writes
// tmp = in1 + local value, i.e. 2*i + 3*i = 5*i.
q.submit([&](handler &cgh) {
41+
auto accIn1 = bIn1.get_access(cgh);
42+
auto accIn2 = bIn2.get_access(cgh);
43+
auto accTmp = bTmp.get_access(
44+
cgh, sycl::ext::codeplay::experimental::property::promote_local{});
45+
local_accessor<int> accLocal{16, cgh};
46+
cgh.parallel_for<class KernelOne>(
47+
nd_range<1>{{dataSize}, {16}}, [=](nd_item<1> i) {
48+
size_t globalIdx = i.get_global_linear_id();
49+
size_t localIdx = i.get_local_linear_id();
50+
accLocal[localIdx] = accIn2[globalIdx];
51+
accTmp[globalIdx] = accIn1[globalIdx] + accLocal[localIdx];
52+
});
53+
});
54+
55+
// Kernel 2: out = tmp * in3, i.e. 5*i * 4*i = 20*i*i. The tmp accessor is
// again requested with promote_local.
q.submit([&](handler &cgh) {
56+
auto accTmp = bTmp.get_access(
57+
cgh, sycl::ext::codeplay::experimental::property::promote_local{});
58+
auto accIn3 = bIn3.get_access(cgh);
59+
auto accOut = bOut.get_access(cgh);
60+
cgh.parallel_for<class KernelTwo>(
61+
nd_range<1>{{dataSize}, {16}},
62+
[=](id<1> i) { accOut[i] = accTmp[i] * accIn3[i]; });
63+
});
64+
65+
// Fusion is completed with the no_barriers property. Each work-item only
// touches the tmp element and local slot belonging to its own index.
fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}});
66+
67+
assert(!fw.is_in_fusion_mode() &&
68+
"Queue should not be in fusion mode anymore");
69+
}
70+
71+
// Check the results
// Expected output is 20*i*i. The tmp host array is expected to still contain
// -1, which the assert message treats as proof of internalization.
72+
for (size_t i = 0; i < dataSize; ++i) {
73+
assert(out[i] == (20 * i * i) && "Computation error");
74+
assert(tmp[i] == -1 && "Not internalized");
75+
}
76+
77+
return 0;
78+
}

sycl/test-e2e/KernelFusion/internal_explicit_dependency.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion, aspect-usm_shared_allocations
66

77
// Test complete fusion where one kernel in the fusion list specifies an

sycl/test-e2e/KernelFusion/internalize_array_wrapper.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O2 -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion
66

77
// Test internalization of a nested array type.

sycl/test-e2e/KernelFusion/internalize_deep.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O2 -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion
66

77
// Test complete fusion with internalization of a deep struct type.

sycl/test-e2e/KernelFusion/internalize_multi_ptr.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O2 -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion
66

77
// Test complete fusion with private internalization specified on the

sycl/test-e2e/KernelFusion/internalize_vec.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O2 -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion
66

77
// Test complete fusion with internalization of a struct type.

sycl/test-e2e/KernelFusion/internalize_vfunc.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O2 -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion
66

77
// Test complete fusion with private internalization specified on the

sycl/test-e2e/KernelFusion/jit_caching.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O2 -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
22
// RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\
33
// RUN: %CPU_CHECK_PLACEHOLDER --implicit-check-not "COMPUTATION ERROR" --implicit-check-not "WRONG INTERNALIZATION"
44
// RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\
55
// RUN: %GPU_CHECK_PLACEHOLDER --implicit-check-not "COMPUTATION ERROR" --implicit-check-not "WRONG INTERNALIZATION"
6-
// UNSUPPORTED: cuda || hip
6+
// UNSUPPORTED: hip
77
// REQUIRES: fusion
88

99
// Test caching for JIT fused kernels. Also test for debug messages being

sycl/test-e2e/KernelFusion/local_internalization.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -O2 -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir -O2 %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// UNSUPPORTED: cuda || hip
4+
// UNSUPPORTED: hip
55
// REQUIRES: fusion
66

77
// Test complete fusion with local internalization specified on the

0 commit comments

Comments
 (0)