Skip to content

Commit 1bec982

Browse files
authored
[SYCL] Fix malloc shared by throwing when usm_shared_allocations not supported (#12700)
Final PR in the series of #12636. Refer to it for a description. After a discussion with @AlexeySachkov we've decided its best to not rewrite USM and syclcompat tests with buffers/accessors. For USM, the reason is obvious and for syclcompat you can reach out to Alexey. Therefore, these tests are handled using if statements or requring aspect to be supported. Once this PR is merged, the behavior of malloc_shared will be to throw if the usm_shared_allocations is not supported which is conformant with the spec.
1 parent 44a74d0 commit 1bec982

16 files changed

+147
-96
lines changed

sycl/source/detail/usm/usm_impl.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -146,11 +146,8 @@ void *alignedAllocInternal(size_t Alignment, size_t Size,
146146
}
147147
if (Kind == alloc::shared &&
148148
!DevImpl->has(sycl::aspect::usm_shared_allocations)) {
149-
// TODO:: Throw an exception to conform with the specification.
150-
// Note that many tests will have to be changed to conform with the spec
151-
// before completing this. That is, the tests will now have to expect
152-
// exceptions as a result of failed allocations in addition to nullptr
153-
// being returned depending on the reason why allocation failed.
149+
throw sycl::exception(sycl::errc::feature_not_supported,
150+
"Device does not support shared USM allocations!");
154151
}
155152
void *RetVal = nullptr;
156153
if (Size == 0)

sycl/test-e2e/USM/alloc_functions.cpp

Lines changed: 46 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,17 @@ int main() {
8484
[&]() { return MHost(q, property_list{}); },
8585
[&]() { return MHost(ctx, property_list{}); }});
8686

87-
auto MShared = [&](auto... args) {
88-
return malloc_shared(sizeof(std::max_align_t), args...);
89-
};
90-
CheckAll(FAlign,
91-
std::tuple{[&]() { return MShared(q); },
92-
[&]() { return MShared(d, ctx); },
93-
[&]() { return MShared(q, property_list{}); },
94-
[&]() { return MShared(d, ctx, property_list{}); }});
87+
if (d.has(aspect::usm_shared_allocations)) {
88+
auto MShared = [&](auto... args) {
89+
return malloc_shared(sizeof(std::max_align_t), args...);
90+
};
91+
92+
CheckAll(FAlign,
93+
std::tuple{[&]() { return MShared(q); },
94+
[&]() { return MShared(d, ctx); },
95+
[&]() { return MShared(q, property_list{}); },
96+
[&]() { return MShared(d, ctx, property_list{}); }});
97+
}
9598

9699
auto ADevice = [&](size_t Align, auto... args) {
97100
return aligned_alloc_device(Align, sizeof(std::max_align_t), args...);
@@ -124,21 +127,24 @@ int main() {
124127
[&]() { return AHost(Align, q, property_list{}); },
125128
[&]() { return AHost(Align, ctx, property_list{}); }});
126129

127-
auto AShared = [&](size_t Align, auto... args) {
128-
return aligned_alloc_shared(Align, sizeof(std::max_align_t), args...);
129-
};
130-
CheckAll(FAlign,
131-
std::tuple{
132-
[&]() { return AShared(FAlign / 2, q); },
133-
[&]() { return AShared(FAlign / 2, d, ctx); },
134-
[&]() { return AShared(FAlign / 2, q, property_list{}); },
135-
[&]() { return AShared(FAlign / 2, d, ctx, property_list{}); }});
136-
CheckAll(
137-
Align,
138-
std::tuple{[&]() { return AShared(Align, q); },
139-
[&]() { return AShared(Align, d, ctx); },
140-
[&]() { return AShared(Align, q, property_list{}); },
141-
[&]() { return AShared(Align, d, ctx, property_list{}); }});
130+
if (d.has(aspect::usm_shared_allocations)) {
131+
auto AShared = [&](size_t Align, auto... args) {
132+
return aligned_alloc_shared(Align, sizeof(std::max_align_t), args...);
133+
};
134+
CheckAll(
135+
FAlign,
136+
std::tuple{
137+
[&]() { return AShared(FAlign / 2, q); },
138+
[&]() { return AShared(FAlign / 2, d, ctx); },
139+
[&]() { return AShared(FAlign / 2, q, property_list{}); },
140+
[&]() { return AShared(FAlign / 2, d, ctx, property_list{}); }});
141+
CheckAll(
142+
Align,
143+
std::tuple{[&]() { return AShared(Align, q); },
144+
[&]() { return AShared(Align, d, ctx); },
145+
[&]() { return AShared(Align, q, property_list{}); },
146+
[&]() { return AShared(Align, d, ctx, property_list{}); }});
147+
}
142148

143149
auto TDevice = [&](auto... args) {
144150
return malloc_device<Aligned>(1, args...);
@@ -150,11 +156,13 @@ int main() {
150156
CheckAll(Align, std::tuple{[&]() { return THost(q); },
151157
[&]() { return THost(ctx); }});
152158

153-
auto TShared = [&](auto... args) {
154-
return malloc_shared<Aligned>(1, args...);
155-
};
156-
CheckAll(Align, std::tuple{[&]() { return TShared(q); },
157-
[&]() { return TShared(d, ctx); }});
159+
if (d.has(aspect::usm_shared_allocations)) {
160+
auto TShared = [&](auto... args) {
161+
return malloc_shared<Aligned>(1, args...);
162+
};
163+
CheckAll(Align, std::tuple{[&]() { return TShared(q); },
164+
[&]() { return TShared(d, ctx); }});
165+
}
158166

159167
auto ATDevice = [&](size_t Align, auto... args) {
160168
return aligned_alloc_device<Aligned>(Align, 1, args...);
@@ -172,15 +180,16 @@ int main() {
172180
[&]() { return ATHost(Align / 2, ctx); }});
173181
CheckAll(Align * 2, std::tuple{[&]() { return ATHost(Align * 2, q); },
174182
[&]() { return ATHost(Align * 2, ctx); }});
175-
176-
auto ATShared = [&](size_t Align, auto... args) {
177-
return aligned_alloc_shared<Aligned>(Align, 1, args...);
178-
};
179-
CheckAll(Align, std::tuple{[&]() { return ATShared(Align / 2, q); },
180-
[&]() { return ATShared(Align / 2, d, ctx); }});
181-
CheckAll(Align * 2,
182-
std::tuple{[&]() { return ATShared(Align * 2, q); },
183-
[&]() { return ATShared(Align * 2, d, ctx); }});
183+
if (d.has(aspect::usm_shared_allocations)) {
184+
auto ATShared = [&](size_t Align, auto... args) {
185+
return aligned_alloc_shared<Aligned>(Align, 1, args...);
186+
};
187+
CheckAll(Align, std::tuple{[&]() { return ATShared(Align / 2, q); },
188+
[&]() { return ATShared(Align / 2, d, ctx); }});
189+
CheckAll(Align * 2,
190+
std::tuple{[&]() { return ATShared(Align * 2, q); },
191+
[&]() { return ATShared(Align * 2, d, ctx); }});
192+
}
184193

185194
auto Malloc = [&](auto... args) {
186195
return malloc(sizeof(std::max_align_t), args...);

sycl/test-e2e/USM/badmalloc.cpp

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,16 @@ int main(int argc, char *argv[]) {
2626
auto p = malloc(8, q, usm::alloc::unknown);
2727
if (p != nullptr)
2828
return 1;
29-
29+
// check that malloc_shared throws when usm_shared_allocations not supported
30+
if (!q.get_device().has(aspect::usm_shared_allocations)) {
31+
try {
32+
auto p = malloc_shared<int>(1, q);
33+
return 11;
34+
} catch (const sycl::exception &e) {
35+
if (e.code() != sycl::errc::feature_not_supported)
36+
return 11;
37+
}
38+
}
3039
// Bad size, host
3140
p = malloc(-1, q, usm::alloc::host);
3241
std::cout << "p = " << p << std::endl;
@@ -36,10 +45,12 @@ int main(int argc, char *argv[]) {
3645
std::cout << "p = " << p << std::endl;
3746
if (p != nullptr)
3847
return 3;
39-
p = malloc(-1, q, usm::alloc::shared);
40-
std::cout << "p = " << p << std::endl;
41-
if (p != nullptr)
42-
return 4;
48+
if (q.get_device().has(aspect::usm_shared_allocations)) {
49+
p = malloc(-1, q, usm::alloc::shared);
50+
std::cout << "p = " << p << std::endl;
51+
if (p != nullptr)
52+
return 4;
53+
}
4354
p = malloc(-1, q, usm::alloc::unknown);
4455
std::cout << "p = " << p << std::endl;
4556
if (p != nullptr)
@@ -54,10 +65,12 @@ int main(int argc, char *argv[]) {
5465
std::cout << "p = " << p << std::endl;
5566
if (p != nullptr)
5667
return 7;
57-
p = aligned_alloc(0, -1, q, usm::alloc::shared);
58-
std::cout << "p = " << p << std::endl;
59-
if (p != nullptr)
60-
return 8;
68+
if (q.get_device().has(aspect::usm_shared_allocations)) {
69+
p = aligned_alloc(0, -1, q, usm::alloc::shared);
70+
std::cout << "p = " << p << std::endl;
71+
if (p != nullptr)
72+
return 8;
73+
}
6174
p = aligned_alloc(0, -1, q, usm::alloc::unknown);
6275
std::cout << "p = " << p << std::endl;
6376
if (p != nullptr)

sycl/test-e2e/USM/dep_events.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8-
8+
// REQUIRES: usm_shared_allocations
99
// RUN: %{build} -o %t1.out
1010
// RUN: %{run} %t1.out
1111

sycl/test-e2e/USM/memcpy.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ int main() {
175175
TEST_MEMCPY(inArray, init_on_device, outArray, check_on_device)
176176

177177
// Test device to aligned device
178-
USM_MALLOC(inArray, shared)
178+
USM_MALLOC(inArray, device)
179179
USM_ALIGNED_ALLOC_DEVICE(outArray)
180180
TEST_MEMCPY(inArray, init_on_device, outArray, check_on_device)
181181

@@ -279,7 +279,7 @@ int main() {
279279
TEST_MEMCPY(inArray, init_on_host, outArray, check_on_device)
280280
}
281281

282-
if (dev.get_info<info::device::usm_host_allocations>() &&
282+
if (dev.get_info<info::device::usm_shared_allocations>() &&
283283
dev.get_info<info::device::usm_device_allocations>()) {
284284
// Test shared to device
285285
USM_MALLOC(inArray, shared)

sycl/test-e2e/syclcompat/kernel/Inputs/kernel_function.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
//
3030
//
3131
// ===---------------------------------------------------------------------===//
32-
3332
#ifdef _WIN32
3433
#include <windows.h>
3534
#else
@@ -110,7 +109,8 @@ void test_kernel_functor_ptr() {
110109

111110
int sharedSize = 10;
112111
void **param = nullptr, **extra = nullptr;
113-
112+
if (!q_ct1->get_device().has(sycl::aspect::usm_shared_allocations))
113+
return;
114114
int *dev = sycl::malloc_shared<int>(16, *q_ct1);
115115
for (int i = 0; i < 16; i++) {
116116
dev[i] = 0;

sycl/test-e2e/syclcompat/memory/memory_management_test2.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
//
3131
// ===----------------------------------------------------------------------===//
3232

33+
// REQUIRES: usm_shared_allocations
3334
// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out
3435
// RUN: %{run} %t.out
3536

sycl/test-e2e/syclcompat/memory/usm_allocations.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
* USM allocation tests
2121
**************************************************************************/
2222

23+
// REQUIRES: usm_shared_allocations
2324
// RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out
2425
// RUN: %{run} %t.out
2526

sycl/test-e2e/syclcompat/util/util_complex.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
//===---------------------------------------------------------------===//
3131

3232
// REQUIRES: aspect-fp64
33-
33+
// REQUIRES: usm_shared_allocations
3434
// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out
3535
// RUN: %{run} %t.out
3636

sycl/test-e2e/syclcompat/util/util_find_first_set.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,16 +104,17 @@ void test_find_first_set() {
104104
sycl::queue q_ct1 = *dev_ct1.default_queue();
105105
int *test_result, host_test_result = 0;
106106

107-
test_result = sycl::malloc_shared<int>(sizeof(int), q_ct1);
108-
*test_result = 0;
107+
test_result = sycl::malloc_device<int>(1, q_ct1);
108+
q_ct1.memcpy(test_result, &host_test_result, sizeof(int)).wait();
109109

110110
q_ct1.parallel_for(
111111
sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
112112
[=](sycl::nd_item<3> item_ct1) { find_first_set_test(test_result); });
113113

114114
dev_ct1.queues_wait_and_throw();
115115
find_first_set_test(&host_test_result);
116-
assert(*test_result == 0);
116+
assert(host_test_result == 0);
117+
q_ct1.memcpy(&host_test_result, test_result, sizeof(int)).wait();
117118
assert(host_test_result == 0);
118119

119120
sycl::free(test_result, q_ct1);

sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ void test_permute_sub_group_by_xor() {
9090
unsigned int *dev_data_u = nullptr;
9191
sycl::range<3> GridSize(1, 1, 1);
9292
sycl::range<3> BlockSize(1, 1, 1);
93-
dev_data = sycl::malloc_shared<int>(DATA_NUM, *q_ct1);
94-
dev_data_u = sycl::malloc_shared<unsigned int>(DATA_NUM, *q_ct1);
93+
dev_data = sycl::malloc_device<int>(DATA_NUM, *q_ct1);
94+
dev_data_u = sycl::malloc_device<unsigned int>(DATA_NUM, *q_ct1);
9595

9696
GridSize = sycl::range<3>(1, 1, 2);
9797
BlockSize = sycl::range<3>(1, 2, 32);
@@ -105,7 +105,10 @@ void test_permute_sub_group_by_xor() {
105105
88, 89, 94, 95, 92, 93, 98, 99, 96, 97, 102, 103, 100, 101, 106,
106106
107, 104, 105, 110, 111, 108, 109, 114, 115, 112, 113, 118, 119, 116, 117,
107107
122, 123, 120, 121, 126, 127, 124, 125};
108-
init_data<unsigned int>(dev_data_u, DATA_NUM);
108+
unsigned int host_dev_data_u[DATA_NUM];
109+
init_data<unsigned int>(host_dev_data_u, DATA_NUM);
110+
q_ct1->memcpy(dev_data_u, host_dev_data_u, DATA_NUM * sizeof(unsigned int))
111+
.wait();
109112

110113
q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize),
111114
[=](sycl::nd_item<3> item_ct1)
@@ -114,7 +117,9 @@ void test_permute_sub_group_by_xor() {
114117
});
115118

116119
dev_ct1.queues_wait_and_throw();
117-
verify_data<unsigned int>(dev_data_u, expect1, DATA_NUM);
120+
q_ct1->memcpy(host_dev_data_u, dev_data_u, DATA_NUM * sizeof(unsigned int))
121+
.wait();
122+
verify_data<unsigned int>(host_dev_data_u, expect1, DATA_NUM);
118123

119124
GridSize = sycl::range<3>(1, 1, 2);
120125
BlockSize = sycl::range<3>(1, 2, 32);
@@ -128,16 +133,20 @@ void test_permute_sub_group_by_xor() {
128133
91, 90, 93, 92, 95, 94, 97, 96, 99, 98, 101, 100, 103, 102, 105,
129134
104, 107, 106, 109, 108, 111, 110, 113, 112, 115, 114, 117, 116, 119, 118,
130135
121, 120, 123, 122, 125, 124, 127, 126};
131-
init_data<unsigned int>(dev_data_u, DATA_NUM);
136+
init_data<unsigned int>(host_dev_data_u, DATA_NUM);
132137

138+
q_ct1->memcpy(dev_data_u, host_dev_data_u, DATA_NUM * sizeof(unsigned int))
139+
.wait();
133140
q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize),
134141
[=](sycl::nd_item<3> item_ct1)
135142
[[intel::reqd_sub_group_size(32)]] {
136143
permute_sub_group_by_xor2(dev_data_u, item_ct1);
137144
});
138145

139146
dev_ct1.queues_wait_and_throw();
140-
verify_data<unsigned int>(dev_data_u, expect2, DATA_NUM);
147+
q_ct1->memcpy(host_dev_data_u, dev_data_u, DATA_NUM * sizeof(unsigned int))
148+
.wait();
149+
verify_data<unsigned int>(host_dev_data_u, expect2, DATA_NUM);
141150

142151
sycl::free(dev_data, *q_ct1);
143152
sycl::free(dev_data_u, *q_ct1);

sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,9 @@ void test_select_from_sub_group() {
8888
unsigned int *dev_data_u = nullptr;
8989
sycl::range<3> GridSize(1, 1, 1);
9090
sycl::range<3> BlockSize(1, 1, 1);
91-
dev_data = sycl::malloc_shared<int>(DATA_NUM, *q_ct1);
92-
dev_data_u = sycl::malloc_shared<unsigned int>(DATA_NUM, *q_ct1);
93-
91+
dev_data = sycl::malloc_device<int>(DATA_NUM, *q_ct1);
92+
dev_data_u = sycl::malloc_device<unsigned int>(DATA_NUM, *q_ct1);
93+
unsigned int host_dev_data_u[DATA_NUM];
9494
GridSize = sycl::range<3>(1, 1, 2);
9595
BlockSize = sycl::range<3>(1, 2, 32);
9696
unsigned int expect1[DATA_NUM] = {
@@ -103,16 +103,19 @@ void test_select_from_sub_group() {
103103
91, 92, 93, 94, 95, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105,
104104
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
105105
121, 122, 123, 124, 125, 126, 127, 96};
106-
init_data<unsigned int>(dev_data_u, DATA_NUM);
107-
106+
init_data<unsigned int>(host_dev_data_u, DATA_NUM);
107+
q_ct1->memcpy(dev_data_u, host_dev_data_u, DATA_NUM * sizeof(unsigned int))
108+
.wait();
108109
q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize),
109110
[=](sycl::nd_item<3> item_ct1)
110111
[[intel::reqd_sub_group_size(32)]] {
111112
select_from_sub_group1(dev_data_u, item_ct1);
112113
});
113114

114115
dev_ct1.queues_wait_and_throw();
115-
verify_data<unsigned int>(dev_data_u, expect1, DATA_NUM);
116+
q_ct1->memcpy(host_dev_data_u, dev_data_u, DATA_NUM * sizeof(unsigned int))
117+
.wait();
118+
verify_data<unsigned int>(host_dev_data_u, expect1, DATA_NUM);
116119

117120
GridSize = sycl::range<3>(1, 1, 2);
118121
BlockSize = sycl::range<3>(1, 2, 32);
@@ -126,16 +129,19 @@ void test_select_from_sub_group() {
126129
91, 92, 93, 94, 95, 88, 97, 98, 99, 100, 101, 102, 103, 96, 105,
127130
106, 107, 108, 109, 110, 111, 104, 113, 114, 115, 116, 117, 118, 119, 112,
128131
121, 122, 123, 124, 125, 126, 127, 120};
129-
init_data<unsigned int>(dev_data_u, DATA_NUM);
130-
132+
init_data<unsigned int>(host_dev_data_u, DATA_NUM);
133+
q_ct1->memcpy(dev_data_u, host_dev_data_u, DATA_NUM * sizeof(unsigned int))
134+
.wait();
131135
q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize),
132136
[=](sycl::nd_item<3> item_ct1)
133137
[[intel::reqd_sub_group_size(32)]] {
134138
select_from_sub_group2(dev_data_u, item_ct1);
135139
});
136140

137141
dev_ct1.queues_wait_and_throw();
138-
verify_data<unsigned int>(dev_data_u, expect2, DATA_NUM);
142+
q_ct1->memcpy(host_dev_data_u, dev_data_u, DATA_NUM * sizeof(unsigned int))
143+
.wait();
144+
verify_data<unsigned int>(host_dev_data_u, expect2, DATA_NUM);
139145

140146
sycl::free(dev_data, *q_ct1);
141147
sycl::free(dev_data_u, *q_ct1);

0 commit comments

Comments
 (0)