|
1 |
| -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out |
| 1 | +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out |
2 | 2 | // RUN: %HOST_RUN_PLACEHOLDER %t.out
|
3 | 3 | // RUN: %CPU_RUN_PLACEHOLDER %t.out
|
4 | 4 | // RUN: %GPU_RUN_PLACEHOLDER %t.out
|
@@ -166,34 +166,40 @@ template <int N> bool check(vec<float, N> a, vec<float, N> b) {
|
166 | 166 |
|
167 | 167 | int main() {
|
168 | 168 | queue q;
|
169 |
| - if (q.get_device().has(sycl::aspect::fp16)) { |
170 |
| - float16 a, b, c, d; |
171 |
| - for (int i = 0; i < SZ_max; i++) { |
172 |
| - a[i] = i / (float)SZ_max; |
173 |
| - b[i] = (SZ_max - i) / (float)SZ_max; |
174 |
| - c[i] = (float)(3 * i); |
175 |
| - } |
176 |
| - int err = 0; |
177 |
| - { |
178 |
| - buffer<float16> a_buf(&a, 1); |
179 |
| - buffer<float16> b_buf(&b, 1); |
180 |
| - buffer<float16> c_buf(&c, 1); |
181 |
| - buffer<int> err_buf(&err, 1); |
182 |
| - q.submit([&](handler &cgh) { |
183 |
| - auto A = a_buf.get_access<access::mode::read>(cgh); |
184 |
| - auto B = b_buf.get_access<access::mode::read>(cgh); |
185 |
| - auto C = c_buf.get_access<access::mode::read>(cgh); |
186 |
| - auto err = err_buf.get_access<access::mode::write>(cgh); |
187 |
| - cgh.parallel_for(SZ_max, [=](item<1> index) { |
188 |
| - size_t i = index.get_id(0); |
189 |
| - TEST_BUILTIN_1(fabs); |
190 |
| - TEST_BUILTIN_2(fmin); |
191 |
| - TEST_BUILTIN_2(fmax); |
192 |
| - TEST_BUILTIN_3(fma); |
193 |
| - }); |
| 169 | + |
| 170 | + if (!q.get_device().has(sycl::aspect::fp16)) { |
| 171 | + std::cout << "skipping fp16 tests: requires fp16 device aspect." |
| 172 | + << std::endl; |
| 173 | + return 0; |
| 174 | + } |
| 175 | + |
| 176 | + float16 a, b, c, d; |
| 177 | + for (int i = 0; i < SZ_max; i++) { |
| 178 | + a[i] = i / (float)SZ_max; |
| 179 | + b[i] = (SZ_max - i) / (float)SZ_max; |
| 180 | + c[i] = (float)(3 * i); |
| 181 | + } |
| 182 | + int err = 0; |
| 183 | + { |
| 184 | + buffer<float16> a_buf(&a, 1); |
| 185 | + buffer<float16> b_buf(&b, 1); |
| 186 | + buffer<float16> c_buf(&c, 1); |
| 187 | + buffer<int> err_buf(&err, 1); |
| 188 | + q.submit([&](handler &cgh) { |
| 189 | + auto A = a_buf.get_access<access::mode::read>(cgh); |
| 190 | + auto B = b_buf.get_access<access::mode::read>(cgh); |
| 191 | + auto C = c_buf.get_access<access::mode::read>(cgh); |
| 192 | + auto err = err_buf.get_access<access::mode::write>(cgh); |
| 193 | + cgh.parallel_for(SZ_max, [=](item<1> index) { |
| 194 | + size_t i = index.get_id(0); |
| 195 | + TEST_BUILTIN_1(fabs); |
| 196 | + TEST_BUILTIN_2(fmin); |
| 197 | + TEST_BUILTIN_2(fmax); |
| 198 | + TEST_BUILTIN_3(fma); |
194 | 199 | });
|
195 |
| - } |
196 |
| - assert(err == 0); |
| 200 | + }); |
197 | 201 | }
|
| 202 | + assert(err == 0); |
| 203 | + |
198 | 204 | return 0;
|
199 | 205 | }
|
0 commit comments