Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Add 'spec' tests to SYCL/InvokeSimd #1360

Merged
merged 7 commits into from
Dec 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 176 additions & 0 deletions SYCL/InvokeSimd/spec/ESIMD_to_unmarked_function.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

// Tests invoke_simd support in the compiler/headers

/* Test case specification:
* -----------------------
* Test and report errors if invoked ESIMD function calls SPMD function.
*
* Test case description:
* ---------------------
* This is an additional test case to increase test coverage. It tests a
* happy-path. ESIMD_CALLEE_doVadd() calls doVadd(), which is an entirely
* unmarked function (not tagged with SYCL_EXTERNAL nor SYCL_ESIMD_FUNCTION).
* Such a function "is treated as ESIMD if called froma within ESIMD code"
* and is treated as SPMD if called from within SPMD (SYCL) code.
*
* Currently, this test case passes because no rules are violated; the unmarked
* function doVadd() is treated as ESIMD when called from within ESIMD code
* (ESIMD_CALLEE_doVadd()), and as SPMD when called from within SPMD code.
* Therefore, we never go from ESIMD to SPMD, or from SPMD to ESIMD in this test
* case.
*
* This test also runs with all types of VISA link time optimizations enabled.
*/

#include <sycl/ext/intel/esimd.hpp>
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
#include <sycl/sycl.hpp>

#include <functional>
#include <iostream>
#include <type_traits>

/* Subgroup size attribute is optional
* In case it is absent compiler decides what subgroup size to use
*/
#ifdef IMPL_SUBGROUP
#define SUBGROUP_ATTR
#else
#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]]
#endif

using namespace sycl;
using namespace sycl::ext::oneapi::experimental;
namespace esimd = sycl::ext::intel::esimd;

// 1024 / 16 = 64: There will be 64 iterations that process 16 elements each
constexpr int Size = 1024;
// constexpr int Size = 1024 * 128;
constexpr int VL = 16;

/* This unmarked function will be treated the same as its calling context;
* if it is called from within ESIMD code, it will be treated as ESIMD,
* if it is called from within SPMD code, it will be treated as SPMD.
*/
// SYCL_EXTERNAL
float doVadd(float a, float b) { return a + b; }

__attribute__((always_inline)) esimd::simd<float, VL>
ESIMD_CALLEE_doVadd(esimd::simd<float, VL> va,
esimd::simd<float, VL> vb) SYCL_ESIMD_FUNCTION {
esimd::simd<float, VL> vc;
for (int i = 0; i < VL; ++i) {
float a = va[i];
float b = vb[i];
vc[i] = doVadd(a, b);
}
return vc;
}

[[intel::device_indirectly_callable]] SYCL_EXTERNAL
simd<float, VL> __regcall SIMD_CALLEE_doVadd(
simd<float, VL> va, simd<float, VL> vb) SYCL_ESIMD_FUNCTION;

constexpr bool use_invoke_simd = true;

int main(void) {
float *A = new float[Size];
float *B = new float[Size];
float *C = new float[Size];

for (unsigned i = 0; i < Size; ++i) {
A[i] = B[i] = i;
C[i] = 0.0f;
}

try {
buffer<float, 1> bufa(A, range<1>(Size));
buffer<float, 1> bufb(B, range<1>(Size));
buffer<float, 1> bufc(C, range<1>(Size));

// We need that many workgroups
sycl::range<1> GlobalRange{Size};

// We need that many threads in each group
sycl::range<1> LocalRange{VL};

auto q = queue{gpu_selector_v};
auto dev = q.get_device();
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
<< "\n";

auto e = q.submit([&](handler &cgh) {
auto PA = bufa.get_access<access::mode::read>(cgh);
auto PB = bufb.get_access<access::mode::read>(cgh);
auto PC = bufc.get_access<access::mode::write>(cgh);

cgh.parallel_for<class Test>(
nd_range<1>(GlobalRange, LocalRange),
[=](nd_item<1> item) SUBGROUP_ATTR {
sycl::group<1> g = item.get_group();
sycl::sub_group sg = item.get_sub_group();

unsigned int offset = g.get_group_id() * g.get_local_range() +
sg.get_group_id() * sg.get_max_local_range();
float va = sg.load(PA.get_pointer() + offset);
float vb = sg.load(PB.get_pointer() + offset);
float vc;

if constexpr (use_invoke_simd) {
vc = invoke_simd(sg, SIMD_CALLEE_doVadd, va, vb);
} else {
vc = doVadd(va, vb);
}
sg.store(PC.get_pointer() + offset, vc);
});
});
e.wait();
} catch (sycl::exception const &e) {
delete[] A;
delete[] B;
delete[] C;

std::cout << "SYCL exception caught: " << e.what() << '\n';
return e.code().value();
}

int err_cnt = 0;

for (unsigned i = 0; i < Size; ++i) {
if (A[i] + B[i] != C[i]) {
if (++err_cnt < 10) {
std::cout << "failed at index " << i << ", " << C[i] << " != " << A[i]
<< " + " << B[i] << "\n";
}
}
}
if (err_cnt > 0) {
std::cout << " pass rate: "
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
<< (Size - err_cnt) << "/" << Size << ")\n";
}

delete[] A;
delete[] B;
delete[] C;

std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
return err_cnt > 0 ? 1 : 0;
}

[[intel::device_indirectly_callable]] SYCL_EXTERNAL
simd<float, VL> __regcall SIMD_CALLEE_doVadd(
simd<float, VL> va, simd<float, VL> vb) SYCL_ESIMD_FUNCTION {
esimd::simd<float, VL> res = ESIMD_CALLEE_doVadd(va, vb);
return res;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../ESIMD_to_unmarked_function.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/ESIMD_to_unmarked_function.cpp,
* but compiles without optional subgroup attribute specified and intended to
* check that compiler is able to choose subgroup size correctly.
*/
19 changes: 19 additions & 0 deletions SYCL/InvokeSimd/spec/IMPL_SUBGROUP/function_overloads.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../function_overloads.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/function_overloads.cpp, but
* compiles without optional subgroup attribute specified and intended to check
* that compiler is able to choose subgroup size correctly.
*/
15 changes: 15 additions & 0 deletions SYCL/InvokeSimd/spec/IMPL_SUBGROUP/invoke_simd_conv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip

// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../../invoke_simd_conv.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/invoke_simd_conv.cpp, but compiles
* without optional subgroup attribute specified and intended to check that
* compiler is able to choose subgroup size correctly.
*/
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../multiple_SPMD_to_multiple_ESIMD.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as
* InvokeSimd/spec/multiple_SPMD_to_multiple_ESIMD.cpp, but compiles without
* optional subgroup attribute specified and intended to check that compiler
* is able to choose subgroup size correctly.
*/
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../multiple_SPMD_to_single_ESIMD.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/multiple_SPMD_to_single_ESIMD.cpp,
* but compiles without optional subgroup attribute specified and intended to
* check that compiler is able to choose subgroup size correctly.
*/
16 changes: 16 additions & 0 deletions SYCL/InvokeSimd/spec/IMPL_SUBGROUP/nested_ESIMD_to_ESIMD.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../nested_ESIMD_to_ESIMD.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/nested_ESIMD_to_ESIMD.cpp, but
* compiles without optional subgroup attribute specified and intended to check
* that compiler is able to choose subgroup size correctly.
*/
16 changes: 16 additions & 0 deletions SYCL/InvokeSimd/spec/IMPL_SUBGROUP/nested_SPMD_to_ESIMD.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../nested_SPMD_to_ESIMD.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/nested_SPMD_to_ESIMD.cpp, but
* compiles without optional subgroup attribute specified and intended to check
* that compiler is able to choose subgroup size correctly.
*/
19 changes: 19 additions & 0 deletions SYCL/InvokeSimd/spec/IMPL_SUBGROUP/simd_mask.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../simd_mask.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/simd_mask.cpp, but compiles without
* optional subgroup attribute specified and intended to check that compiler is
* able to choose subgroup size correctly.
*/
19 changes: 19 additions & 0 deletions SYCL/InvokeSimd/spec/IMPL_SUBGROUP/tuple.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/tuple.cpp, but compiles without
* optional subgroup attribute specified and intended to check that compiler is
* able to choose subgroup size correctly.
*/
19 changes: 19 additions & 0 deletions SYCL/InvokeSimd/spec/IMPL_SUBGROUP/tuple_return.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple_return.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/tuple_return.cpp, but compiles
* without optional subgroup attribute specified and intended to check that
* compiler is able to choose subgroup size correctly.
*/
19 changes: 19 additions & 0 deletions SYCL/InvokeSimd/spec/IMPL_SUBGROUP/tuple_vadd.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../tuple_vadd.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/tuple_vadd.cpp, but compiles
* without optional subgroup attribute specified and intended to check that
* compiler is able to choose subgroup size correctly.
*/
19 changes: 19 additions & 0 deletions SYCL/InvokeSimd/spec/IMPL_SUBGROUP/uniform_retval.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../uniform_retval.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/uniform_retval.cpp, but compiles
* without optional subgroup attribute specified and intended to check that
* compiler is able to choose subgroup size correctly.
*/
Loading