Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL][ESIMD] Enable invoke_simd simd_mask test #1630

Merged
merged 5 commits into from
Mar 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions SYCL/InvokeSimd/Spec/ImplicitSubgroup/simd_mask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../simd_mask.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
Expand Down
48 changes: 35 additions & 13 deletions SYCL/InvokeSimd/Spec/simd_mask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// Check that full compilation works:
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
Expand All @@ -21,6 +18,7 @@
* This test also runs with all types of VISA link time optimizations enabled.
*/

#include <sycl/detail/boost/mp11.hpp>
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
#include <sycl/sycl.hpp>
Expand All @@ -42,26 +40,25 @@ using namespace sycl::ext::oneapi::experimental;
namespace esimd = sycl::ext::intel::esimd;
constexpr int VL = 16;

template <typename MaskType>
__attribute__((always_inline)) esimd::simd<float, VL>
ESIMD_CALLEE(esimd::simd<float, VL> va,
simd_mask<bool, VL> mask) SYCL_ESIMD_FUNCTION {
simd_mask<MaskType, VL> mask) SYCL_ESIMD_FUNCTION {
return va;
}

template <typename MaskType>
[[intel::device_indirectly_callable]] SYCL_EXTERNAL
simd<float, VL> __regcall SIMD_CALLEE(
simd<float, VL> va, simd_mask<bool, VL> mask) SYCL_ESIMD_FUNCTION;
simd<float, VL> va, simd_mask<MaskType, VL> mask) SYCL_ESIMD_FUNCTION;

using namespace sycl;

int main(void) {
template <typename MaskType> int test(queue q) {
constexpr unsigned Size = 1024;
constexpr unsigned GroupSize = 4 * VL;

auto q = queue{gpu_selector_v};
auto dev = q.get_device();
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
<< "\n";
auto ctxt = q.get_context();

float *A =
Expand All @@ -86,14 +83,14 @@ int main(void) {

try {
auto e = q.submit([&](handler &cgh) {
cgh.parallel_for<class Test>(Range, [=](nd_item<1> ndi) SUBGROUP_ATTR {
cgh.parallel_for(Range, [=](nd_item<1> ndi) SUBGROUP_ATTR {
sub_group sg = ndi.get_sub_group();
group<1> g = ndi.get_group();
uint32_t i =
sg.get_group_linear_id() * VL + g.get_group_linear_id() * GroupSize;
uint32_t wi_id = i + sg.get_local_id();

float res = invoke_simd(sg, SIMD_CALLEE, A[wi_id], M[wi_id]);
auto Callee = SIMD_CALLEE<MaskType>;
float res = invoke_simd(sg, Callee, A[wi_id], M[wi_id]);
C[wi_id] = res;
});
});
Expand Down Expand Up @@ -131,9 +128,34 @@ int main(void) {
return err_cnt > 0 ? 1 : 0;
}

template <typename MaskType>
[[intel::device_indirectly_callable]] SYCL_EXTERNAL
simd<float, VL> __regcall SIMD_CALLEE(
simd<float, VL> va, simd_mask<bool, VL> mask) SYCL_ESIMD_FUNCTION {
simd<float, VL> va, simd_mask<MaskType, VL> mask) SYCL_ESIMD_FUNCTION {
esimd::simd<float, VL> res = ESIMD_CALLEE(va, mask);
return res;
}

int main() {
queue q{gpu_selector_v};

auto dev = q.get_device();
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
<< "\n";
bool passed = true;
const bool SupportsDouble = dev.has(aspect::fp64);
using namespace sycl::detail::boost::mp11;
using MaskTypes =
std::tuple<char, char16_t, char32_t, wchar_t, signed char, signed short,
signed int, signed long, signed long long, unsigned char,
unsigned short, unsigned int, unsigned long,
unsigned long long, float, double>;
tuple_for_each(MaskTypes{}, [&](auto &&x) {
using T = std::remove_reference_t<decltype(x)>;
if (std::is_same_v<T, double> && !SupportsDouble)
return;
passed &= !test<T>(q);
});
std::cout << (passed ? "Test passed\n" : "TEST FAILED\n");
return passed ? 0 : 1;
}