Skip to content

[SYCL] Implement sycl_ext_intel_device_architecture for AOT #7008

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
278 changes: 278 additions & 0 deletions sycl/include/sycl/ext/intel/experimental/device_architecture.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
#pragma once

#include <sycl/detail/defines_elementary.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace ext::intel::experimental {

enum class architecture {
x86_64,
intel_gpu_bdw,
intel_gpu_skl,
intel_gpu_kbl,
intel_gpu_cfl,
intel_gpu_apl,
intel_gpu_glk,
intel_gpu_whl,
intel_gpu_aml,
intel_gpu_cml,
intel_gpu_icllp,
intel_gpu_ehl,
intel_gpu_tgllp,
intel_gpu_rkl,
intel_gpu_adl_s,
intel_gpu_rpl_s,
intel_gpu_adl_p,
intel_gpu_adl_n,
intel_gpu_dg1,
intel_gpu_acm_g10,
intel_gpu_acm_g11,
intel_gpu_acm_g12,
intel_gpu_pvc,
// Update "detail::max_architecture" below if you add new elements here!
intel_gpu_8_0_0 = intel_gpu_bdw,
intel_gpu_9_0_9 = intel_gpu_skl,
intel_gpu_9_1_9 = intel_gpu_kbl,
intel_gpu_9_2_9 = intel_gpu_cfl,
intel_gpu_9_3_0 = intel_gpu_apl,
intel_gpu_9_4_0 = intel_gpu_glk,
intel_gpu_9_5_0 = intel_gpu_whl,
intel_gpu_9_6_0 = intel_gpu_aml,
intel_gpu_9_7_0 = intel_gpu_cml,
intel_gpu_11_0_0 = intel_gpu_icllp,
intel_gpu_11_2_0 = intel_gpu_ehl,
intel_gpu_12_0_0 = intel_gpu_tgllp,
intel_gpu_12_10_0 = intel_gpu_dg1,
};

} // namespace ext::intel::experimental

namespace detail {

static constexpr ext::intel::experimental::architecture max_architecture =
ext::intel::experimental::architecture::intel_gpu_pvc;

#ifndef __SYCL_TARGET_INTEL_X86_64__
#define __SYCL_TARGET_INTEL_X86_64__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_BDW__
#define __SYCL_TARGET_INTEL_GPU_BDW__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_SKL__
#define __SYCL_TARGET_INTEL_GPU_SKL__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_KBL__
#define __SYCL_TARGET_INTEL_GPU_KBL__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_CFL__
#define __SYCL_TARGET_INTEL_GPU_CFL__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_APL__
#define __SYCL_TARGET_INTEL_GPU_APL__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_GLK__
#define __SYCL_TARGET_INTEL_GPU_GLK__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_WHL__
#define __SYCL_TARGET_INTEL_GPU_WHL__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_AML__
#define __SYCL_TARGET_INTEL_GPU_AML__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_CML__
#define __SYCL_TARGET_INTEL_GPU_CML__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_ICLLP__
#define __SYCL_TARGET_INTEL_GPU_ICLLP__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_EHL__
#define __SYCL_TARGET_INTEL_GPU_EHL__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_TGLLP__
#define __SYCL_TARGET_INTEL_GPU_TGLLP__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_RKL__
#define __SYCL_TARGET_INTEL_GPU_RKL__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_ADL_S__
#define __SYCL_TARGET_INTEL_GPU_ADL_S__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_RPL_S__
#define __SYCL_TARGET_INTEL_GPU_RPL_S__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_ADL_P__
#define __SYCL_TARGET_INTEL_GPU_ADL_P__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_ADL_N__
#define __SYCL_TARGET_INTEL_GPU_ADL_N__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_DG1__
#define __SYCL_TARGET_INTEL_GPU_DG1__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G10__
#define __SYCL_TARGET_INTEL_GPU_ACM_G10__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G11__
#define __SYCL_TARGET_INTEL_GPU_ACM_G11__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G12__
#define __SYCL_TARGET_INTEL_GPU_ACM_G12__ 0
#endif
#ifndef __SYCL_TARGET_INTEL_GPU_PVC__
#define __SYCL_TARGET_INTEL_GPU_PVC__ 0
#endif

// This is true when the translation unit is compiled in AOT mode with target
// names that supports the "if_architecture_is" features. If an unsupported
// target name is specified via "-fsycl-targets", the associated invocation of
// the device compiler will set this variable to false, and that will trigger
// an error for code that uses "if_architecture_is".
static constexpr bool is_allowable_aot_mode =
(__SYCL_TARGET_INTEL_X86_64__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_BDW__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_SKL__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_KBL__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_CFL__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_APL__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_GLK__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_WHL__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_AML__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_CML__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_ICLLP__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_EHL__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_TGLLP__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_RKL__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_ADL_S__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_RPL_S__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_ADL_P__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_ADL_N__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_DG1__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1) ||
(__SYCL_TARGET_INTEL_GPU_PVC__ == 1);

struct IsAOTForArchitectureClass {
// Allocate an array of size == size of ext::intel::experimental::architecture
// enum.
bool arr[static_cast<int>(max_architecture) + 1];

using arch = ext::intel::experimental::architecture;

constexpr IsAOTForArchitectureClass() : arr() {
arr[static_cast<int>(arch::x86_64)] = __SYCL_TARGET_INTEL_X86_64__ == 1;
arr[static_cast<int>(arch::intel_gpu_bdw)] =
__SYCL_TARGET_INTEL_GPU_BDW__ == 1;
arr[static_cast<int>(arch::intel_gpu_skl)] =
__SYCL_TARGET_INTEL_GPU_SKL__ == 1;
arr[static_cast<int>(arch::intel_gpu_kbl)] =
__SYCL_TARGET_INTEL_GPU_KBL__ == 1;
arr[static_cast<int>(arch::intel_gpu_cfl)] =
__SYCL_TARGET_INTEL_GPU_CFL__ == 1;
arr[static_cast<int>(arch::intel_gpu_apl)] =
__SYCL_TARGET_INTEL_GPU_APL__ == 1;
arr[static_cast<int>(arch::intel_gpu_glk)] =
__SYCL_TARGET_INTEL_GPU_GLK__ == 1;
arr[static_cast<int>(arch::intel_gpu_whl)] =
__SYCL_TARGET_INTEL_GPU_WHL__ == 1;
arr[static_cast<int>(arch::intel_gpu_aml)] =
__SYCL_TARGET_INTEL_GPU_AML__ == 1;
arr[static_cast<int>(arch::intel_gpu_cml)] =
__SYCL_TARGET_INTEL_GPU_CML__ == 1;
arr[static_cast<int>(arch::intel_gpu_icllp)] =
__SYCL_TARGET_INTEL_GPU_ICLLP__ == 1;
arr[static_cast<int>(arch::intel_gpu_ehl)] =
__SYCL_TARGET_INTEL_GPU_EHL__ == 1;
arr[static_cast<int>(arch::intel_gpu_tgllp)] =
__SYCL_TARGET_INTEL_GPU_TGLLP__ == 1;
arr[static_cast<int>(arch::intel_gpu_rkl)] =
__SYCL_TARGET_INTEL_GPU_RKL__ == 1;
arr[static_cast<int>(arch::intel_gpu_adl_s)] =
__SYCL_TARGET_INTEL_GPU_ADL_S__ == 1;
arr[static_cast<int>(arch::intel_gpu_rpl_s)] =
__SYCL_TARGET_INTEL_GPU_RPL_S__ == 1;
arr[static_cast<int>(arch::intel_gpu_adl_p)] =
__SYCL_TARGET_INTEL_GPU_ADL_P__ == 1;
arr[static_cast<int>(arch::intel_gpu_adl_n)] =
__SYCL_TARGET_INTEL_GPU_ADL_N__ == 1;
arr[static_cast<int>(arch::intel_gpu_dg1)] =
__SYCL_TARGET_INTEL_GPU_DG1__ == 1;
arr[static_cast<int>(arch::intel_gpu_acm_g10)] =
__SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1;
arr[static_cast<int>(arch::intel_gpu_acm_g11)] =
__SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1;
arr[static_cast<int>(arch::intel_gpu_acm_g12)] =
__SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1;
arr[static_cast<int>(arch::intel_gpu_pvc)] =
__SYCL_TARGET_INTEL_GPU_PVC__ == 1;
}
};

// One entry for each enumerator in "architecture" telling whether the AOT
// target matches that architecture.
static constexpr IsAOTForArchitectureClass is_aot_for_architecture;

// Reads the value of "is_allowable_aot_mode" via a template to defer triggering
// static_assert() until template instantiation time.
template <ext::intel::experimental::architecture... Archs>
constexpr static bool allowable_aot_mode() {
return is_allowable_aot_mode;
}

// Tells if the current device has one of the architectures in the parameter
// pack.
template <ext::intel::experimental::architecture... Archs>
constexpr static bool device_architecture_is() {
return (is_aot_for_architecture.arr[static_cast<int>(Archs)] || ...);
}

// Helper object used to implement "else_if_architecture_is" and "otherwise".
// The "MakeCall" template parameter tells whether a previous clause in the
// "if-elseif-elseif ..." chain was true. When "MakeCall" is false, some
// previous clause was true, so none of the subsequent
// "else_if_architecture_is" or "otherwise" member functions should call the
// user's function.
template <bool MakeCall> class if_architecture_helper {
public:
template <ext::intel::experimental::architecture... Archs, typename T,
typename... Args>
constexpr auto else_if_architecture_is(T fnTrue, Args... args) {
if constexpr (MakeCall && device_architecture_is<Archs...>()) {
fnTrue(args...);
return if_architecture_helper<false>{};
} else {
(void)fnTrue;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
(void)fnTrue;
std::ignore = fnTrue;

Nit; just to be a little more modern.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, thanks! Will change it in another patch

return if_architecture_helper<MakeCall>{};
}
}

template <typename T, typename... Args>
constexpr void otherwise(T fn, Args... args) {
if constexpr (MakeCall) {
fn(args...);
}
}
};
} // namespace detail

namespace ext::intel::experimental {

template <architecture... Archs, typename T, typename... Args>
constexpr static auto if_architecture_is(T fnTrue, Args... args) {
static_assert(detail::allowable_aot_mode<Archs...>(),
"The if_architecture_is function may only be used when AOT "
"compiling with '-fsycl-targets=spir64_x86_64' or "
"'-fsycl-targets=intel_gpu_*'");
if constexpr (detail::device_architecture_is<Archs...>()) {
fnTrue(args...);
return detail::if_architecture_helper<false>{};
} else {
(void)fnTrue;
return detail::if_architecture_helper<true>{};
}
}

} // namespace ext::intel::experimental
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
1 change: 1 addition & 0 deletions sycl/include/sycl/feature_test.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {

// TODO: Move these feature-test macros to compiler driver.
#define SYCL_EXT_INTEL_DEVICE_INFO 5
#define SYCL_EXT_INTEL_DEVICE_ARCHITECTURE 1
#define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 1
#define SYCL_EXT_ONEAPI_LOCAL_MEMORY 1
#define SYCL_EXT_ONEAPI_MATRIX 1
Expand Down
1 change: 1 addition & 0 deletions sycl/unittests/Extensions/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@ set(CMAKE_CXX_EXTENSIONS OFF)

add_sycl_unittest(ExtensionsTests OBJECT
DefaultContext.cpp
DeviceArchitecture.cpp
)

47 changes: 47 additions & 0 deletions sycl/unittests/Extensions/DeviceArchitecture.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//==--------------- DeviceArchitectureOneArchSelected.cpp ------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <gtest/gtest.h>

// define one of __SYCL_TARGET_INTEL_*** macro, e.g., the one for SKL
#define __SYCL_TARGET_INTEL_GPU_SKL__ 1

#include <sycl/ext/intel/experimental/device_architecture.hpp>

using namespace sycl;
using namespace sycl::detail;
using namespace sycl::ext::intel::experimental;

TEST(DeviceArchitectureTest, DeviceArchitecture_If) {
bool res = false;
if_architecture_is<architecture::intel_gpu_skl>([&]() { res = true; });
ASSERT_TRUE(res);
}

TEST(DeviceArchitectureTest, DeviceArchitecture_If_Negative) {
bool res = false;
if_architecture_is<architecture::intel_gpu_pvc>([&]() { res = true; });
ASSERT_FALSE(res);
}

TEST(DeviceArchitectureTest, DeviceArchitecture_Else_If) {
bool res = false;
if_architecture_is<architecture::intel_gpu_dg1>([]() {
}).else_if_architecture_is<architecture::intel_gpu_skl>([&]() {
res = true;
});
ASSERT_TRUE(res);
}

TEST(DeviceArchitectureTest, DeviceArchitecture_Otherwise) {
bool res = false;
if_architecture_is<architecture::intel_gpu_dg1>([]() {
}).else_if_architecture_is<architecture::intel_gpu_pvc>([&]() {
}).otherwise([&]() { res = true; });
ASSERT_TRUE(res);
}