-
Notifications
You must be signed in to change notification settings - Fork 788
[SYCL] Implement sycl_ext_intel_device_architecture for AOT #7008
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
dm-vodopyanov
merged 12 commits into
intel:sycl
from
dm-vodopyanov:dvodopya/device_architecture_aot
Oct 21, 2022
Merged
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
2760cde
[SYCL] Implement sycl_ext_intel_device_architecture for AOT
dm-vodopyanov a5fee67
Merge branch 'sycl' into dvodopya/device_architecture_aot
dm-vodopyanov 47132b5
Improve reliability in case of arch updates + add unit tests
dm-vodopyanov c9bb2b1
Merge branch 'dvodopya/device_architecture_aot' of https://github.com…
dm-vodopyanov 2c8ef5f
Fix clang-format
dm-vodopyanov 428dd80
Add newline
dm-vodopyanov 733356c
Apply CR comments
dm-vodopyanov 7178de6
Merge branch 'dvodopya/device_architecture_aot' of https://github.com…
dm-vodopyanov 3556eae
Apply CR comment
dm-vodopyanov a1a0da4
Improve unit test
dm-vodopyanov 8ac9f24
Fix unit test
dm-vodopyanov 373c226
Optimize unit tests - remove needless
dm-vodopyanov File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
278 changes: 278 additions & 0 deletions
278
sycl/include/sycl/ext/intel/experimental/device_architecture.hpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,278 @@ | ||||||
#pragma once | ||||||
|
||||||
#include <sycl/detail/defines_elementary.hpp> | ||||||
|
||||||
namespace sycl { | ||||||
__SYCL_INLINE_VER_NAMESPACE(_V1) { | ||||||
namespace ext::intel::experimental { | ||||||
|
||||||
enum class architecture { | ||||||
x86_64, | ||||||
intel_gpu_bdw, | ||||||
intel_gpu_skl, | ||||||
intel_gpu_kbl, | ||||||
intel_gpu_cfl, | ||||||
intel_gpu_apl, | ||||||
intel_gpu_glk, | ||||||
intel_gpu_whl, | ||||||
intel_gpu_aml, | ||||||
intel_gpu_cml, | ||||||
intel_gpu_icllp, | ||||||
intel_gpu_ehl, | ||||||
intel_gpu_tgllp, | ||||||
intel_gpu_rkl, | ||||||
intel_gpu_adl_s, | ||||||
intel_gpu_rpl_s, | ||||||
intel_gpu_adl_p, | ||||||
intel_gpu_adl_n, | ||||||
intel_gpu_dg1, | ||||||
intel_gpu_acm_g10, | ||||||
intel_gpu_acm_g11, | ||||||
intel_gpu_acm_g12, | ||||||
intel_gpu_pvc, | ||||||
// Update "detail::max_architecture" below if you add new elements here! | ||||||
intel_gpu_8_0_0 = intel_gpu_bdw, | ||||||
intel_gpu_9_0_9 = intel_gpu_skl, | ||||||
intel_gpu_9_1_9 = intel_gpu_kbl, | ||||||
intel_gpu_9_2_9 = intel_gpu_cfl, | ||||||
intel_gpu_9_3_0 = intel_gpu_apl, | ||||||
intel_gpu_9_4_0 = intel_gpu_glk, | ||||||
intel_gpu_9_5_0 = intel_gpu_whl, | ||||||
intel_gpu_9_6_0 = intel_gpu_aml, | ||||||
intel_gpu_9_7_0 = intel_gpu_cml, | ||||||
intel_gpu_11_0_0 = intel_gpu_icllp, | ||||||
intel_gpu_11_2_0 = intel_gpu_ehl, | ||||||
intel_gpu_12_0_0 = intel_gpu_tgllp, | ||||||
intel_gpu_12_10_0 = intel_gpu_dg1, | ||||||
}; | ||||||
|
||||||
} // namespace ext::intel::experimental | ||||||
|
||||||
namespace detail { | ||||||
|
||||||
static constexpr ext::intel::experimental::architecture max_architecture = | ||||||
ext::intel::experimental::architecture::intel_gpu_pvc; | ||||||
|
||||||
#ifndef __SYCL_TARGET_INTEL_X86_64__ | ||||||
#define __SYCL_TARGET_INTEL_X86_64__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_BDW__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_BDW__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_SKL__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_SKL__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_KBL__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_KBL__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_CFL__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_CFL__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_APL__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_APL__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_GLK__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_GLK__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_WHL__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_WHL__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_AML__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_AML__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_CML__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_CML__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_ICLLP__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_ICLLP__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_EHL__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_EHL__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_TGLLP__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_TGLLP__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_RKL__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_RKL__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_ADL_S__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_ADL_S__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_RPL_S__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_RPL_S__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_ADL_P__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_ADL_P__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_ADL_N__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_ADL_N__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_DG1__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_DG1__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G10__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_ACM_G10__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G11__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_ACM_G11__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G12__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_ACM_G12__ 0 | ||||||
#endif | ||||||
#ifndef __SYCL_TARGET_INTEL_GPU_PVC__ | ||||||
#define __SYCL_TARGET_INTEL_GPU_PVC__ 0 | ||||||
#endif | ||||||
|
||||||
// This is true when the translation unit is compiled in AOT mode with target | ||||||
// names that supports the "if_architecture_is" features. If an unsupported | ||||||
// target name is specified via "-fsycl-targets", the associated invocation of | ||||||
// the device compiler will set this variable to false, and that will trigger | ||||||
// an error for code that uses "if_architecture_is". | ||||||
static constexpr bool is_allowable_aot_mode = | ||||||
(__SYCL_TARGET_INTEL_X86_64__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_BDW__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_SKL__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_KBL__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_CFL__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_APL__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_GLK__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_WHL__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_AML__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_CML__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_ICLLP__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_EHL__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_TGLLP__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_RKL__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_ADL_S__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_RPL_S__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_ADL_P__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_ADL_N__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_DG1__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1) || | ||||||
(__SYCL_TARGET_INTEL_GPU_PVC__ == 1); | ||||||
|
||||||
struct IsAOTForArchitectureClass { | ||||||
// Allocate an array of size == size of ext::intel::experimental::architecture | ||||||
// enum. | ||||||
bool arr[static_cast<int>(max_architecture) + 1]; | ||||||
|
||||||
using arch = ext::intel::experimental::architecture; | ||||||
|
||||||
constexpr IsAOTForArchitectureClass() : arr() { | ||||||
arr[static_cast<int>(arch::x86_64)] = __SYCL_TARGET_INTEL_X86_64__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_bdw)] = | ||||||
__SYCL_TARGET_INTEL_GPU_BDW__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_skl)] = | ||||||
__SYCL_TARGET_INTEL_GPU_SKL__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_kbl)] = | ||||||
__SYCL_TARGET_INTEL_GPU_KBL__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_cfl)] = | ||||||
__SYCL_TARGET_INTEL_GPU_CFL__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_apl)] = | ||||||
__SYCL_TARGET_INTEL_GPU_APL__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_glk)] = | ||||||
__SYCL_TARGET_INTEL_GPU_GLK__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_whl)] = | ||||||
__SYCL_TARGET_INTEL_GPU_WHL__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_aml)] = | ||||||
__SYCL_TARGET_INTEL_GPU_AML__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_cml)] = | ||||||
__SYCL_TARGET_INTEL_GPU_CML__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_icllp)] = | ||||||
__SYCL_TARGET_INTEL_GPU_ICLLP__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_ehl)] = | ||||||
__SYCL_TARGET_INTEL_GPU_EHL__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_tgllp)] = | ||||||
__SYCL_TARGET_INTEL_GPU_TGLLP__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_rkl)] = | ||||||
__SYCL_TARGET_INTEL_GPU_RKL__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_adl_s)] = | ||||||
__SYCL_TARGET_INTEL_GPU_ADL_S__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_rpl_s)] = | ||||||
__SYCL_TARGET_INTEL_GPU_RPL_S__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_adl_p)] = | ||||||
__SYCL_TARGET_INTEL_GPU_ADL_P__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_adl_n)] = | ||||||
__SYCL_TARGET_INTEL_GPU_ADL_N__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_dg1)] = | ||||||
__SYCL_TARGET_INTEL_GPU_DG1__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_acm_g10)] = | ||||||
__SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_acm_g11)] = | ||||||
__SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_acm_g12)] = | ||||||
__SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1; | ||||||
arr[static_cast<int>(arch::intel_gpu_pvc)] = | ||||||
__SYCL_TARGET_INTEL_GPU_PVC__ == 1; | ||||||
} | ||||||
}; | ||||||
|
||||||
// One entry for each enumerator in "architecture" telling whether the AOT | ||||||
// target matches that architecture. | ||||||
static constexpr IsAOTForArchitectureClass is_aot_for_architecture; | ||||||
|
||||||
// Reads the value of "is_allowable_aot_mode" via a template to defer triggering | ||||||
// static_assert() until template instantiation time. | ||||||
template <ext::intel::experimental::architecture... Archs> | ||||||
constexpr static bool allowable_aot_mode() { | ||||||
return is_allowable_aot_mode; | ||||||
} | ||||||
|
||||||
// Tells if the current device has one of the architectures in the parameter | ||||||
// pack. | ||||||
template <ext::intel::experimental::architecture... Archs> | ||||||
constexpr static bool device_architecture_is() { | ||||||
return (is_aot_for_architecture.arr[static_cast<int>(Archs)] || ...); | ||||||
} | ||||||
|
||||||
// Helper object used to implement "else_if_architecture_is" and "otherwise". | ||||||
// The "MakeCall" template parameter tells whether a previous clause in the | ||||||
// "if-elseif-elseif ..." chain was true. When "MakeCall" is false, some | ||||||
// previous clause was true, so none of the subsequent | ||||||
// "else_if_architecture_is" or "otherwise" member functions should call the | ||||||
// user's function. | ||||||
template <bool MakeCall> class if_architecture_helper { | ||||||
public: | ||||||
template <ext::intel::experimental::architecture... Archs, typename T, | ||||||
typename... Args> | ||||||
constexpr auto else_if_architecture_is(T fnTrue, Args... args) { | ||||||
if constexpr (MakeCall && device_architecture_is<Archs...>()) { | ||||||
fnTrue(args...); | ||||||
return if_architecture_helper<false>{}; | ||||||
} else { | ||||||
(void)fnTrue; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Nit; just to be a little more modern. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, thanks! Will change it in another patch |
||||||
return if_architecture_helper<MakeCall>{}; | ||||||
} | ||||||
} | ||||||
|
||||||
template <typename T, typename... Args> | ||||||
constexpr void otherwise(T fn, Args... args) { | ||||||
if constexpr (MakeCall) { | ||||||
fn(args...); | ||||||
} | ||||||
} | ||||||
}; | ||||||
} // namespace detail | ||||||
|
||||||
namespace ext::intel::experimental { | ||||||
|
||||||
template <architecture... Archs, typename T, typename... Args> | ||||||
constexpr static auto if_architecture_is(T fnTrue, Args... args) { | ||||||
static_assert(detail::allowable_aot_mode<Archs...>(), | ||||||
"The if_architecture_is function may only be used when AOT " | ||||||
"compiling with '-fsycl-targets=spir64_x86_64' or " | ||||||
"'-fsycl-targets=intel_gpu_*'"); | ||||||
if constexpr (detail::device_architecture_is<Archs...>()) { | ||||||
fnTrue(args...); | ||||||
return detail::if_architecture_helper<false>{}; | ||||||
} else { | ||||||
(void)fnTrue; | ||||||
return detail::if_architecture_helper<true>{}; | ||||||
} | ||||||
} | ||||||
|
||||||
} // namespace ext::intel::experimental | ||||||
} // __SYCL_INLINE_VER_NAMESPACE(_V1) | ||||||
} // namespace sycl |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
//==--------------- DeviceArchitectureOneArchSelected.cpp ------------------==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include <gtest/gtest.h> | ||
|
||
// define one of __SYCL_TARGET_INTEL_*** macro, e.g., the one for SKL | ||
#define __SYCL_TARGET_INTEL_GPU_SKL__ 1 | ||
|
||
#include <sycl/ext/intel/experimental/device_architecture.hpp> | ||
|
||
using namespace sycl; | ||
using namespace sycl::detail; | ||
using namespace sycl::ext::intel::experimental; | ||
|
||
TEST(DeviceArchitectureTest, DeviceArchitecture_If) { | ||
bool res = false; | ||
if_architecture_is<architecture::intel_gpu_skl>([&]() { res = true; }); | ||
ASSERT_TRUE(res); | ||
} | ||
|
||
TEST(DeviceArchitectureTest, DeviceArchitecture_If_Negative) { | ||
bool res = false; | ||
if_architecture_is<architecture::intel_gpu_pvc>([&]() { res = true; }); | ||
ASSERT_FALSE(res); | ||
} | ||
|
||
TEST(DeviceArchitectureTest, DeviceArchitecture_Else_If) { | ||
bool res = false; | ||
if_architecture_is<architecture::intel_gpu_dg1>([]() { | ||
}).else_if_architecture_is<architecture::intel_gpu_skl>([&]() { | ||
res = true; | ||
}); | ||
ASSERT_TRUE(res); | ||
} | ||
|
||
TEST(DeviceArchitectureTest, DeviceArchitecture_Otherwise) { | ||
bool res = false; | ||
if_architecture_is<architecture::intel_gpu_dg1>([]() { | ||
}).else_if_architecture_is<architecture::intel_gpu_pvc>([&]() { | ||
}).otherwise([&]() { res = true; }); | ||
ASSERT_TRUE(res); | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.