-
Notifications
You must be signed in to change notification settings - Fork 787
[SYCL] Implement sycl_ext_oneapi_root_group #9396
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
a691a3f
[SYCL] Implement sycl_ext_oneapi_root_group
0x12CC c986b4d
Address review comments
0x12CC b02ea76
Merge branch 'sycl' into root_group
0x12CC 3c1ff0c
Make non-standard constructor private
0x12CC 971a88f
Merge branch 'sycl' into root_group
0x12CC File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
140 changes: 140 additions & 0 deletions
140
sycl/include/sycl/ext/oneapi/experimental/root_group.hpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
//==--- root_group.hpp --- SYCL extension for root groups ------------------==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#pragma once | ||
|
||
#include <sycl/builtins.hpp> | ||
#include <sycl/ext/oneapi/properties/properties.hpp> | ||
#include <sycl/memory_enums.hpp> | ||
#include <sycl/queue.hpp> | ||
|
||
#define SYCL_EXT_ONEAPI_ROOT_GROUP 1 | ||
|
||
namespace sycl { | ||
__SYCL_INLINE_VER_NAMESPACE(_V1) { | ||
namespace ext::oneapi::experimental { | ||
|
||
namespace info::kernel_queue_specific { | ||
// TODO: Revisit and align with sycl_ext_oneapi_forward_progress extension once | ||
// #7598 is merged. | ||
struct max_num_work_group_sync { | ||
using return_type = size_t; | ||
}; | ||
} // namespace info::kernel_queue_specific | ||
|
||
struct use_root_sync_key { | ||
using value_t = property_value<use_root_sync_key>; | ||
}; | ||
|
||
inline constexpr use_root_sync_key::value_t use_root_sync; | ||
|
||
template <> struct is_property_key<use_root_sync_key> : std::true_type {}; | ||
|
||
template <> struct detail::PropertyToKind<use_root_sync_key> { | ||
static constexpr PropKind Kind = PropKind::UseRootSync; | ||
}; | ||
|
||
template <> | ||
struct detail::IsCompileTimeProperty<use_root_sync_key> : std::true_type {}; | ||
|
||
template <int Dimensions> class root_group { | ||
public: | ||
using id_type = id<Dimensions>; | ||
using range_type = range<Dimensions>; | ||
using linear_id_type = size_t; | ||
static constexpr int dimensions = Dimensions; | ||
static constexpr memory_scope fence_scope = memory_scope::device; | ||
|
||
id<Dimensions> get_group_id() const { return id<Dimensions>{}; }; | ||
|
||
id<Dimensions> get_local_id() const { return it.get_global_id(); } | ||
|
||
range<Dimensions> get_group_range() const { | ||
if constexpr (Dimensions == 3) { | ||
return range<3>{1, 1, 1}; | ||
} else if constexpr (Dimensions == 2) { | ||
return range<2>{1, 1}; | ||
} else { | ||
return range<1>{1}; | ||
} | ||
} | ||
|
||
range<Dimensions> get_local_range() const { return it.get_global_range(); }; | ||
|
||
range<Dimensions> get_max_local_range() const { return get_local_range(); }; | ||
|
||
size_t get_group_linear_id() const { return 0; }; | ||
|
||
size_t get_local_linear_id() const { return it.get_global_linear_id(); } | ||
|
||
size_t get_group_linear_range() const { return get_group_range().size(); }; | ||
|
||
size_t get_local_linear_range() const { return get_local_range().size(); }; | ||
|
||
bool leader() const { return get_local_id() == 0; }; | ||
|
||
private: | ||
friend root_group<Dimensions> | ||
nd_item<Dimensions>::ext_oneapi_get_root_group() const; | ||
|
||
root_group(nd_item<Dimensions> it) : it{it} {} | ||
0x12CC marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
sycl::nd_item<Dimensions> it; | ||
}; | ||
|
||
template <int Dimensions> | ||
group<Dimensions> get_child_group(root_group<Dimensions> g) { | ||
(void)g; | ||
return this_group<Dimensions>(); | ||
} | ||
|
||
template <int Dimensions> sub_group get_child_group(group<Dimensions> g) { | ||
(void)g; | ||
return this_sub_group(); | ||
} | ||
|
||
namespace this_kernel { | ||
template <int Dimensions> root_group<Dimensions> get_root_group() { | ||
return this_nd_item<Dimensions>().ext_oneapi_get_root_group(); | ||
} | ||
} // namespace this_kernel | ||
|
||
} // namespace ext::oneapi::experimental | ||
|
||
template <> | ||
typename ext::oneapi::experimental::info::kernel_queue_specific:: | ||
max_num_work_group_sync::return_type | ||
kernel::ext_oneapi_get_info< | ||
ext::oneapi::experimental::info::kernel_queue_specific:: | ||
max_num_work_group_sync>(const queue &q) const { | ||
// TODO: query the backend to return a value >= 1. | ||
return 1; | ||
} | ||
|
||
template <int dimensions> | ||
void group_barrier(ext::oneapi::experimental::root_group<dimensions> G, | ||
memory_scope FenceScope = decltype(G)::fence_scope) { | ||
(void)G; | ||
(void)FenceScope; | ||
#ifdef __SYCL_DEVICE_ONLY__ | ||
// TODO: Change __spv::Scope::Workgroup to __spv::Scope::Device once backends | ||
// support device scope. __spv::Scope::Workgroup is only valid when | ||
// max_num_work_group_sync is 1, so that all work items in a root group will | ||
// also be in the same work group. | ||
__spirv_ControlBarrier(__spv::Scope::Workgroup, __spv::Scope::Workgroup, | ||
__spv::MemorySemanticsMask::SubgroupMemory | | ||
__spv::MemorySemanticsMask::WorkgroupMemory | | ||
__spv::MemorySemanticsMask::CrossWorkgroupMemory); | ||
#else | ||
throw sycl::runtime_error("Barriers are not supported on host device", | ||
PI_ERROR_INVALID_DEVICE); | ||
#endif | ||
} | ||
|
||
} // __SYCL_INLINE_VER_NAMESPACE(_V1) | ||
} // namespace sycl |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
// RUN: %{build} -I . -o %t.out | ||
// RUN: %{run} %t.out | ||
|
||
#include <cassert> | ||
#include <cstdlib> | ||
#include <type_traits> | ||
|
||
#include <sycl/ext/oneapi/experimental/root_group.hpp> | ||
#include <sycl/sycl.hpp> | ||
|
||
static constexpr int WorkGroupSize = 32; | ||
|
||
void testFeatureMacro() { | ||
static_assert(SYCL_EXT_ONEAPI_ROOT_GROUP == 1, | ||
"SYCL_EXT_ONEAPI_ROOT_GROUP must have a value of 1"); | ||
} | ||
|
||
void testQueriesAndProperties() { | ||
sycl::queue q; | ||
const auto bundle = | ||
sycl::get_kernel_bundle<sycl::bundle_state::executable>(q.get_context()); | ||
const auto kernel = bundle.get_kernel<class QueryKernel>(); | ||
const auto maxWGs = kernel.ext_oneapi_get_info< | ||
sycl::ext::oneapi::experimental::info::kernel_queue_specific:: | ||
max_num_work_group_sync>(q); | ||
const auto props = sycl::ext::oneapi::experimental::properties{ | ||
sycl::ext::oneapi::experimental::use_root_sync}; | ||
q.single_task<class QueryKernel>(props, []() {}); | ||
static_assert(std::is_same_v<std::remove_cv<decltype(maxWGs)>::type, size_t>, | ||
"max_num_work_group_sync query must return size_t"); | ||
assert(maxWGs >= 1 && "max_num_work_group_sync query failed"); | ||
} | ||
|
||
void testRootGroup() { | ||
sycl::queue q; | ||
const auto bundle = | ||
sycl::get_kernel_bundle<sycl::bundle_state::executable>(q.get_context()); | ||
const auto kernel = bundle.get_kernel<class RootGroupKernel>(); | ||
const auto maxWGs = kernel.ext_oneapi_get_info< | ||
sycl::ext::oneapi::experimental::info::kernel_queue_specific:: | ||
max_num_work_group_sync>(q); | ||
const auto props = sycl::ext::oneapi::experimental::properties{ | ||
sycl::ext::oneapi::experimental::use_root_sync}; | ||
|
||
int *data = sycl::malloc_shared<int>(maxWGs * WorkGroupSize, q); | ||
const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize}; | ||
q.parallel_for<class RootGroupKernel>(range, props, [=](sycl::nd_item<1> it) { | ||
auto root = it.ext_oneapi_get_root_group(); | ||
data[root.get_local_id()] = root.get_local_id(); | ||
sycl::group_barrier(root); | ||
|
||
root = sycl::ext::oneapi::experimental::this_kernel::get_root_group<1>(); | ||
int sum = data[root.get_local_id()] + | ||
data[root.get_local_range() - root.get_local_id() - 1]; | ||
sycl::group_barrier(root); | ||
data[root.get_local_id()] = sum; | ||
}); | ||
q.wait(); | ||
|
||
const int workItemCount = static_cast<int>(range.get_global_range().size()); | ||
for (int i = 0; i < workItemCount; i++) { | ||
assert(data[i] == (workItemCount - 1)); | ||
} | ||
} | ||
|
||
void testRootGroupFunctions() { | ||
sycl::queue q; | ||
const auto bundle = | ||
sycl::get_kernel_bundle<sycl::bundle_state::executable>(q.get_context()); | ||
const auto kernel = bundle.get_kernel<class RootGroupFunctionsKernel>(); | ||
const auto maxWGs = kernel.ext_oneapi_get_info< | ||
sycl::ext::oneapi::experimental::info::kernel_queue_specific:: | ||
max_num_work_group_sync>(q); | ||
const auto props = sycl::ext::oneapi::experimental::properties{ | ||
sycl::ext::oneapi::experimental::use_root_sync}; | ||
|
||
constexpr int testCount = 10; | ||
bool *testResults = sycl::malloc_shared<bool>(testCount, q); | ||
const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize}; | ||
q.parallel_for<class RootGroupFunctionsKernel>( | ||
range, props, [=](sycl::nd_item<1> it) { | ||
const auto root = it.ext_oneapi_get_root_group(); | ||
if (root.leader() || root.get_local_id() == 3) { | ||
testResults[0] = root.get_group_id() == sycl::id<1>(0); | ||
testResults[1] = root.leader() | ||
? root.get_local_id() == sycl::id<1>(0) | ||
: root.get_local_id() == sycl::id<1>(3); | ||
testResults[2] = root.get_group_range() == sycl::range<1>(1); | ||
testResults[3] = | ||
root.get_local_range() == sycl::range<1>(WorkGroupSize); | ||
testResults[4] = | ||
root.get_max_local_range() == sycl::range<1>(WorkGroupSize); | ||
testResults[5] = root.get_group_linear_id() == 0; | ||
testResults[6] = | ||
root.get_local_linear_id() == root.get_local_id().get(0); | ||
testResults[7] = root.get_group_linear_range() == 1; | ||
testResults[8] = root.get_local_linear_range() == WorkGroupSize; | ||
|
||
const auto child = | ||
sycl::ext::oneapi::experimental::get_child_group(root); | ||
const auto grandchild = | ||
sycl::ext::oneapi::experimental::get_child_group(child); | ||
0x12CC marked this conversation as resolved.
Show resolved
Hide resolved
|
||
testResults[9] = child == it.get_group(); | ||
static_assert( | ||
std::is_same_v<std::remove_cv<decltype(grandchild)>::type, | ||
sycl::sub_group>, | ||
"get_child_group(sycl::group) must return a sycl::sub_group"); | ||
} | ||
}); | ||
q.wait(); | ||
|
||
for (int i = 0; i < testCount; i++) { | ||
assert(testResults[i]); | ||
} | ||
} | ||
|
||
int main() { | ||
testFeatureMacro(); | ||
testQueriesAndProperties(); | ||
testRootGroup(); | ||
testRootGroupFunctions(); | ||
return EXIT_SUCCESS; | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.