Skip to content

Commit fb8c82d

Browse files
authored
Revert "[SYCL] Implement sub_group_mask version 2 (#11195)" (#12239)
This reverts commit 3bd09b9. Reverting as it affects performance. Will re-introduce later.
1 parent 04c4317 commit fb8c82d

File tree

7 files changed

+28
-179
lines changed

7 files changed

+28
-179
lines changed

sycl/include/sycl/ext/oneapi/experimental/tangle_group.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,9 @@ get_tangle_group(Group group) {
153153
// TODO: Construct from compiler-generated mask. Return an invalid group in
154154
// the meantime. CUDA devices will report false for the tangle_group
155155
// support aspect so kernels launch should ensure this is never run.
156-
return tangle_group<sycl::sub_group>(0);
156+
return tangle_group<sycl::sub_group>(
157+
sycl::detail::Builder::createSubGroupMask<
158+
sycl::ext::oneapi::sub_group_mask>(0, 0));
157159
#endif
158160
#else
159161
throw runtime_error("Non-uniform groups are not supported on host device.",

sycl/include/sycl/ext/oneapi/sub_group_mask.hpp

Lines changed: 21 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99

1010
#include <sycl/detail/helpers.hpp> // for Builder
1111
#include <sycl/detail/memcpy.hpp> // detail::memcpy
12+
#include <sycl/detail/type_traits.hpp> // for is_sub_group
1213
#include <sycl/exception.hpp> // for errc, exception
13-
#include <sycl/feature_test.hpp> // for SYCL_EXT_ONEAPI_SUB_GROUP_MASK
1414
#include <sycl/id.hpp> // for id
1515
#include <sycl/marray.hpp> // for marray
1616
#include <sycl/types.hpp> // for vec
@@ -35,26 +35,25 @@ template <typename Group> struct group_scope;
3535

3636
} // namespace detail
3737

38-
// forward declare sycl::sub_group
39-
struct sub_group;
40-
4138
namespace ext::oneapi {
4239

43-
// forward declare sycl::ext::oneapi::sub_group
44-
struct sub_group;
40+
#if defined(__SYCL_DEVICE_ONLY__) && defined(__AMDGCN__) && \
41+
(__AMDGCN_WAVEFRONT_SIZE == 64)
42+
#define BITS_TYPE uint64_t
43+
#else
44+
#define BITS_TYPE uint32_t
45+
#endif
4546

4647
// defining `group_ballot` here to make predicate default `true`
4748
// need to forward declare sub_group_mask first
4849
struct sub_group_mask;
4950
template <typename Group>
50-
std::enable_if_t<std::is_same_v<std::decay_t<Group>, sub_group> ||
51-
std::is_same_v<std::decay_t<Group>, sycl::sub_group>,
52-
sub_group_mask>
51+
std::enable_if_t<sycl::detail::is_sub_group<Group>::value, sub_group_mask>
5352
group_ballot(Group g, bool predicate = true);
5453

5554
struct sub_group_mask {
5655
friend class sycl::detail::Builder;
57-
using BitsType = uint64_t;
56+
using BitsType = BITS_TYPE;
5857

5958
static constexpr size_t max_bits =
6059
sizeof(BitsType) * CHAR_BIT /* implementation-defined */;
@@ -82,8 +81,7 @@ struct sub_group_mask {
8281
}
8382

8483
reference(sub_group_mask &gmask, size_t pos) : Ref(gmask.Bits) {
85-
BitsType one = 1;
86-
RefBit = (pos < gmask.bits_num) ? (one << pos) : 0;
84+
RefBit = (pos < gmask.bits_num) ? (1UL << pos) : 0;
8785
}
8886

8987
private:
@@ -93,36 +91,8 @@ struct sub_group_mask {
9391
BitsType RefBit;
9492
};
9593

96-
#if SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 2
97-
sub_group_mask() : sub_group_mask(0, GetMaxLocalRangeSize()){};
98-
99-
sub_group_mask(unsigned long long val)
100-
: sub_group_mask(0, GetMaxLocalRangeSize()) {
101-
Bits = val;
102-
};
103-
104-
template <typename T, std::size_t K,
105-
typename = std::enable_if_t<std::is_integral_v<T>>>
106-
sub_group_mask(const sycl::marray<T, K> &val)
107-
: sub_group_mask(0, GetMaxLocalRangeSize()) {
108-
for (size_t I = 0, BytesCopied = 0; I < K && BytesCopied < sizeof(Bits);
109-
++I) {
110-
size_t RemainingBytes = sizeof(Bits) - BytesCopied;
111-
size_t BytesToCopy =
112-
RemainingBytes < sizeof(T) ? RemainingBytes : sizeof(T);
113-
sycl::detail::memcpy(reinterpret_cast<char *>(&Bits) + BytesCopied,
114-
&val[I], BytesToCopy);
115-
BytesCopied += BytesToCopy;
116-
}
117-
}
118-
119-
sub_group_mask(const sub_group_mask &other) = default;
120-
sub_group_mask &operator=(const sub_group_mask &other) = default;
121-
#endif // SYCL_EXT_ONEAPI_SUB_GROUP_MASK
122-
12394
bool operator[](id<1> id) const {
124-
BitsType one = 1;
125-
return (Bits & ((id.get(0) < bits_num) ? (one << id.get(0)) : 0));
95+
return (Bits & ((id.get(0) < bits_num) ? (1UL << id.get(0)) : 0));
12696
}
12797

12898
reference operator[](id<1> id) { return {*this, id.get(0)}; }
@@ -284,6 +254,10 @@ struct sub_group_mask {
284254
return Tmp;
285255
}
286256

257+
sub_group_mask(const sub_group_mask &rhs) = default;
258+
259+
sub_group_mask &operator=(const sub_group_mask &rhs) = default;
260+
287261
template <typename Group>
288262
friend std::enable_if_t<std::is_same_v<std::decay_t<Group>, sub_group>,
289263
sub_group_mask>
@@ -311,14 +285,6 @@ struct sub_group_mask {
311285
}
312286

313287
private:
314-
static size_t GetMaxLocalRangeSize() {
315-
#ifdef __SYCL_DEVICE_ONLY__
316-
return __spirv_SubgroupMaxSize();
317-
#else
318-
return max_bits;
319-
#endif
320-
}
321-
322288
sub_group_mask(BitsType rhs, size_t bn)
323289
: Bits(rhs & valuable_bits(bn)), bits_num(bn) {
324290
assert(bits_num <= max_bits);
@@ -336,17 +302,15 @@ struct sub_group_mask {
336302
};
337303

338304
template <typename Group>
339-
std::enable_if_t<std::is_same_v<std::decay_t<Group>, sub_group> ||
340-
std::is_same_v<std::decay_t<Group>, sycl::sub_group>,
341-
sub_group_mask>
305+
std::enable_if_t<sycl::detail::is_sub_group<Group>::value, sub_group_mask>
342306
group_ballot(Group g, bool predicate) {
343307
(void)g;
344308
#ifdef __SYCL_DEVICE_ONLY__
345309
auto res = __spirv_GroupNonUniformBallot(
346310
sycl::detail::spirv::group_scope<Group>::value, predicate);
347-
sub_group_mask::BitsType val = res[0];
348-
if constexpr (sizeof(sub_group_mask::BitsType) == 8)
349-
val |= ((sub_group_mask::BitsType)res[1]) << 32;
311+
BITS_TYPE val = res[0];
312+
if constexpr (sizeof(BITS_TYPE) == 8)
313+
val |= ((BITS_TYPE)res[1]) << 32;
350314
return sycl::detail::Builder::createSubGroupMask<sub_group_mask>(
351315
val, g.get_max_local_range()[0]);
352316
#else
@@ -356,6 +320,8 @@ group_ballot(Group g, bool predicate) {
356320
#endif
357321
}
358322

323+
#undef BITS_TYPE
324+
359325
} // namespace ext::oneapi
360326
} // namespace _V1
361327
} // namespace sycl

sycl/include/syclcompat/memory.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
#include <utility>
4343

4444
#include <sycl/builtins.hpp>
45-
#include <sycl/ext/intel/experimental/usm_properties.hpp>
4645
#include <sycl/ext/oneapi/group_local_memory.hpp>
4746
#include <sycl/usm.hpp>
4847

sycl/source/feature_test.hpp.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ inline namespace _V1 {
3131
// TODO: Move these feature-test macros to compiler driver.
3232
#define SYCL_EXT_INTEL_DEVICE_INFO 6
3333
#define SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE 1
34-
#define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 2
34+
#define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 1
3535
#define SYCL_EXT_ONEAPI_LOCAL_MEMORY 1
3636
#define SYCL_EXT_ONEAPI_MATRIX 1
3737
#define SYCL_EXT_ONEAPI_ASSERT 1

sycl/test-e2e/SubGroupMask/sub_group_mask_ver2.cpp

Lines changed: 0 additions & 119 deletions
This file was deleted.

sycl/test/extensions/macro.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ constexpr bool backend_opencl_macro_defined = true;
99
constexpr bool backend_opencl_macro_defined = false;
1010
#endif
1111

12-
#ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK
12+
#if SYCL_EXT_ONEAPI_SUB_GROUP_MASK == 1
1313
constexpr bool sub_group_mask_macro_defined = true;
1414
#else
1515
constexpr bool sub_group_mask_macro_defined = false;

sycl/test/extensions/sub_group_mask.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only %s
22
//
33
// This test is intended to check sycl::ext::oneapi::sub_group_mask interface.
4-
// test for spec ver.2: sycl/test-e2e/SubGroupMask/sub_group_mask_ver2.cpp
4+
// There is a work in progress update to the spec: intel/llvm#8174
5+
// TODO: update this test once revision 2 of the extension is supported
56

67
#include <sycl/sycl.hpp>
78

0 commit comments

Comments
 (0)