|
| 1 | +//===----------------------------------------------------------------------===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +#include <spirv/spirv.h> |
| 10 | +#include <spirv/spirv_types.h> |
| 11 | + |
| 12 | +#pragma OPENCL EXTENSION cl_khr_fp16 : enable |
| 13 | +#pragma OPENCL EXTENSION cl_khr_fp64 : enable |
| 14 | + |
// CLC helpers
//
// Accessors for __local (work-group shared) scratch storage used below to
// exchange per-sub-group values.  Implementations live elsewhere and are
// bound purely by symbol name via __asm.  Signed/unsigned variants of the
// same width deliberately share one symbol (e.g. uchar binds to the char
// symbol, uint to the int symbol): only the storage size matters here.
__local bool *
__clc__get_group_scratch_bool() __asm("__clc__get_group_scratch_bool");
__local char *
__clc__get_group_scratch_char() __asm("__clc__get_group_scratch_char");
__local uchar *
__clc__get_group_scratch_uchar() __asm("__clc__get_group_scratch_char");
__local short *
__clc__get_group_scratch_short() __asm("__clc__get_group_scratch_short");
__local ushort *
__clc__get_group_scratch_ushort() __asm("__clc__get_group_scratch_short");
__local int *
__clc__get_group_scratch_int() __asm("__clc__get_group_scratch_int");
__local uint *
__clc__get_group_scratch_uint() __asm("__clc__get_group_scratch_int");
__local long *
__clc__get_group_scratch_long() __asm("__clc__get_group_scratch_long");
__local ulong *
__clc__get_group_scratch_ulong() __asm("__clc__get_group_scratch_long");
__local half *
__clc__get_group_scratch_half() __asm("__clc__get_group_scratch_half");
__local float *
__clc__get_group_scratch_float() __asm("__clc__get_group_scratch_float");
__local double *
__clc__get_group_scratch_double() __asm("__clc__get_group_scratch_double");
| 40 | + |
// Declare the Intel sub-group shuffle builtins under their Itanium-mangled
// names; TYPE_MANGLED is the single-character mangling code for TYPE.
// The first declares SubgroupShuffleINTEL (read an absolute lane id), the
// second SubgroupShuffleUpINTEL (read a lane at a relative offset below).
#define __CLC_DECLARE_SHUFFLES(TYPE, TYPE_MANGLED)                             \
  _CLC_DECL TYPE _Z28__spirv_SubgroupShuffleINTELI##TYPE_MANGLED##ET_S0_j(     \
      TYPE, int);                                                              \
  _CLC_DECL TYPE                                                               \
      _Z30__spirv_SubgroupShuffleUpINTELI##TYPE_MANGLED##ET_S0_S0_j(TYPE,      \
                                                                    int);

// Itanium mangling codes: a=char, h=uchar, s=short, t=ushort, i=int,
// j=uint, f=float, l=long, m=ulong, d=double.
__CLC_DECLARE_SHUFFLES(char, a);
__CLC_DECLARE_SHUFFLES(unsigned char, h);
__CLC_DECLARE_SHUFFLES(short, s);
__CLC_DECLARE_SHUFFLES(unsigned short, t);
__CLC_DECLARE_SHUFFLES(int, i);
__CLC_DECLARE_SHUFFLES(unsigned int, j);
__CLC_DECLARE_SHUFFLES(float, f);
__CLC_DECLARE_SHUFFLES(long, l);
__CLC_DECLARE_SHUFFLES(unsigned long, m);
__CLC_DECLARE_SHUFFLES(double, d);

#undef __CLC_DECLARE_SHUFFLES
| 60 | + |
// Token-pasting helper used to build function names from a prefix + suffix.
#define __CLC_APPEND(NAME, SUFFIX) NAME##SUFFIX

// Binary operators used by the collective implementations below.
// Arguments are fully parenthesized so operand expressions containing
// lower-precedence operators expand correctly (previously __CLC_MUL(a + b, c)
// expanded to (a + b * c)).  NOTE: x and y may be evaluated more than once,
// so callers must not pass expressions with side effects.
#define __CLC_ADD(x, y) ((x) + (y))
#define __CLC_MIN(x, y) (((x) < (y)) ? (x) : (y))
#define __CLC_MAX(x, y) (((x) > (y)) ? (x) : (y))
#define __CLC_OR(x, y) ((x) | (y))
#define __CLC_AND(x, y) ((x) & (y))
#define __CLC_MUL(x, y) ((x) * (y))
| 69 | + |
// Emits the body of a sub-group collective.  `op` selects Reduce,
// InclusiveScan or ExclusiveScan; lane values are combined with OP, with
// IDENTITY substituted for lanes that have no partner.  The loop is a
// Hillis-Steele inclusive scan: after it completes, each lane holds the
// combination of its own value with every lower lane's value.
//
// Fix: the scan step must use SubgroupShuffleUpINTEL (value from `o` lanes
// below the current lane), not SubgroupShuffleINTEL (absolute lane `o` for
// every lane), matching the ExclusiveScan shift further down.
#define __CLC_SUBGROUP_COLLECTIVE_BODY(OP, TYPE, TYPE_MANGLED, IDENTITY)       \
  uint sg_lid = __spirv_SubgroupLocalInvocationId();                           \
  /* Can't use XOR/butterfly shuffles; some lanes may be inactive */           \
  for (int o = 1; o < __spirv_SubgroupMaxSize(); o *= 2) {                     \
    TYPE contribution =                                                        \
        _Z30__spirv_SubgroupShuffleUpINTELI##TYPE_MANGLED##ET_S0_S0_j(x, o);   \
    bool inactive = (sg_lid < o);                                              \
    contribution = (inactive) ? IDENTITY : contribution;                       \
    x = OP(x, contribution);                                                   \
  }                                                                            \
  /* For Reduce, broadcast result from highest active lane */                  \
  TYPE result;                                                                 \
  if (op == Reduce) {                                                          \
    result = _Z28__spirv_SubgroupShuffleINTELI##TYPE_MANGLED##ET_S0_j(         \
        x, __spirv_SubgroupSize() - 1);                                        \
    *carry = result;                                                           \
  } /* For InclusiveScan, use results as computed */                           \
  else if (op == InclusiveScan) {                                              \
    result = x;                                                                \
    *carry = result;                                                           \
  } /* For ExclusiveScan, shift and prepend identity */                        \
  else if (op == ExclusiveScan) {                                              \
    *carry = x;                                                                \
    result =                                                                   \
        _Z30__spirv_SubgroupShuffleUpINTELI##TYPE_MANGLED##ET_S0_S0_j(x, 1);   \
    if (sg_lid == 0) {                                                         \
      result = IDENTITY;                                                       \
    }                                                                          \
  }                                                                            \
  return result;
| 100 | + |
// Defines __clc__Subgroup<NAME>(op, x, *carry): performs the sub-group
// collective described by __CLC_SUBGROUP_COLLECTIVE_BODY and writes the
// sub-group's aggregate into *carry for use at work-group scope.
#define __CLC_SUBGROUP_COLLECTIVE(NAME, OP, TYPE, TYPE_MANGLED, IDENTITY)      \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __CLC_APPEND(                    \
      __clc__Subgroup, NAME)(uint op, TYPE x, TYPE * carry) {                  \
    __CLC_SUBGROUP_COLLECTIVE_BODY(OP, TYPE, TYPE_MANGLED, IDENTITY)           \
  }
| 106 | + |
// Additive collectives: identity is 0.
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, char, a, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, uchar, h, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, short, s, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, ushort, t, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, int, i, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, uint, j, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, long, l, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, ulong, m, 0)
__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, float, f, 0)
__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, double, d, 0)

// Multiplicative collectives: identity is 1.
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, char, a, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uchar, h, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, short, s, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ushort, t, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, int, i, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uint, j, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, long, l, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ulong, m, 1)
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, float, f, 1)
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, double, d, 1)

// Minimum collectives: identity is the type's maximum value.
__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, char, a, CHAR_MAX)
__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, h, UCHAR_MAX)
__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, short, s, SHRT_MAX)
__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, ushort, t, USHRT_MAX)
__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, int, i, INT_MAX)
__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uint, j, UINT_MAX)
__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, long, l, LONG_MAX)
__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, ulong, m, ULONG_MAX)
__CLC_SUBGROUP_COLLECTIVE(FMin, __CLC_MIN, float, f, FLT_MAX)
__CLC_SUBGROUP_COLLECTIVE(FMin, __CLC_MIN, double, d, DBL_MAX)

// Maximum collectives: identity is the type's minimum value
// (0 for unsigned types, -FLT_MAX / -DBL_MAX for floats).
__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, char, a, CHAR_MIN)
__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, uchar, h, 0)
__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, short, s, SHRT_MIN)
__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, ushort, t, 0)
__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, int, i, INT_MIN)
__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, uint, j, 0)
__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, long, l, LONG_MIN)
__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, ulong, m, 0)
__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, float, f, -FLT_MAX)
__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, double, d, -DBL_MAX)
| 150 | + |
// Boolean collectives.  The identity must be the operator's neutral
// element: true for AND (All), false for OR (Any).  Using true as the OR
// identity would make inactive-lane padding force Any to always return
// true (the group-scope Any below correctly uses false).
__CLC_SUBGROUP_COLLECTIVE(All, __CLC_AND, bool, a, true)
__CLC_SUBGROUP_COLLECTIVE(Any, __CLC_OR, bool, a, false)

#undef __CLC_SUBGROUP_COLLECTIVE_BODY
#undef __CLC_SUBGROUP_COLLECTIVE
| 156 | + |
// Defines __spirv_Group<SPIRV_NAME>(scope, op, x).
//
// Strategy: each sub-group first performs the collective locally via
// __clc__Subgroup<CLC_NAME>, which also yields the sub-group aggregate in
// `carry`.  For Workgroup scope, the last lane of each sub-group publishes
// its carry to __local scratch; after a barrier, every work-item serially
// scans the per-sub-group carries to obtain its sub-group's prefix and the
// whole-group aggregate, then combines that with its local result.
// The trailing barrier keeps the shared scratch safe for reuse by a
// subsequent collective.
#define __CLC_GROUP_COLLECTIVE_INNER(SPIRV_NAME, CLC_NAME, OP, TYPE, IDENTITY) \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __CLC_APPEND(                    \
      __spirv_Group, SPIRV_NAME)(uint scope, uint op, TYPE x) {                \
    TYPE carry = IDENTITY;                                                     \
    /* Perform GroupOperation within sub-group */                              \
    TYPE sg_x = __CLC_APPEND(__clc__Subgroup, CLC_NAME)(op, x, &carry);        \
    if (scope == Subgroup) {                                                   \
      return sg_x;                                                             \
    }                                                                          \
    __local TYPE *scratch = __CLC_APPEND(__clc__get_group_scratch_, TYPE)();   \
    uint sg_id = __spirv_SubgroupId();                                         \
    uint num_sg = __spirv_NumSubgroups();                                      \
    uint sg_lid = __spirv_SubgroupLocalInvocationId();                         \
    uint sg_size = __spirv_SubgroupSize();                                     \
    /* Share carry values across sub-groups */                                 \
    if (sg_lid == sg_size - 1) {                                               \
      scratch[sg_id] = carry;                                                  \
    }                                                                          \
    __spirv_ControlBarrier(Workgroup, 0, 0);                                   \
    /* Perform InclusiveScan over sub-group results */                         \
    TYPE sg_prefix;                                                            \
    TYPE sg_aggregate = scratch[0];                                            \
    _Pragma("unroll") for (int s = 1; s < num_sg; ++s) {                       \
      if (sg_id == s) {                                                        \
        sg_prefix = sg_aggregate;                                              \
      }                                                                        \
      TYPE addend = scratch[s];                                                \
      sg_aggregate = OP(sg_aggregate, addend);                                 \
    }                                                                          \
    /* For Reduce, broadcast result from final sub-group */                    \
    /* For Scan, combine results from previous sub-groups */                   \
    TYPE result;                                                               \
    if (op == Reduce) {                                                        \
      result = sg_aggregate;                                                   \
    } else if (op == InclusiveScan || op == ExclusiveScan) {                   \
      if (sg_id == 0) {                                                        \
        /* First sub-group has no prefix (sg_prefix is left unset here) */     \
        result = sg_x;                                                         \
      } else {                                                                 \
        result = OP(sg_x, sg_prefix);                                          \
      }                                                                        \
    }                                                                          \
    __spirv_ControlBarrier(Workgroup, 0, 0);                                   \
    return result;                                                             \
  }
| 201 | + |
// Arity-dispatch wrappers: __CLC_GROUP_COLLECTIVE accepts either 4 args
// (SPIR-V op name == CLC helper name) or 5 args (distinct names, used when
// there is no uniform SPIR-V group op, e.g. NonUniformIMul -> IMul).
#define __CLC_GROUP_COLLECTIVE_4(NAME, OP, TYPE, IDENTITY)                     \
  __CLC_GROUP_COLLECTIVE_INNER(NAME, NAME, OP, TYPE, IDENTITY)
#define __CLC_GROUP_COLLECTIVE_5(SPIRV_NAME, CLC_NAME, OP, TYPE, IDENTITY)     \
  __CLC_GROUP_COLLECTIVE_INNER(SPIRV_NAME, CLC_NAME, OP, TYPE, IDENTITY)

// Standard variadic-macro arity dispatch: the 6th token selects the macro.
#define DISPATCH_TO_CLC_GROUP_COLLECTIVE_MACRO(_1, _2, _3, _4, _5, NAME, ...)  \
  NAME
#define __CLC_GROUP_COLLECTIVE(...)                                            \
  DISPATCH_TO_CLC_GROUP_COLLECTIVE_MACRO(                                      \
      __VA_ARGS__, __CLC_GROUP_COLLECTIVE_5, __CLC_GROUP_COLLECTIVE_4)         \
  (__VA_ARGS__)
| 213 | + |
// Boolean group collectives: OR identity is false, AND identity is true.
__CLC_GROUP_COLLECTIVE(Any, __CLC_OR, bool, false);
__CLC_GROUP_COLLECTIVE(All, __CLC_AND, bool, true);
// Two-argument GroupAny/GroupAll overloads (the forms SPIR-V's OpGroupAny /
// OpGroupAll lower to): forward to the three-argument collectives generated
// above with op == Reduce.
_CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT bool __spirv_GroupAny(uint scope,
                                                             bool predicate) {
  return __spirv_GroupAny(scope, Reduce, predicate);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT bool __spirv_GroupAll(uint scope,
                                                             bool predicate) {
  return __spirv_GroupAll(scope, Reduce, predicate);
}
| 224 | + |
// Group-scope additive collectives: identity is 0.
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, char, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, uchar, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, short, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, ushort, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, int, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, uint, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, long, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, ulong, 0)
__CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
__CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)

// There is no Mul group op in SPIR-V, use non-uniform variant instead.
// (5-arg form: SPIR-V name NonUniform*Mul maps to the *Mul CLC helper.)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, char, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uchar, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, short, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ushort, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, int, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uint, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, long, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ulong, 1)
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, float, 1)
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, double, 1)

// Minimum collectives: identity is the type's maximum value.
__CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
__CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)
__CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, short, SHRT_MAX)
__CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, ushort, USHRT_MAX)
__CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, int, INT_MAX)
__CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, uint, UINT_MAX)
__CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, long, LONG_MAX)
__CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, ulong, ULONG_MAX)
__CLC_GROUP_COLLECTIVE(FMin, __CLC_MIN, float, FLT_MAX)
__CLC_GROUP_COLLECTIVE(FMin, __CLC_MIN, double, DBL_MAX)

// Maximum collectives: identity is the type's minimum value
// (0 for unsigned types, -FLT_MAX / -DBL_MAX for floats).
__CLC_GROUP_COLLECTIVE(SMax, __CLC_MAX, char, CHAR_MIN)
__CLC_GROUP_COLLECTIVE(UMax, __CLC_MAX, uchar, 0)
__CLC_GROUP_COLLECTIVE(SMax, __CLC_MAX, short, SHRT_MIN)
__CLC_GROUP_COLLECTIVE(UMax, __CLC_MAX, ushort, 0)
__CLC_GROUP_COLLECTIVE(SMax, __CLC_MAX, int, INT_MIN)
__CLC_GROUP_COLLECTIVE(UMax, __CLC_MAX, uint, 0)
__CLC_GROUP_COLLECTIVE(SMax, __CLC_MAX, long, LONG_MIN)
__CLC_GROUP_COLLECTIVE(UMax, __CLC_MAX, ulong, 0)
__CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, float, -FLT_MAX)
__CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, double, -DBL_MAX)

#undef __CLC_GROUP_COLLECTIVE_4
#undef __CLC_GROUP_COLLECTIVE_5
#undef DISPATCH_TO_CLC_GROUP_COLLECTIVE_MACRO
#undef __CLC_GROUP_COLLECTIVE

#undef __CLC_AND
#undef __CLC_OR
#undef __CLC_MAX
#undef __CLC_MIN
#undef __CLC_ADD
#undef __CLC_MUL
| 281 | + |
// Returns this work-item's local ID linearized in row-major order
// (z-major, then y, then x), for indexing work-group-wide scratch.
long __clc__get_linear_local_id() {
  size_t id_x = __spirv_LocalInvocationId_x();
  size_t id_y = __spirv_LocalInvocationId_y();
  size_t id_z = __spirv_LocalInvocationId_z();
  size_t size_x = __spirv_WorkgroupSize_x();
  size_t size_y = __spirv_WorkgroupSize_y();
  // size_z and the sub-group size are not needed for linearization;
  // previously-queried unused values removed.
  return (id_z * size_y * size_x + id_y * size_x + id_x);
}
| 292 | + |
// Linearizes a 2D local ID in row-major order (y-major, then x).
long __clc__2d_to_linear_local_id(ulong2 id) {
  size_t size_x = __spirv_WorkgroupSize_x();
  // size_y is not needed for 2D linearization; unused query removed.
  return (id.y * size_x + id.x);
}
| 298 | + |
// Linearizes a 3D local ID in row-major order (z-major, then y, then x).
long __clc__3d_to_linear_local_id(ulong3 id) {
  size_t size_x = __spirv_WorkgroupSize_x();
  size_t size_y = __spirv_WorkgroupSize_y();
  // size_z is not needed for linearization; unused query removed.
  return (id.z * size_y * size_x + id.y * size_x + id.x);
}
| 305 | + |
// Defines four __spirv_GroupBroadcast overloads for TYPE: the core
// (scope, x, ulong local_id) form, plus ulong2 / ulong3 forms that first
// linearize the multi-dimensional ID, and a uint convenience form.
//
// Sub-group scope broadcasts via SubgroupShuffleINTEL; work-group scope
// routes the value through a single __local scratch slot, with barriers
// around the read so the slot can be safely reused afterwards.
#define __CLC_GROUP_BROADCAST(TYPE, TYPE_MANGLED)                              \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast(          \
      uint scope, TYPE x, ulong local_id) {                                    \
    if (scope == Subgroup) {                                                   \
      return _Z28__spirv_SubgroupShuffleINTELI##TYPE_MANGLED##ET_S0_j(         \
          x, local_id);                                                        \
    }                                                                          \
    bool source = (__clc__get_linear_local_id() == local_id);                  \
    __local TYPE *scratch = __CLC_APPEND(__clc__get_group_scratch_, TYPE)();   \
    if (source) {                                                              \
      *scratch = x;                                                            \
    }                                                                          \
    __spirv_ControlBarrier(Workgroup, 0, 0);                                   \
    TYPE result = *scratch;                                                    \
    __spirv_ControlBarrier(Workgroup, 0, 0);                                   \
    return result;                                                             \
  }                                                                            \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast(          \
      uint scope, TYPE x, ulong2 local_id) {                                   \
    ulong linear_local_id = __clc__2d_to_linear_local_id(local_id);            \
    return __spirv_GroupBroadcast(scope, x, linear_local_id);                  \
  }                                                                            \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast(          \
      uint scope, TYPE x, ulong3 local_id) {                                   \
    ulong linear_local_id = __clc__3d_to_linear_local_id(local_id);            \
    return __spirv_GroupBroadcast(scope, x, linear_local_id);                  \
  }                                                                            \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast(          \
      uint scope, TYPE x, uint local_id) {                                     \
    return __spirv_GroupBroadcast(scope, x, (ulong)local_id);                  \
  }
// Instantiate GroupBroadcast for all shuffle-capable types.  The macro
// expansion ends with a function body's closing brace, so no trailing
// semicolon is needed; the stray file-scope semicolons previously present
// on the first four invocations (inconsistent with the rest, and not valid
// ISO C at file scope) have been removed.
__CLC_GROUP_BROADCAST(char, a)
__CLC_GROUP_BROADCAST(uchar, h)
__CLC_GROUP_BROADCAST(short, s)
__CLC_GROUP_BROADCAST(ushort, t)
__CLC_GROUP_BROADCAST(int, i)
__CLC_GROUP_BROADCAST(uint, j)
__CLC_GROUP_BROADCAST(long, l)
__CLC_GROUP_BROADCAST(ulong, m)
__CLC_GROUP_BROADCAST(float, f)
__CLC_GROUP_BROADCAST(double, d)

#undef __CLC_GROUP_BROADCAST

#undef __CLC_APPEND
0 commit comments