Commit 106882c
[SYCL][HIP][libclc] Add group collective functions for HIP (#5202)
Adds group collective functions (reduce, scans, broadcast) for HIP.
1 parent 979bf95 commit 106882c
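
For context: the __spirv_Group* entry points added here are the device-side hooks that SYCL's group algorithms resolve to when compiling for HIP. A hypothetical SYCL 2020 host program that should exercise the new IAdd reduction path (queue setup, names, and sizes are illustrative and not part of this commit):

#include <sycl/sycl.hpp>
#include <iostream>

int main() {
  sycl::queue q;
  constexpr size_t N = 256;
  int *data = sycl::malloc_shared<int>(N, q);
  int *result = sycl::malloc_shared<int>(1, q);
  for (size_t i = 0; i < N; ++i)
    data[i] = 1;

  // One work-group of N items; reduce_over_group is expected to lower to
  // __spirv_GroupIAdd(Workgroup, Reduce, x) on this target.
  q.parallel_for(sycl::nd_range<1>{N, N}, [=](sycl::nd_item<1> it) {
     auto g = it.get_group();
     int sum = sycl::reduce_over_group(g, data[it.get_global_id(0)],
                                       sycl::plus<int>());
     if (g.leader())
       *result = sum;
   }).wait();

  std::cout << *result << "\n"; // expect 256
  sycl::free(data, q);
  sycl::free(result, q);
}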

File tree

4 files changed: +415 -4 lines changed

libclc/amdgcn-amdhsa/libspirv/SOURCES

Lines changed: 3 additions & 0 deletions
Lines changed: 3 additions & 0 deletions

@@ -1,3 +1,6 @@
+
+group/collectives.cl
+group/collectives_helpers.ll
 atomic/loadstore_helpers.ll
 cl_khr_int64_extended_atomics/minmax_helpers.ll
 synchronization/barrier.cl
libclc/amdgcn-amdhsa/libspirv/group/collectives.cl

Lines changed: 350 additions & 0 deletions
@@ -0,0 +1,350 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// CLC helpers
__local bool *
__clc__get_group_scratch_bool() __asm("__clc__get_group_scratch_bool");
__local char *
__clc__get_group_scratch_char() __asm("__clc__get_group_scratch_char");
__local uchar *
__clc__get_group_scratch_uchar() __asm("__clc__get_group_scratch_char");
__local short *
__clc__get_group_scratch_short() __asm("__clc__get_group_scratch_short");
__local ushort *
__clc__get_group_scratch_ushort() __asm("__clc__get_group_scratch_short");
__local int *
__clc__get_group_scratch_int() __asm("__clc__get_group_scratch_int");
__local uint *
__clc__get_group_scratch_uint() __asm("__clc__get_group_scratch_int");
__local long *
__clc__get_group_scratch_long() __asm("__clc__get_group_scratch_long");
__local ulong *
__clc__get_group_scratch_ulong() __asm("__clc__get_group_scratch_long");
__local half *
__clc__get_group_scratch_half() __asm("__clc__get_group_scratch_half");
__local float *
__clc__get_group_scratch_float() __asm("__clc__get_group_scratch_float");
__local double *
__clc__get_group_scratch_double() __asm("__clc__get_group_scratch_double");

#define __CLC_DECLARE_SHUFFLES(TYPE, TYPE_MANGLED)                             \
  _CLC_DECL TYPE _Z28__spirv_SubgroupShuffleINTELI##TYPE_MANGLED##ET_S0_j(     \
      TYPE, int);                                                              \
  _CLC_DECL TYPE                                                               \
      _Z30__spirv_SubgroupShuffleUpINTELI##TYPE_MANGLED##ET_S0_S0_j(TYPE,      \
                                                                    int);

__CLC_DECLARE_SHUFFLES(char, a);
__CLC_DECLARE_SHUFFLES(unsigned char, h);
__CLC_DECLARE_SHUFFLES(short, s);
__CLC_DECLARE_SHUFFLES(unsigned short, t);
__CLC_DECLARE_SHUFFLES(int, i);
__CLC_DECLARE_SHUFFLES(unsigned int, j);
__CLC_DECLARE_SHUFFLES(float, f);
__CLC_DECLARE_SHUFFLES(long, l);
__CLC_DECLARE_SHUFFLES(unsigned long, m);
__CLC_DECLARE_SHUFFLES(double, d);

#undef __CLC_DECLARE_SHUFFLES

#define __CLC_APPEND(NAME, SUFFIX) NAME##SUFFIX

#define __CLC_ADD(x, y) (x + y)
#define __CLC_MIN(x, y) ((x < y) ? (x) : (y))
#define __CLC_MAX(x, y) ((x > y) ? (x) : (y))
#define __CLC_OR(x, y) (x | y)
#define __CLC_AND(x, y) (x & y)
#define __CLC_MUL(x, y) (x * y)

#define __CLC_SUBGROUP_COLLECTIVE_BODY(OP, TYPE, TYPE_MANGLED, IDENTITY)       \
  uint sg_lid = __spirv_SubgroupLocalInvocationId();                           \
  /* Can't use XOR/butterfly shuffles; some lanes may be inactive */           \
  for (int o = 1; o < __spirv_SubgroupMaxSize(); o *= 2) {                     \
    TYPE contribution =                                                        \
        _Z30__spirv_SubgroupShuffleUpINTELI##TYPE_MANGLED##ET_S0_S0_j(x, o);   \
    bool inactive = (sg_lid < o);                                              \
    contribution = (inactive) ? IDENTITY : contribution;                       \
    x = OP(x, contribution);                                                   \
  }                                                                            \
  /* For Reduce, broadcast result from highest active lane */                  \
  TYPE result;                                                                 \
  if (op == Reduce) {                                                          \
    result = _Z28__spirv_SubgroupShuffleINTELI##TYPE_MANGLED##ET_S0_j(         \
        x, __spirv_SubgroupSize() - 1);                                        \
    *carry = result;                                                           \
  } /* For InclusiveScan, use results as computed */                           \
  else if (op == InclusiveScan) {                                              \
    result = x;                                                                \
    *carry = result;                                                           \
  } /* For ExclusiveScan, shift and prepend identity */                        \
  else if (op == ExclusiveScan) {                                              \
    *carry = x;                                                                \
    result =                                                                   \
        _Z30__spirv_SubgroupShuffleUpINTELI##TYPE_MANGLED##ET_S0_S0_j(x, 1);   \
    if (sg_lid == 0) {                                                         \
      result = IDENTITY;                                                       \
    }                                                                          \
  }                                                                            \
  return result;

#define __CLC_SUBGROUP_COLLECTIVE(NAME, OP, TYPE, TYPE_MANGLED, IDENTITY)      \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __CLC_APPEND(                    \
      __clc__Subgroup, NAME)(uint op, TYPE x, TYPE * carry) {                  \
    __CLC_SUBGROUP_COLLECTIVE_BODY(OP, TYPE, TYPE_MANGLED, IDENTITY)           \
  }
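
The loop in __CLC_SUBGROUP_COLLECTIVE_BODY is a Hillis-Steele inclusive scan: in the round with offset o, each lane pulls the value held by the lane o positions below it (substituting IDENTITY when no such lane exists) and folds it in, so after log2(width) rounds lane i holds OP over lanes 0..i. A host-side C++ sketch of the same dataflow (a serial stand-in for the shuffle-up intrinsic, assuming width 8 and the IAdd identity 0):

#include <array>
#include <cstdio>

int main() {
  constexpr int width = 8;              // stand-in for the sub-group size
  std::array<int, width> x{1, 2, 3, 4, 5, 6, 7, 8};
  for (int o = 1; o < width; o *= 2) {
    std::array<int, width> shifted = x; // snapshot: all lanes read, then write
    for (int lane = 0; lane < width; ++lane) {
      // Lanes below the offset have no lane to read from; use the identity.
      int contribution = (lane < o) ? 0 : shifted[lane - o];
      x[lane] += contribution;
    }
  }
  for (int v : x)
    std::printf("%d ", v);              // 1 3 6 10 15 21 28 36
  std::printf("\n");
}
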
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, char, a, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, uchar, h, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, short, s, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, ushort, t, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, int, i, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, uint, j, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, long, l, 0)
__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, ulong, m, 0)
__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, float, f, 0)
__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, double, d, 0)

__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, char, a, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uchar, h, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, short, s, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ushort, t, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, int, i, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uint, j, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, long, l, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ulong, m, 1)
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, float, f, 1)
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, double, d, 1)

__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, char, a, CHAR_MAX)
__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, h, UCHAR_MAX)
__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, short, s, SHRT_MAX)
__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, ushort, t, USHRT_MAX)
__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, int, i, INT_MAX)
__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uint, j, UINT_MAX)
__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, long, l, LONG_MAX)
__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, ulong, m, ULONG_MAX)
__CLC_SUBGROUP_COLLECTIVE(FMin, __CLC_MIN, float, f, FLT_MAX)
__CLC_SUBGROUP_COLLECTIVE(FMin, __CLC_MIN, double, d, DBL_MAX)

__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, char, a, CHAR_MIN)
__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, uchar, h, 0)
__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, short, s, SHRT_MIN)
__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, ushort, t, 0)
__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, int, i, INT_MIN)
__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, uint, j, 0)
__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, long, l, LONG_MIN)
__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, ulong, m, 0)
__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, float, f, -FLT_MAX)
__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, double, d, -DBL_MAX)

__CLC_SUBGROUP_COLLECTIVE(All, __CLC_AND, bool, a, true)
__CLC_SUBGROUP_COLLECTIVE(Any, __CLC_OR, bool, a, false)

#undef __CLC_SUBGROUP_COLLECTIVE_BODY
#undef __CLC_SUBGROUP_COLLECTIVE

#define __CLC_GROUP_COLLECTIVE_INNER(SPIRV_NAME, CLC_NAME, OP, TYPE, IDENTITY) \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __CLC_APPEND(                    \
      __spirv_Group, SPIRV_NAME)(uint scope, uint op, TYPE x) {                \
    TYPE carry = IDENTITY;                                                     \
    /* Perform GroupOperation within sub-group */                              \
    TYPE sg_x = __CLC_APPEND(__clc__Subgroup, CLC_NAME)(op, x, &carry);        \
    if (scope == Subgroup) {                                                   \
      return sg_x;                                                             \
    }                                                                          \
    __local TYPE *scratch = __CLC_APPEND(__clc__get_group_scratch_, TYPE)();   \
    uint sg_id = __spirv_SubgroupId();                                         \
    uint num_sg = __spirv_NumSubgroups();                                      \
    uint sg_lid = __spirv_SubgroupLocalInvocationId();                         \
    uint sg_size = __spirv_SubgroupSize();                                     \
    /* Share carry values across sub-groups */                                 \
    if (sg_lid == sg_size - 1) {                                               \
      scratch[sg_id] = carry;                                                  \
    }                                                                          \
    __spirv_ControlBarrier(Workgroup, 0, 0);                                   \
    /* Perform InclusiveScan over sub-group results */                         \
    TYPE sg_prefix;                                                            \
    TYPE sg_aggregate = scratch[0];                                            \
    _Pragma("unroll") for (int s = 1; s < num_sg; ++s) {                       \
      if (sg_id == s) {                                                        \
        sg_prefix = sg_aggregate;                                              \
      }                                                                        \
      TYPE addend = scratch[s];                                                \
      sg_aggregate = OP(sg_aggregate, addend);                                 \
    }                                                                          \
    /* For Reduce, broadcast result from final sub-group */                    \
    /* For Scan, combine results from previous sub-groups */                   \
    TYPE result;                                                               \
    if (op == Reduce) {                                                        \
      result = sg_aggregate;                                                   \
    } else if (op == InclusiveScan || op == ExclusiveScan) {                   \
      if (sg_id == 0) {                                                        \
        result = sg_x;                                                         \
      } else {                                                                 \
        result = OP(sg_x, sg_prefix);                                          \
      }                                                                        \
    }                                                                          \
    __spirv_ControlBarrier(Workgroup, 0, 0);                                   \
    return result;                                                             \
  }
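
__CLC_GROUP_COLLECTIVE_INNER builds a work-group result out of sub-group results: each sub-group scans locally, its last lane parks the carry in __local scratch, a serial scan over the carries yields each sub-group's prefix, and the prefix is folded back into the local results. A host-side C++ sketch of those phases (serial loops standing in for the barrier-synchronized device code; sizes are illustrative):

#include <cstdio>
#include <vector>

int main() {
  constexpr int sg_size = 4, num_sg = 3;
  std::vector<int> x(sg_size * num_sg, 1); // every lane contributes 1
  std::vector<int> carry(num_sg);

  // Phase 1: inclusive scan within each "sub-group"; last lane saves the carry.
  for (int sg = 0; sg < num_sg; ++sg) {
    int acc = 0;
    for (int lane = 0; lane < sg_size; ++lane) {
      acc += x[sg * sg_size + lane];
      x[sg * sg_size + lane] = acc;
    }
    carry[sg] = acc;
  }

  // Phases 2+3: each sub-group after the first takes the sum of all earlier
  // carries as its prefix and folds it into its local scan results.
  int prefix = 0;
  for (int sg = 0; sg < num_sg; ++sg) {
    for (int lane = 0; lane < sg_size; ++lane)
      x[sg * sg_size + lane] += prefix;
    prefix += carry[sg];
  }

  for (int v : x)
    std::printf("%d ", v); // 1 2 3 4 5 6 7 8 9 10 11 12
  std::printf("\n");
}
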
#define __CLC_GROUP_COLLECTIVE_4(NAME, OP, TYPE, IDENTITY)                     \
  __CLC_GROUP_COLLECTIVE_INNER(NAME, NAME, OP, TYPE, IDENTITY)
#define __CLC_GROUP_COLLECTIVE_5(SPIRV_NAME, CLC_NAME, OP, TYPE, IDENTITY)     \
  __CLC_GROUP_COLLECTIVE_INNER(SPIRV_NAME, CLC_NAME, OP, TYPE, IDENTITY)

#define DISPATCH_TO_CLC_GROUP_COLLECTIVE_MACRO(_1, _2, _3, _4, _5, NAME, ...)  \
  NAME
#define __CLC_GROUP_COLLECTIVE(...)                                            \
  DISPATCH_TO_CLC_GROUP_COLLECTIVE_MACRO(                                      \
      __VA_ARGS__, __CLC_GROUP_COLLECTIVE_5, __CLC_GROUP_COLLECTIVE_4)         \
  (__VA_ARGS__)
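
DISPATCH_TO_CLC_GROUP_COLLECTIVE_MACRO is the usual argument-counting dispatch: the two macro names appended after __VA_ARGS__ shift left or right depending on whether four or five arguments were supplied, so whichever name lands in the NAME slot gets invoked. The same trick on a toy macro (a hypothetical, compilable C++ illustration, not code from this commit):

#include <cstdio>

// With 4 args, PICK sees 6 arguments and SUM_4 lands in the NAME slot;
// with 5 args it sees 7 and SUM_5 lands there instead.
#define PICK(_1, _2, _3, _4, _5, NAME, ...) NAME
#define SUM_4(a, b, c, d) ((a) + (b) + (c) + (d))
#define SUM_5(a, b, c, d, e) ((a) + (b) + (c) + (d) + (e))
#define SUM(...) PICK(__VA_ARGS__, SUM_5, SUM_4)(__VA_ARGS__)

int main() {
  std::printf("%d %d\n", SUM(1, 2, 3, 4), SUM(1, 2, 3, 4, 5)); // 10 15
}
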
__CLC_GROUP_COLLECTIVE(Any, __CLC_OR, bool, false);
__CLC_GROUP_COLLECTIVE(All, __CLC_AND, bool, true);
_CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT bool __spirv_GroupAny(uint scope,
                                                             bool predicate) {
  return __spirv_GroupAny(scope, Reduce, predicate);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT bool __spirv_GroupAll(uint scope,
                                                             bool predicate) {
  return __spirv_GroupAll(scope, Reduce, predicate);
}

__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, char, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, uchar, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, short, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, ushort, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, int, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, uint, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, long, 0)
__CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, ulong, 0)
__CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
__CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)

// There is no Mul group op in SPIR-V, use non-uniform variant instead.
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, char, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uchar, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, short, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ushort, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, int, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uint, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, long, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ulong, 1)
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, float, 1)
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, double, 1)

__CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
__CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)
__CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, short, SHRT_MAX)
__CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, ushort, USHRT_MAX)
__CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, int, INT_MAX)
__CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, uint, UINT_MAX)
__CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, long, LONG_MAX)
__CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, ulong, ULONG_MAX)
__CLC_GROUP_COLLECTIVE(FMin, __CLC_MIN, float, FLT_MAX)
__CLC_GROUP_COLLECTIVE(FMin, __CLC_MIN, double, DBL_MAX)

__CLC_GROUP_COLLECTIVE(SMax, __CLC_MAX, char, CHAR_MIN)
__CLC_GROUP_COLLECTIVE(UMax, __CLC_MAX, uchar, 0)
__CLC_GROUP_COLLECTIVE(SMax, __CLC_MAX, short, SHRT_MIN)
__CLC_GROUP_COLLECTIVE(UMax, __CLC_MAX, ushort, 0)
__CLC_GROUP_COLLECTIVE(SMax, __CLC_MAX, int, INT_MIN)
__CLC_GROUP_COLLECTIVE(UMax, __CLC_MAX, uint, 0)
__CLC_GROUP_COLLECTIVE(SMax, __CLC_MAX, long, LONG_MIN)
__CLC_GROUP_COLLECTIVE(UMax, __CLC_MAX, ulong, 0)
__CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, float, -FLT_MAX)
__CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, double, -DBL_MAX)

#undef __CLC_GROUP_COLLECTIVE_4
#undef __CLC_GROUP_COLLECTIVE_5
#undef DISPATCH_TO_CLC_GROUP_COLLECTIVE_MACRO
#undef __CLC_GROUP_COLLECTIVE

#undef __CLC_AND
#undef __CLC_OR
#undef __CLC_MAX
#undef __CLC_MIN
#undef __CLC_ADD
#undef __CLC_MUL

long __clc__get_linear_local_id() {
  size_t id_x = __spirv_LocalInvocationId_x();
  size_t id_y = __spirv_LocalInvocationId_y();
  size_t id_z = __spirv_LocalInvocationId_z();
  size_t size_x = __spirv_WorkgroupSize_x();
  size_t size_y = __spirv_WorkgroupSize_y();
  return (id_z * size_y * size_x + id_y * size_x + id_x);
}

long __clc__2d_to_linear_local_id(ulong2 id) {
  size_t size_x = __spirv_WorkgroupSize_x();
  return (id.y * size_x + id.x);
}

long __clc__3d_to_linear_local_id(ulong3 id) {
  size_t size_x = __spirv_WorkgroupSize_x();
  size_t size_y = __spirv_WorkgroupSize_y();
  return (id.z * size_y * size_x + id.y * size_x + id.x);
}
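
These helpers linearize a 2- or 3-dimensional local id in row-major order: for work-group extents (Sx, Sy, Sz), the id (x, y, z) maps to z*Sy*Sx + y*Sx + x. A small worked check (values are arbitrary):

#include <cstdio>

int main() {
  const unsigned long size_x = 8, size_y = 4; // work-group extents
  const unsigned long x = 3, y = 2, z = 1;    // a 3-d local id
  unsigned long linear = z * size_y * size_x + y * size_x + x;
  std::printf("%lu\n", linear);               // 1*4*8 + 2*8 + 3 = 51
}
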
#define __CLC_GROUP_BROADCAST(TYPE, TYPE_MANGLED)                              \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast(          \
      uint scope, TYPE x, ulong local_id) {                                    \
    if (scope == Subgroup) {                                                   \
      return _Z28__spirv_SubgroupShuffleINTELI##TYPE_MANGLED##ET_S0_j(         \
          x, local_id);                                                        \
    }                                                                          \
    bool source = (__clc__get_linear_local_id() == local_id);                  \
    __local TYPE *scratch = __CLC_APPEND(__clc__get_group_scratch_, TYPE)();   \
    if (source) {                                                              \
      *scratch = x;                                                            \
    }                                                                          \
    __spirv_ControlBarrier(Workgroup, 0, 0);                                   \
    TYPE result = *scratch;                                                    \
    __spirv_ControlBarrier(Workgroup, 0, 0);                                   \
    return result;                                                             \
  }                                                                            \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast(          \
      uint scope, TYPE x, ulong2 local_id) {                                   \
    ulong linear_local_id = __clc__2d_to_linear_local_id(local_id);            \
    return __spirv_GroupBroadcast(scope, x, linear_local_id);                  \
  }                                                                            \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast(          \
      uint scope, TYPE x, ulong3 local_id) {                                   \
    ulong linear_local_id = __clc__3d_to_linear_local_id(local_id);            \
    return __spirv_GroupBroadcast(scope, x, linear_local_id);                  \
  }                                                                            \
  _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast(          \
      uint scope, TYPE x, uint local_id) {                                     \
    return __spirv_GroupBroadcast(scope, x, (ulong)local_id);                  \
  }
__CLC_GROUP_BROADCAST(char, a);
__CLC_GROUP_BROADCAST(uchar, h);
__CLC_GROUP_BROADCAST(short, s);
__CLC_GROUP_BROADCAST(ushort, t);
__CLC_GROUP_BROADCAST(int, i);
__CLC_GROUP_BROADCAST(uint, j);
__CLC_GROUP_BROADCAST(long, l);
__CLC_GROUP_BROADCAST(ulong, m);
__CLC_GROUP_BROADCAST(float, f);
__CLC_GROUP_BROADCAST(double, d);

#undef __CLC_GROUP_BROADCAST
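
On the user side, sycl::group_broadcast is the entry point that can reach __spirv_GroupBroadcast on this target. A hypothetical SYCL 2020 sketch (names and sizes illustrative): every work-item receives the value held by the chosen local id.

#include <sycl/sycl.hpp>
#include <iostream>

int main() {
  sycl::queue q;
  int *out = sycl::malloc_shared<int>(1, q);
  q.parallel_for(sycl::nd_range<1>{64, 64}, [=](sycl::nd_item<1> it) {
     auto g = it.get_group();
     int mine = static_cast<int>(it.get_local_id(0)) * 10;
     // Broadcast the value owned by local id 3 to the whole work-group.
     int v = sycl::group_broadcast(g, mine, 3);
     if (g.leader())
       *out = v;
   }).wait();
  std::cout << *out << "\n"; // expect 30
  sycl::free(out, q);
}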
#undef __CLC_APPEND
