Skip to content

Commit b13a3c4

Browse files
authored
[libclc][hip] Fix half shuffles and reenable reduction test (#13016)
- Fix broken half shuffles on AMD. - Re-enable the Reduction test. The fix is to bitcast half to its storage type (unsigned short) without performing a type conversion, and only then extend it to int for the shuffle. --------- Signed-off-by: JackAKirk <[email protected]>
1 parent feb80ba commit b13a3c4

File tree

2 files changed

+30
-7
lines changed

2 files changed

+30
-7
lines changed

libclc/amdgcn-amdhsa/libspirv/misc/sub_group_shuffle.cl

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,13 @@ __AMDGCN_CLC_SUBGROUP_SUB_I32(unsigned short, t);
3737

3838
#ifdef cl_khr_fp16
3939
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
40-
__AMDGCN_CLC_SUBGROUP_SUB_I32(half, DF16_);
40+
// half (DF16_) variant of __spirv_SubgroupShuffleINTEL.
// Data is reinterpreted bit-for-bit as its storage type (unsigned short)
// rather than value-converted, widened to int, forwarded to the int variant
// of the shuffle together with InvocationId, and the result is narrowed and
// reinterpreted back as half.
_CLC_DEF half _Z28__spirv_SubgroupShuffleINTELIDF16_ET_S0_j(
41+
half Data, unsigned int InvocationId) {
42+
unsigned short tmp = as_ushort(Data); // raw bits of Data, no numeric conversion
43+
tmp = (unsigned short)_Z28__spirv_SubgroupShuffleINTELIiET_S0_j((int)tmp,
44+
InvocationId); // widen to int for the int shuffle, narrow the result back
45+
return as_half(tmp); // reinterpret the shuffled bits as half
46+
}
4147
#endif // cl_khr_fp16
4248

4349
#undef __AMDGCN_CLC_SUBGROUP_SUB_I32
@@ -170,7 +176,13 @@ __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned char, h);
170176
__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(short, s);
171177
__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned short, t);
172178
#ifdef cl_khr_fp16
173-
__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(half, DF16_);
179+
// half (DF16_) variant of __spirv_SubgroupShuffleXorINTEL.
// Data is reinterpreted bit-for-bit as unsigned short (no value conversion),
// widened to int, forwarded with InvocationId to the int variant of the XOR
// shuffle, and the result is narrowed and reinterpreted back as half.
_CLC_DEF half _Z31__spirv_SubgroupShuffleXorINTELIDF16_ET_S0_j(
180+
half Data, unsigned int InvocationId) {
181+
unsigned short tmp = as_ushort(Data); // raw bits of Data, no numeric conversion
182+
tmp = (unsigned short)_Z31__spirv_SubgroupShuffleXorINTELIiET_S0_j(
183+
(int)tmp, InvocationId); // widen to int for the int shuffle, narrow the result back
184+
return as_half(tmp); // reinterpret the shuffled bits as half
185+
}
174186
#endif // cl_khr_fp16
175187
#undef __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32
176188

@@ -321,7 +333,14 @@ __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(short, s);
321333
__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned short, t);
322334
// half
323335
#ifdef cl_khr_fp16
324-
__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(half, DF16_);
336+
// half (DF16_) variant of __spirv_SubgroupShuffleUpINTEL.
// Both half operands are reinterpreted bit-for-bit as unsigned short (no
// value conversion), widened to int, and forwarded with delta to the int
// variant of the shuffle-up; the result is narrowed and reinterpreted back as
// half.
_CLC_DEF half _Z30__spirv_SubgroupShuffleUpINTELIDF16_ET_S0_S0_j(
337+
half previous, half current, unsigned int delta) {
338+
unsigned short tmpP = as_ushort(previous); // raw bits of previous
339+
unsigned short tmpC = as_ushort(current); // raw bits of current
340+
tmpC = (unsigned short)_Z30__spirv_SubgroupShuffleUpINTELIiET_S0_S0_j(
341+
(int)tmpP, (int)tmpC, delta); // widen both to int, narrow the result back
342+
return as_half(tmpC); // reinterpret the shuffled bits as half
343+
}
325344
#endif // cl_khr_fp16
326345
#undef __AMDGCN_CLC_SUBGROUP_UP_SUB_I32
327346

@@ -474,7 +493,14 @@ __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(short, s);
474493
__AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(unsigned short, t);
475494
// half
476495
#ifdef cl_khr_fp16
477-
__AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(half, DF16_);
496+
// half (DF16_) variant of __spirv_SubgroupShuffleDownINTEL.
// Both half operands are reinterpreted bit-for-bit as unsigned short (no
// value conversion), widened to int, and forwarded with delta to the int
// variant of the shuffle-down; the result is narrowed and reinterpreted back
// as half.
_CLC_DEF half _Z32__spirv_SubgroupShuffleDownINTELIDF16_ET_S0_S0_j(
497+
half current, half next, unsigned int delta) {
498+
unsigned short tmpC = as_ushort(current); // raw bits of current
499+
unsigned short tmpN = as_ushort(next); // raw bits of next
500+
tmpC = (unsigned short)_Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j(
501+
(int)tmpC, (int)tmpN, delta); // widen both to int, narrow the result back
502+
return as_half(tmpC); // reinterpret the shuffled bits as half
503+
}
478504
#endif // cl_khr_fp16
479505
#undef __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32
480506

sycl/test-e2e/Reduction/reduction_nd_ext_half.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
// work group size not bigger than 1`.
88
// XFAIL: hip_nvidia
99

10-
// Incorrect result on AMD.
11-
// XFAIL: hip_amd
12-
1310
// Windows doesn't yet have full shutdown().
1411
// UNSUPPORTED: ze_debug && windows
1512

0 commit comments

Comments
 (0)