Skip to content

Commit 02a20e5

Browse files
authored
[CLC][AMDGPU] Fix barrier calls in collective group algorithms to generate memory fences (#12873)
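This PR fixes race conditions in the group algorithms implemented in libclc for AMDGPU. The control barriers in those algorithms were called with no memory scope or memory semantics specified (both arguments were 0), so they synchronized execution but did not emit any memory fences.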
1 parent 95f9bc0 commit 02a20e5

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

libclc/amdgcn-amdhsa/libspirv/group/collectives.cl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ __CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, a, true)
212 212   if (sg_lid == sg_size - 1) { \
213 213   scratch[sg_id] = carry; \
214 214   } \
215     - __spirv_ControlBarrier(Workgroup, 0, 0); \
    215 + __spirv_ControlBarrier(Workgroup, Workgroup, AcquireRelease); \
216 216   /* Perform InclusiveScan over sub-group results */ \
217 217   TYPE sg_prefix; \
218 218   TYPE sg_aggregate = scratch[0]; \
@@ -235,7 +235,7 @@ __CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, a, true)
235 235   result = OP(sg_x, sg_prefix); \
236 236   } \
237 237   } \
238     - __spirv_ControlBarrier(Workgroup, 0, 0); \
    238 + __spirv_ControlBarrier(Workgroup, Workgroup, AcquireRelease); \
239 239   return result; \
240 240   }
241 241
@@ -401,9 +401,9 @@ long __clc__3d_to_linear_local_id(ulong3 id) {
401 401   if (source) { \
402 402   *scratch = x; \
403 403   } \
404     - __spirv_ControlBarrier(Workgroup, 0, 0); \
    404 + __spirv_ControlBarrier(Workgroup, Workgroup, AcquireRelease); \
405 405   TYPE result = *scratch; \
406     - __spirv_ControlBarrier(Workgroup, 0, 0); \
    406 + __spirv_ControlBarrier(Workgroup, Workgroup, AcquireRelease); \
407 407   return result; \
408 408   } \
409 409   _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast( \

0 commit comments

Comments
 (0)