Skip to content

Commit f276326

Browse files
ficoligcbot
authored andcommitted
[Autobackout][FuncReg]Revert of change: 7c428db
Enable Device scope for OpControlBarrier Invoke global_barrier for Device scope. Remove call to OpControlBarrier in global_barrier to avoid recursion.
1 parent d03728a commit f276326

File tree

1 file changed

+10
-19
lines changed

1 file changed

+10
-19
lines changed

IGC/BiFModule/Implementation/barrier.cl

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -124,11 +124,7 @@ void SPIRV_OVERLOADABLE SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(int Execut
124124
__intel_atomic_work_item_fence( Memory, Semantics );
125125
}
126126

127-
if (Execution == Device)
128-
{
129-
global_barrier();
130-
}
131-
else if( Execution <= Workgroup )
127+
if( Execution <= Workgroup )
132128
{
133129
__builtin_IB_thread_group_barrier();
134130
}
@@ -286,18 +282,15 @@ void __builtin_spirv_OpMemoryNamedBarrierWrapperOCL_p3__namedBarrier_i32_i32(loc
286282
}
287283

288284
__global volatile uchar* __builtin_IB_get_sync_buffer();
289-
uint __intel_get_local_linear_id( void );
290-
uint __intel_get_local_size( void );
291285

292286
void global_barrier()
293287
{
294288
//Make sure each WKG item hit the barrier.
295-
__intel_atomic_work_item_fence(Device, AcquireRelease | CrossWorkgroupMemory);
296-
__builtin_IB_thread_group_barrier();
289+
barrier(CLK_GLOBAL_MEM_FENCE);
297290

298291
__global volatile uchar* syncBuffer = __builtin_IB_get_sync_buffer();
299-
bool firstThreadPerWg = __intel_is_first_work_group_item();
300-
uint groupLinearId = (__builtin_IB_get_group_id(2) * __builtin_IB_get_num_groups(1) * __builtin_IB_get_num_groups(0)) + (__builtin_IB_get_group_id(1) * __builtin_IB_get_num_groups(0)) + __builtin_IB_get_group_id(0);
292+
bool firstThreadPerWg = (get_local_id(0) == 0) && (get_local_id(1) == 0) && (get_local_id(2) == 0);
293+
size_t groupLinearId = (get_group_id(2) * get_num_groups(1) * get_num_groups(0)) + (get_group_id(1) * get_num_groups(0)) + get_group_id(0);
301294

302295
//Now first thread of each wkg writes to designated place in syncBuffer
303296
if (firstThreadPerWg)
@@ -306,27 +299,26 @@ void global_barrier()
306299
atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_release, memory_scope_device); // == write_mem_fence(CLK_GLOBAL_MEM_FENCE);
307300
}
308301

309-
uint numGroups = __builtin_IB_get_num_groups(0) * __builtin_IB_get_num_groups(1) * __builtin_IB_get_num_groups(2);
302+
size_t numGroups = get_num_groups(0) * get_num_groups(1) * get_num_groups(2);
310303
//Higher wkg ids tend to not have work to do in all cases, therefore I choose last wkg to wait for the others, as it is most likely it will hit this code sooner.
311304
if (groupLinearId == (numGroups - 1))
312305
{
313-
uint localSize = __intel_get_local_size();
306+
size_t localSize = get_local_size(0) * get_local_size(1) * get_local_size(2);
314307
//24 -48 case
315308
volatile uchar Value;
316309
do
317310
{
318311
atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_acquire, memory_scope_device); // == read_mem_fence(CLK_GLOBAL_MEM_FENCE);
319312
Value = 1;
320-
for (uint i = __intel_get_local_linear_id(); i < numGroups; i += localSize)
313+
for (size_t i = get_local_linear_id(); i < numGroups; i += localSize)
321314
{
322315
Value = Value & syncBuffer[i];
323316
}
324317

325318
} while (Value == 0);
326-
__intel_atomic_work_item_fence(Device, AcquireRelease | CrossWorkgroupMemory);
327-
__builtin_IB_thread_group_barrier();
319+
barrier(CLK_GLOBAL_MEM_FENCE);
328320

329-
for (uint i = __intel_get_local_linear_id(); i < numGroups; i += localSize)
321+
for (size_t i = get_local_linear_id(); i < numGroups; i += localSize)
330322
{
331323
syncBuffer[i] = 0;
332324
}
@@ -339,8 +331,7 @@ void global_barrier()
339331
atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_acquire, memory_scope_device); // == read_mem_fence(CLK_GLOBAL_MEM_FENCE);
340332
};
341333
}
342-
__intel_atomic_work_item_fence(Device, AcquireRelease | CrossWorkgroupMemory);
343-
__builtin_IB_thread_group_barrier();
334+
barrier(CLK_GLOBAL_MEM_FENCE);
344335
}
345336

346337
void system_memfence(char fence_typed_memory)

0 commit comments

Comments
 (0)