Skip to content

Commit 4634f45

Browse files
mbelicki authored and igcbot committed
[Autobackout][FuncReg] Revert of change: 58fe931
Description of the reverted change: "Fixed workgroup scan builtins. This patch fixes workgroup scan builtins, which were returning incorrect results on some platforms."
1 parent 55dae6e commit 4634f45

File tree

3 files changed

+70
-97
lines changed

3 files changed

+70
-97
lines changed

IGC/BiFModule/Implementation/group.cl

Lines changed: 70 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,8 @@ SPDX-License-Identifier: MIT
66
77
============================= end_copyright_notice ===========================*/
88

9-
extern __constant int __UseNative64BitIntBuiltin;
10-
extern __constant int __UseNative64BitFloatBuiltin;
11-
extern __constant int __AssumeXYZWalkOrder;
9+
extern __constant int __UseNative64BitIntBuiltin;
10+
extern __constant int __UseNative64BitFloatBuiltin;
1211

1312
// Group Instructions
1413

@@ -2387,103 +2386,79 @@ type __builtin_IB_WorkGroupReduce_##func##_##type_abbr(type X)
23872386
#define DEFN_WORK_GROUP_SCAN_INCL(func, type_abbr, type, op) \
23882387
type __builtin_IB_WorkGroupScanInclusive_##func##_##type_abbr(type X) \
23892388
{ \
2390-
if (__AssumeXYZWalkOrder) { \
2391-
type sg_x = SPIRV_BUILTIN(Group##func, _i32_i32_##type_abbr, )(Subgroup, GroupOperationInclusiveScan, X); \
2392-
\
2393-
GET_MEMPOOL_PTR(scratch, type, true, 0) \
2394-
uint sg_id = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupId, , )(); \
2395-
uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupLocalInvocationId, , )(); \
2396-
uint sg_size = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupSize, , )(); \
2397-
\
2398-
if (sg_lid == sg_size - 1) { \
2399-
scratch[sg_id] = sg_x; \
2400-
} \
2401-
SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2402-
\
2403-
type sg_aggregate = scratch[0]; \
2404-
for (int s = 1; s < sg_id; ++s) { \
2405-
sg_aggregate = op(sg_aggregate, scratch[s]); \
2406-
} \
2407-
\
2408-
type result = sg_x; \
2409-
if (sg_id != 0) { \
2410-
result = op(sg_x, sg_aggregate); \
2411-
} \
2412-
SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2413-
return result; \
2414-
} else { \
2415-
GET_MEMPOOL_PTR(scratch, type, true, 0) \
2416-
uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInGlobalLinearId, , )(); \
2417-
\
2418-
scratch[sg_lid] = X; \
2419-
SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2420-
\
2421-
type sg_aggregate = scratch[0]; \
2422-
for (int s = 1; s < sg_lid; s++) { \
2423-
sg_aggregate = op(sg_aggregate, scratch[s]); \
2424-
} \
2425-
\
2426-
type result = X; \
2427-
if (sg_lid != 0) { \
2428-
result = op(result, sg_aggregate); \
2429-
} \
2430-
return result; \
2431-
} \
2389+
type sg_x = SPIRV_BUILTIN(Group##func, _i32_i32_##type_abbr, )(Subgroup, GroupOperationInclusiveScan, X); \
2390+
\
2391+
GET_MEMPOOL_PTR(scratch, type, true, 0) \
2392+
uint sg_id = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupId, , )(); \
2393+
uint num_sg = SPIRV_BUILTIN_NO_OP(BuiltInNumSubgroups, , )(); \
2394+
uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupLocalInvocationId, , )(); \
2395+
uint sg_size = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupSize, , )(); \
2396+
\
2397+
if (sg_lid == sg_size - 1) { \
2398+
scratch[sg_id] = sg_x; \
2399+
} \
2400+
SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2401+
\
2402+
type sg_prefix; \
2403+
type sg_aggregate = scratch[0]; \
2404+
for (int s = 1; s < num_sg; ++s) { \
2405+
if (sg_id == s) { \
2406+
sg_prefix = sg_aggregate; \
2407+
break; \
2408+
} \
2409+
sg_aggregate = op(sg_aggregate, scratch[s]); \
2410+
} \
2411+
\
2412+
type result; \
2413+
if (sg_id == 0) { \
2414+
result = sg_x; \
2415+
} else { \
2416+
result = op(sg_x, sg_prefix); \
2417+
} \
2418+
SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2419+
return result; \
24322420
}
24332421

2422+
24342423
#define DEFN_WORK_GROUP_SCAN_EXCL(func, type_abbr, type, op, identity) \
24352424
type __builtin_IB_WorkGroupScanExclusive_##func##_##type_abbr(type X) \
24362425
{ \
2437-
if (__AssumeXYZWalkOrder) { \
2438-
type carry = SPIRV_BUILTIN(Group##func, _i32_i32_##type_abbr, )(Subgroup, GroupOperationInclusiveScan, X); \
2439-
\
2440-
GET_MEMPOOL_PTR(scratch, type, true, 0) \
2441-
uint sg_id = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupId, , )(); \
2442-
uint num_sg = SPIRV_BUILTIN_NO_OP(BuiltInNumSubgroups, , )(); \
2443-
uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupLocalInvocationId, , )(); \
2444-
uint sg_size = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupSize, , )(); \
2445-
\
2446-
type sg_x = intel_sub_group_shuffle_up((type)identity, carry, 1); \
2447-
if (sg_lid == 0) { \
2448-
sg_x = identity; \
2449-
} \
2450-
\
2451-
if (sg_lid == sg_size - 1) { \
2452-
scratch[sg_id] = carry; \
2453-
} \
2454-
SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2455-
\
2456-
type sg_aggregate = scratch[0]; \
2457-
for (int s = 1; s < sg_id; ++s) { \
2458-
sg_aggregate = op(sg_aggregate, scratch[s]); \
2459-
} \
2460-
\
2461-
type result; \
2462-
if (sg_id == 0) { \
2463-
result = sg_x; \
2464-
} else { \
2465-
result = op(sg_x, sg_aggregate); \
2466-
} \
2467-
SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2468-
return result; \
2469-
} else { \
2470-
GET_MEMPOOL_PTR(scratch, type, true, 0) \
2471-
uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInGlobalLinearId, , )(); \
2472-
\
2473-
scratch[sg_lid] = X; \
2474-
SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2475-
\
2476-
type sg_aggregate = identity; \
2477-
for (int s = 1; s <= sg_lid; s++) { \
2478-
sg_aggregate = op(sg_aggregate, scratch[s - 1]); \
2479-
} \
2480-
\
2481-
type result = identity; \
2482-
if (sg_lid != 0) { \
2483-
result = op(result, sg_aggregate); \
2484-
} \
2485-
return result; \
2486-
} \
2426+
type carry = SPIRV_BUILTIN(Group##func, _i32_i32_##type_abbr, )(Subgroup, GroupOperationInclusiveScan, X); \
2427+
\
2428+
GET_MEMPOOL_PTR(scratch, type, true, 0) \
2429+
uint sg_id = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupId, , )(); \
2430+
uint num_sg = SPIRV_BUILTIN_NO_OP(BuiltInNumSubgroups, , )(); \
2431+
uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupLocalInvocationId, , )(); \
2432+
uint sg_size = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupSize, , )(); \
2433+
\
2434+
type sg_x = intel_sub_group_shuffle_up((type)identity, carry, 1); \
2435+
if (sg_lid == 0) { \
2436+
sg_x = identity; \
2437+
} \
2438+
\
2439+
if (sg_lid == sg_size - 1) { \
2440+
scratch[sg_id] = carry; \
2441+
} \
2442+
SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2443+
\
2444+
type sg_prefix; \
2445+
type sg_aggregate = scratch[0]; \
2446+
for (int s = 1; s < num_sg; ++s) { \
2447+
if (sg_id == s) { \
2448+
sg_prefix = sg_aggregate; \
2449+
break; \
2450+
} \
2451+
sg_aggregate = op(sg_aggregate, scratch[s]); \
2452+
} \
2453+
\
2454+
type result; \
2455+
if (sg_id == 0) { \
2456+
result = sg_x; \
2457+
} else { \
2458+
result = op(sg_x, sg_prefix); \
2459+
} \
2460+
SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2461+
return result; \
24872462
}
24882463

24892464
#define DEFN_SUB_GROUP_REDUCE(func, type_abbr, type, op, identity, signed_cast) \

IGC/Compiler/Optimizer/BuiltInFuncImport.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1146,7 +1146,6 @@ void BIImport::InitializeBIFlags(Module& M)
11461146

11471147
makeVarExternal("__SubDeviceID");
11481148
initializeVarWithValue("__MaxHWThreadIDPerSubDevice", pCtx->platform.GetGTSystemInfo().ThreadCount);
1149-
initializeVarWithValue("__AssumeXYZWalkOrder", 0);
11501149

11511150

11521151
initializeVarWithValue("__JointMatrixLoadStoreOpt", IGC_GET_FLAG_VALUE(JointMatrixLoadStoreOpt));

0 commit comments

Comments
 (0)