@@ -6,9 +6,8 @@ SPDX-License-Identifier: MIT
6
6
7
7
============================= end_copyright_notice ===========================*/
8
8
9
- extern __constant int __UseNative64BitIntBuiltin ;
10
- extern __constant int __UseNative64BitFloatBuiltin ;
11
- extern __constant int __AssumeXYZWalkOrder ;
9
+ extern __constant int __UseNative64BitIntBuiltin ; /* declaration only (extern); the definition is not in this view. Presumably selects native 64-bit integer builtins - confirm at the definition. */
10
+ extern __constant int __UseNative64BitFloatBuiltin ; /* declaration only (extern); the definition is not in this view. Presumably selects native 64-bit floating-point builtins - confirm at the definition. */
12
11
13
12
// Group Instructions
14
13
@@ -2387,103 +2386,79 @@ type __builtin_IB_WorkGroupReduce_##func##_##type_abbr(type X)
2387
2386
/*
 * DEFN_WORK_GROUP_SCAN_INCL(func, type_abbr, type, op)
 *
 * Expands to the definition of
 *     type __builtin_IB_WorkGroupScanInclusive_<func>_<type_abbr>(type X)
 *
 * Parameters:
 *   func      - pasted into the Group<func> SPIR-V builtin name and into the
 *               generated function name.
 *   type_abbr - type suffix pasted into both of those names.
 *   type      - element type of X, of the scratch buffer and of the result.
 *   op        - binary combiner invoked as op(a, b).
 *
 * In this diff view, lines marked '-' are the removed implementation, which
 * branched on __AssumeXYZWalkOrder; lines marked '+' are its replacement.
 * The replacement works as follows:
 *   1. Run a subgroup-level inclusive scan of X (result: sg_x).
 *   2. The work-item with sg_lid == sg_size - 1 stores its sg_x into
 *      scratch[sg_id]. scratch is set up by GET_MEMPOOL_PTR, whose
 *      definition is not visible here.
 *   3. After a Workgroup-scope ControlBarrier, each work-item folds
 *      scratch[0] .. scratch[sg_id - 1] with op to obtain sg_prefix.
 *   4. Work-items with sg_id == 0 return sg_x; all others return
 *      op(sg_x, sg_prefix). A second ControlBarrier is issued before the
 *      return.
 */
#define DEFN_WORK_GROUP_SCAN_INCL (func , type_abbr , type , op ) \
2388
2387
type __builtin_IB_WorkGroupScanInclusive_##func##_##type_abbr(type X) \
2389
2388
{ \
2390
- if (__AssumeXYZWalkOrder) { \
2391
- type sg_x = SPIRV_BUILTIN(Group##func, _i32_i32_##type_abbr, )(Subgroup, GroupOperationInclusiveScan, X); \
2392
- \
2393
- GET_MEMPOOL_PTR(scratch, type, true, 0) \
2394
- uint sg_id = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupId, , )(); \
2395
- uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupLocalInvocationId, , )(); \
2396
- uint sg_size = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupSize, , )(); \
2397
- \
2398
- if (sg_lid == sg_size - 1) { \
2399
- scratch[sg_id] = sg_x; \
2400
- } \
2401
- SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2402
- \
2403
- type sg_aggregate = scratch[0]; \
2404
- for (int s = 1; s < sg_id; ++s) { \
2405
- sg_aggregate = op(sg_aggregate, scratch[s]); \
2406
- } \
2407
- \
2408
- type result = sg_x; \
2409
- if (sg_id != 0) { \
2410
- result = op(sg_x, sg_aggregate); \
2411
- } \
2412
- SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2413
- return result; \
2414
- } else { \
2415
- GET_MEMPOOL_PTR(scratch, type, true, 0) \
2416
- uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInGlobalLinearId, , )(); \
2417
- \
2418
- scratch[sg_lid] = X; \
2419
- SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2420
- \
2421
- type sg_aggregate = scratch[0]; \
2422
- for (int s = 1; s < sg_lid; s++) { \
2423
- sg_aggregate = op(sg_aggregate, scratch[s]); \
2424
- } \
2425
- \
2426
- type result = X; \
2427
- if (sg_lid != 0) { \
2428
- result = op(result, sg_aggregate); \
2429
- } \
2430
- return result; \
2431
- } \
2389
+ type sg_x = SPIRV_BUILTIN(Group##func, _i32_i32_##type_abbr, )(Subgroup, GroupOperationInclusiveScan, X); \
2390
+ \
2391
+ GET_MEMPOOL_PTR(scratch, type, true, 0) \
2392
+ uint sg_id = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupId, , )(); \
2393
+ uint num_sg = SPIRV_BUILTIN_NO_OP(BuiltInNumSubgroups, , )(); \
2394
+ uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupLocalInvocationId, , )(); \
2395
+ uint sg_size = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupSize, , )(); \
2396
+ \
2397
+ if (sg_lid == sg_size - 1) { /* the last lane's inclusive-scan value covers the whole subgroup */ \
2398
+ scratch[sg_id] = sg_x; \
2399
+ } \
2400
+ SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2401
+ \
2402
+ type sg_prefix; /* assigned only inside the loop; read only on the sg_id != 0 path */ \
2403
+ type sg_aggregate = scratch[0]; \
2404
+ for (int s = 1; s < num_sg; ++s) { /* bounded by num_sg; each work-item leaves at s == sg_id */ \
2405
+ if (sg_id == s) { \
2406
+ sg_prefix = sg_aggregate; /* fold of scratch[0] .. scratch[sg_id - 1] */ \
2407
+ break; \
2408
+ } \
2409
+ sg_aggregate = op(sg_aggregate, scratch[s]); \
2410
+ } \
2411
+ \
2412
+ type result; \
2413
+ if (sg_id == 0) { \
2414
+ result = sg_x; \
2415
+ } else { \
2416
+ result = op(sg_x, sg_prefix); \
2417
+ } \
2418
+ SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); /* NOTE(review): presumably keeps scratch from being overwritten by a later call while other work-items still read it - confirm */ \
2419
+ return result; \
2432
2420
}
2433
2421
2422
+
2434
2423
/*
 * DEFN_WORK_GROUP_SCAN_EXCL(func, type_abbr, type, op, identity)
 *
 * Expands to the definition of
 *     type __builtin_IB_WorkGroupScanExclusive_<func>_<type_abbr>(type X)
 *
 * Parameters:
 *   func      - pasted into the Group<func> SPIR-V builtin name and into the
 *               generated function name.
 *   type_abbr - type suffix pasted into both of those names.
 *   type      - element type of X, of the scratch buffer and of the result.
 *   op        - binary combiner invoked as op(a, b).
 *   identity  - value given to lane 0 of every subgroup as its exclusive
 *               subgroup-scan result.
 *
 * In this diff view, lines marked '-' are the removed implementation, which
 * branched on __AssumeXYZWalkOrder; lines marked '+' are its replacement.
 * The replacement works as follows:
 *   1. Run a subgroup-level inclusive scan of X (result: carry).
 *   2. Derive sg_x from carry with intel_sub_group_shuffle_up(..., 1) and
 *      overwrite lane 0 with identity.
 *   3. The work-item with sg_lid == sg_size - 1 stores carry into
 *      scratch[sg_id]. scratch is set up by GET_MEMPOOL_PTR, whose
 *      definition is not visible here.
 *   4. After a Workgroup-scope ControlBarrier, each work-item folds
 *      scratch[0] .. scratch[sg_id - 1] with op to obtain sg_prefix.
 *   5. Work-items with sg_id == 0 return sg_x; all others return
 *      op(sg_x, sg_prefix). A second ControlBarrier is issued before the
 *      return.
 */
#define DEFN_WORK_GROUP_SCAN_EXCL (func , type_abbr , type , op , identity ) \
2435
2424
type __builtin_IB_WorkGroupScanExclusive_##func##_##type_abbr(type X) \
2436
2425
{ \
2437
- if (__AssumeXYZWalkOrder) { \
2438
- type carry = SPIRV_BUILTIN(Group##func, _i32_i32_##type_abbr, )(Subgroup, GroupOperationInclusiveScan, X); \
2439
- \
2440
- GET_MEMPOOL_PTR(scratch, type, true, 0) \
2441
- uint sg_id = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupId, , )(); \
2442
- uint num_sg = SPIRV_BUILTIN_NO_OP(BuiltInNumSubgroups, , )(); \
2443
- uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupLocalInvocationId, , )(); \
2444
- uint sg_size = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupSize, , )(); \
2445
- \
2446
- type sg_x = intel_sub_group_shuffle_up((type)identity, carry, 1); \
2447
- if (sg_lid == 0) { \
2448
- sg_x = identity; \
2449
- } \
2450
- \
2451
- if (sg_lid == sg_size - 1) { \
2452
- scratch[sg_id] = carry; \
2453
- } \
2454
- SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2455
- \
2456
- type sg_aggregate = scratch[0]; \
2457
- for (int s = 1; s < sg_id; ++s) { \
2458
- sg_aggregate = op(sg_aggregate, scratch[s]); \
2459
- } \
2460
- \
2461
- type result; \
2462
- if (sg_id == 0) { \
2463
- result = sg_x; \
2464
- } else { \
2465
- result = op(sg_x, sg_aggregate); \
2466
- } \
2467
- SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2468
- return result; \
2469
- } else { \
2470
- GET_MEMPOOL_PTR(scratch, type, true, 0) \
2471
- uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInGlobalLinearId, , )(); \
2472
- \
2473
- scratch[sg_lid] = X; \
2474
- SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2475
- \
2476
- type sg_aggregate = identity; \
2477
- for (int s = 1; s <= sg_lid; s++) { \
2478
- sg_aggregate = op(sg_aggregate, scratch[s - 1]); \
2479
- } \
2480
- \
2481
- type result = identity; \
2482
- if (sg_lid != 0) { \
2483
- result = op(result, sg_aggregate); \
2484
- } \
2485
- return result; \
2486
- } \
2426
+ type carry = SPIRV_BUILTIN(Group##func, _i32_i32_##type_abbr, )(Subgroup, GroupOperationInclusiveScan, X); \
2427
+ \
2428
+ GET_MEMPOOL_PTR(scratch, type, true, 0) \
2429
+ uint sg_id = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupId, , )(); \
2430
+ uint num_sg = SPIRV_BUILTIN_NO_OP(BuiltInNumSubgroups, , )(); \
2431
+ uint sg_lid = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupLocalInvocationId, , )(); \
2432
+ uint sg_size = SPIRV_BUILTIN_NO_OP(BuiltInSubgroupSize, , )(); \
2433
+ \
2434
+ type sg_x = intel_sub_group_shuffle_up((type)identity, carry, 1); /* presumably hands each lane the inclusive value of the lane before it - confirm against the extension spec */ \
2435
+ if (sg_lid == 0) { /* lane 0 is forced to identity regardless of what the shuffle produced */ \
2436
+ sg_x = identity; \
2437
+ } \
2438
+ \
2439
+ if (sg_lid == sg_size - 1) { /* the last lane's inclusive-scan value covers the whole subgroup */ \
2440
+ scratch[sg_id] = carry; \
2441
+ } \
2442
+ SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2443
+ \
2444
+ type sg_prefix; /* assigned only inside the loop; read only on the sg_id != 0 path */ \
2445
+ type sg_aggregate = scratch[0]; \
2446
+ for (int s = 1; s < num_sg; ++s) { /* bounded by num_sg; each work-item leaves at s == sg_id */ \
2447
+ if (sg_id == s) { \
2448
+ sg_prefix = sg_aggregate; /* fold of scratch[0] .. scratch[sg_id - 1] */ \
2449
+ break; \
2450
+ } \
2451
+ sg_aggregate = op(sg_aggregate, scratch[s]); \
2452
+ } \
2453
+ \
2454
+ type result; \
2455
+ if (sg_id == 0) { \
2456
+ result = sg_x; \
2457
+ } else { \
2458
+ result = op(sg_x, sg_prefix); \
2459
+ } \
2460
+ SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); /* NOTE(review): presumably keeps scratch from being overwritten by a later call while other work-items still read it - confirm */ \
2461
+ return result; \
2487
2462
}
2488
2463
2489
2464
#define DEFN_SUB_GROUP_REDUCE (func , type_abbr , type , op , identity , signed_cast ) \
0 commit comments