@@ -422,6 +422,46 @@ uint8 __builtin_IB_subgroup_block_read_flat_transpose_u32_k8(long baseoffset, in
422
422
ulong4 __builtin_IB_subgroup_block_read_flat_transpose_u64_k4 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord );
423
423
#endif // cl_intel_subgroup_extended_block_read
424
424
425
+ #ifdef cl_intel_subgroup_extended_block_read_cacheopts
426
+ // 2D block reads with a cache-control option: same leading arguments as the other block reads, plus a trailing `enum LSC_LDCC cacheOpt`. NOTE(review): the mNkNvN suffix presumably encodes the block shape and count - confirm against the extension spec.
427
+ ushort2 __builtin_IB_subgroup_block_read_cacheopts_u8_m1k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
428
+ ushort4 __builtin_IB_subgroup_block_read_cacheopts_u8_m2k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
429
+ ushort8 __builtin_IB_subgroup_block_read_cacheopts_u8_m4k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
430
+ ushort16 __builtin_IB_subgroup_block_read_cacheopts_u8_m8k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
431
+ ushort2 __builtin_IB_subgroup_block_read_cacheopts_u16_m1k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
432
+ ushort4 __builtin_IB_subgroup_block_read_cacheopts_u16_m2k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
433
+ ushort8 __builtin_IB_subgroup_block_read_cacheopts_u16_m4k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
434
+ ushort16 __builtin_IB_subgroup_block_read_cacheopts_u16_m8k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
435
+ uint8 __builtin_IB_subgroup_block_read_cacheopts_transform_u8_k32 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
436
+ uint8 __builtin_IB_subgroup_block_read_cacheopts_transform_u16_k16 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
437
+ // 2D block writes with a cache-control option: the data to store is passed as `val`, followed by a trailing `enum LSC_STCC cache_control`; every write builtin returns void.
438
+ void __builtin_IB_subgroup_block_write_cacheopts_u8_m1k32v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort val , enum LSC_STCC cache_control );
439
+ void __builtin_IB_subgroup_block_write_cacheopts_u8_m2k32v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort2 val , enum LSC_STCC cache_control );
440
+ void __builtin_IB_subgroup_block_write_cacheopts_u8_m4k32v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort4 val , enum LSC_STCC cache_control );
441
+ void __builtin_IB_subgroup_block_write_cacheopts_u8_m8k32v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort8 val , enum LSC_STCC cache_control );
442
+ void __builtin_IB_subgroup_block_write_cacheopts_u16_m1k16v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort val , enum LSC_STCC cache_control );
443
+ void __builtin_IB_subgroup_block_write_cacheopts_u16_m2k16v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort2 val , enum LSC_STCC cache_control );
444
+ void __builtin_IB_subgroup_block_write_cacheopts_u16_m4k16v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort4 val , enum LSC_STCC cache_control );
445
+ void __builtin_IB_subgroup_block_write_cacheopts_u16_m8k16v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort8 val , enum LSC_STCC cache_control );
446
+ // 2D block reads with a cache-control option, transpose variants (these are reads, not part of the write group above); equivalent to transpose_transform_u8_k32 and transpose_transform_u16_k16
447
+ uint8 __builtin_IB_subgroup_block_read_cacheopts_transpose_u32_k8 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
448
+ ulong4 __builtin_IB_subgroup_block_read_cacheopts_transpose_u64_k4 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
449
+
450
+ // 2D block read prefetches: one per cacheopts read builtin, taking the same argument list (including `enum LSC_LDCC cacheOpt`) but returning void.
451
+ void __builtin_IB_subgroup_block_read_prefetch_u8_m1k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
452
+ void __builtin_IB_subgroup_block_read_prefetch_u8_m2k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
453
+ void __builtin_IB_subgroup_block_read_prefetch_u8_m4k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
454
+ void __builtin_IB_subgroup_block_read_prefetch_u8_m8k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
455
+ void __builtin_IB_subgroup_block_read_prefetch_u16_m1k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
456
+ void __builtin_IB_subgroup_block_read_prefetch_u16_m2k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
457
+ void __builtin_IB_subgroup_block_read_prefetch_u16_m4k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
458
+ void __builtin_IB_subgroup_block_read_prefetch_u16_m8k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
459
+ void __builtin_IB_subgroup_block_read_prefetch_transform_u8_k32 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
460
+ void __builtin_IB_subgroup_block_read_prefetch_transform_u16_k16 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
461
+ // 2D block read prefetches, transpose variants; equivalent to transpose_transform_u8_k32 and transpose_transform_u16_k16
462
+ void __builtin_IB_subgroup_block_read_prefetch_transpose_u32_k8 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
463
+ void __builtin_IB_subgroup_block_read_prefetch_transpose_u64_k4 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
464
+ #endif // cl_intel_subgroup_extended_block_read_cacheopts
425
465
426
466
// experimental
427
467
#ifdef cl_intel_subgroup_extended_block_read
0 commit comments