Skip to content

Commit 8d83ede

Browse files
aratajew, igcbot
authored and committed
Utilize specification-consistent enums for block_read/write with cacheopts
Subgroup builtins that are part of the `cl_intel_subgroup_extended_block_read_cacheopts` and `cl_intel_subgroup_extended_block_write_cacheopts` specifications were using internal enums for cache controls: `LSC_LDCC` and `LSC_STCC`. These enums implement cache-control handling internally in IGC and may change in the future, so they shouldn't be used as an interface in the OpenCL C API. Instead, this change introduces new enums: `intel_read_cache_control` and `intel_write_cache_control`. It also moves the `intel_subgroup_block_write_cacheopts*` builtins under `#ifdef cl_intel_subgroup_extended_block_write_cacheopts`.
1 parent 6b931c1 commit 8d83ede

File tree

7 files changed

+362
-64
lines changed

7 files changed

+362
-64
lines changed

IGC/BiFModule/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@ set(KHR_DEFINES ${KHR_DEFINES} "cl_intel_subgroup_matrix_multiply_accumulate_tf3
488488
set(KHR_DEFINES ${KHR_DEFINES} "cl_intel_subgroup_extended_block_read")
489489
set(KHR_DEFINES ${KHR_DEFINES} "cl_intel_pvc_lsc_validation")
490490
set(KHR_DEFINES ${KHR_DEFINES} "cl_intel_subgroup_extended_block_read_cacheopts")
491+
set(KHR_DEFINES ${KHR_DEFINES} "cl_intel_subgroup_extended_block_write_cacheopts")
491492

492493
igc_bif_build_bc(
493494
OUTPUT "${IGC_BUILD__BIF_DIR}/IBiF_Impl_int.bc"

IGC/BiFModule/Implementation/IGCBiF_Intrinsics_Lsc.cl

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -434,15 +434,7 @@ ushort8 __builtin_IB_subgroup_block_read_cacheopts_u16_m4k16v2(long baseoffset,
434434
ushort16 __builtin_IB_subgroup_block_read_cacheopts_u16_m8k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
435435
uint8 __builtin_IB_subgroup_block_read_cacheopts_transform_u8_k32(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
436436
uint8 __builtin_IB_subgroup_block_read_cacheopts_transform_u16_k16(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
437-
// 2d block write cacheopts
438-
void __builtin_IB_subgroup_block_write_cacheopts_u8_m1k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort val, enum LSC_STCC cache_control);
439-
void __builtin_IB_subgroup_block_write_cacheopts_u8_m2k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort2 val, enum LSC_STCC cache_control);
440-
void __builtin_IB_subgroup_block_write_cacheopts_u8_m4k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort4 val, enum LSC_STCC cache_control);
441-
void __builtin_IB_subgroup_block_write_cacheopts_u8_m8k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort8 val, enum LSC_STCC cache_control);
442-
void __builtin_IB_subgroup_block_write_cacheopts_u16_m1k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort val, enum LSC_STCC cache_control);
443-
void __builtin_IB_subgroup_block_write_cacheopts_u16_m2k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort2 val, enum LSC_STCC cache_control);
444-
void __builtin_IB_subgroup_block_write_cacheopts_u16_m4k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort4 val, enum LSC_STCC cache_control);
445-
void __builtin_IB_subgroup_block_write_cacheopts_u16_m8k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort8 val, enum LSC_STCC cache_control);
437+
446438
// equivalent to transpose_transform_u8_k32 and transpose_transform_u16_k16
447439
uint8 __builtin_IB_subgroup_block_read_cacheopts_transpose_u32_k8(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
448440
ulong4 __builtin_IB_subgroup_block_read_cacheopts_transpose_u64_k4(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
@@ -463,6 +455,18 @@ void __builtin_IB_subgroup_block_read_prefetch_transpose_u32_k8(long baseoffset,
463455
void __builtin_IB_subgroup_block_read_prefetch_transpose_u64_k4(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
464456
#endif // cl_intel_subgroup_extended_block_read_cacheopts
465457

458+
#ifdef cl_intel_subgroup_extended_block_write_cacheopts
459+
// 2d block write cacheopts
460+
void __builtin_IB_subgroup_block_write_cacheopts_u8_m1k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort val, enum LSC_STCC cache_control);
461+
void __builtin_IB_subgroup_block_write_cacheopts_u8_m2k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort2 val, enum LSC_STCC cache_control);
462+
void __builtin_IB_subgroup_block_write_cacheopts_u8_m4k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort4 val, enum LSC_STCC cache_control);
463+
void __builtin_IB_subgroup_block_write_cacheopts_u8_m8k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort8 val, enum LSC_STCC cache_control);
464+
void __builtin_IB_subgroup_block_write_cacheopts_u16_m1k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort val, enum LSC_STCC cache_control);
465+
void __builtin_IB_subgroup_block_write_cacheopts_u16_m2k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort2 val, enum LSC_STCC cache_control);
466+
void __builtin_IB_subgroup_block_write_cacheopts_u16_m4k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort4 val, enum LSC_STCC cache_control);
467+
void __builtin_IB_subgroup_block_write_cacheopts_u16_m8k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort8 val, enum LSC_STCC cache_control);
468+
#endif // cl_intel_subgroup_extended_block_write_cacheopts
469+
466470
// experimental
467471
#ifdef cl_intel_subgroup_extended_block_read
468472
//

IGC/BiFModule/Languages/OpenCL/IBiF_Sub_Groups.cl

Lines changed: 72 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -871,14 +871,40 @@ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_FLAT(intel_subgroup_block_read_transpose_u64
871871
#endif // defined(cl_intel_subgroup_extended_block_read)
872872

873873
#if defined(cl_intel_subgroup_extended_block_read_cacheopts)
874+
// Translates an `intel_read_cache_control` value (the enum taken by the
// intel_subgroup_block_read_cacheopts* / intel_subgroup_block_prefetch*
// wrappers) into the `enum LSC_LDCC` value expected by the
// __builtin_IB_subgroup_block_read_* builtins.
//
// Each named read_cache_control_*_intel enumerator is mapped one-to-one onto
// an LSC_LDCC_* enumerator; any value not listed below yields LSC_LDCC_DEFAULT.
enum LSC_LDCC mapToInternalReadCacheControl(intel_read_cache_control cache_control)
875+
{
876+
switch (cache_control)
877+
{
878+
case read_cache_control_default_intel:
879+
return LSC_LDCC_DEFAULT;
880+
case read_cache_control_l1_uncached_l3_uncached_intel:
881+
return LSC_LDCC_L1UC_L3UC;
882+
case read_cache_control_l1_uncached_l3_cached_intel:
883+
return LSC_LDCC_L1UC_L3C;
884+
case read_cache_control_l1_cached_l3_uncached_intel:
885+
return LSC_LDCC_L1C_L3UC;
886+
case read_cache_control_l1_cached_l3_cached_intel:
887+
return LSC_LDCC_L1C_L3C;
888+
case read_cache_control_l1_streaming_l3_uncached_intel:
889+
return LSC_LDCC_L1S_L3UC;
890+
case read_cache_control_l1_streaming_l3_cached_intel:
891+
return LSC_LDCC_L1S_L3C;
892+
case read_cache_control_l1_iar_l3_cached_intel:
893+
return LSC_LDCC_L1IAR_L3C;
894+
// Unrecognized values fall back to LSC_LDCC_DEFAULT rather than being
// passed through to the builtin unchanged.
default:
895+
return LSC_LDCC_DEFAULT;
896+
}
897+
}
898+
874899
#define DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(FUNC_NAME, TYPE, INTERNAL_FUNC) \
875-
INLINE TYPE FUNC_NAME( __global void* base_address, int width, int height, int pitch, int2 coord, enum LSC_LDCC cache_control ) \
900+
INLINE TYPE FUNC_NAME( __global void* base_address, int width, int height, int pitch, int2 coord, intel_read_cache_control cache_control ) \
876901
{ \
877902
long baseoffset = as_long(base_address); \
878903
int width_minus_one = width - 1; \
879904
int height_minus_one = height - 1; \
880905
int pitch_minus_one = pitch - 1; \
881-
return INTERNAL_FUNC(baseoffset, width_minus_one, height_minus_one, pitch_minus_one, coord, cache_control); \
906+
enum LSC_LDCC cache_control_internal = mapToInternalReadCacheControl(cache_control); \
907+
return INTERNAL_FUNC(baseoffset, width_minus_one, height_minus_one, pitch_minus_one, coord, cache_control_internal); \
882908
}
883909
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_read_cacheopts_u8_m1k32v2, ushort2, __builtin_IB_subgroup_block_read_cacheopts_u8_m1k32v2)
884910
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_read_cacheopts_u8_m2k32v2, ushort4, __builtin_IB_subgroup_block_read_cacheopts_u8_m2k32v2)
@@ -893,14 +919,55 @@ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_read_cacheopt
893919
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_read_cacheopts_transpose_u32_k8, uint8, __builtin_IB_subgroup_block_read_cacheopts_transpose_u32_k8)
894920
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_read_cacheopts_transpose_u64_k4, ulong4,__builtin_IB_subgroup_block_read_cacheopts_transpose_u64_k4)
895921

922+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u8_m1k32v2, void, __builtin_IB_subgroup_block_read_prefetch_u8_m1k32v2)
923+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u8_m2k32v2, void, __builtin_IB_subgroup_block_read_prefetch_u8_m2k32v2)
924+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u8_m4k32v2, void, __builtin_IB_subgroup_block_read_prefetch_u8_m4k32v2)
925+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u8_m8k32v2, void, __builtin_IB_subgroup_block_read_prefetch_u8_m8k32v2)
926+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u16_m1k16v2, void, __builtin_IB_subgroup_block_read_prefetch_u16_m1k16v2)
927+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u16_m2k16v2, void, __builtin_IB_subgroup_block_read_prefetch_u16_m2k16v2)
928+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u16_m4k16v2, void, __builtin_IB_subgroup_block_read_prefetch_u16_m4k16v2)
929+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u16_m8k16v2, void, __builtin_IB_subgroup_block_read_prefetch_u16_m8k16v2)
930+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_transform_u8_k32, void, __builtin_IB_subgroup_block_read_prefetch_transform_u8_k32)
931+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_transform_u16_k16, void, __builtin_IB_subgroup_block_read_prefetch_transform_u16_k16)
932+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_transpose_u32_k8, void, __builtin_IB_subgroup_block_read_prefetch_transpose_u32_k8)
933+
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_transpose_u64_k4, void, __builtin_IB_subgroup_block_read_prefetch_transpose_u64_k4)
934+
#endif // defined(cl_intel_subgroup_extended_block_read_cacheopts)
935+
936+
#if defined(cl_intel_subgroup_extended_block_write_cacheopts)
937+
// Translates an `intel_write_cache_control` value (the enum taken by the
// intel_subgroup_block_write_cacheopts* wrappers) into the `enum LSC_STCC`
// value expected by the __builtin_IB_subgroup_block_write_cacheopts_* builtins.
//
// Each named write_cache_control_*_intel enumerator is mapped one-to-one onto
// an LSC_STCC_* enumerator; any value not listed below yields LSC_STCC_DEFAULT.
enum LSC_STCC mapToInternalWriteCacheControl(intel_write_cache_control cache_control)
938+
{
939+
switch (cache_control)
940+
{
941+
case write_cache_control_default_intel:
942+
return LSC_STCC_DEFAULT;
943+
case write_cache_control_l1_uncached_l3_uncached_intel:
944+
return LSC_STCC_L1UC_L3UC;
945+
case write_cache_control_l1_uncached_l3_writeback_intel:
946+
return LSC_STCC_L1UC_L3WB;
947+
case write_cache_control_l1_writethrough_l3_uncached_intel:
948+
return LSC_STCC_L1WT_L3UC;
949+
case write_cache_control_l1_writethrough_l3_writeback_intel:
950+
return LSC_STCC_L1WT_L3WB;
951+
case write_cache_control_l1_streaming_l3_uncached_intel:
952+
return LSC_STCC_L1S_L3UC;
953+
case write_cache_control_l1_streaming_l3_writeback_intel:
954+
return LSC_STCC_L1S_L3WB;
955+
case write_cache_control_l1_writeback_l3_writeback_intel:
956+
return LSC_STCC_L1WB_L3WB;
957+
// Unrecognized values fall back to LSC_STCC_DEFAULT rather than being
// passed through to the builtin unchanged.
default:
958+
return LSC_STCC_DEFAULT;
959+
}
960+
}
961+
896962
#define DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS(FUNC_NAME, TYPE, INTERNAL_FUNC) \
897-
INLINE void FUNC_NAME( __global void* base_address, int width, int height, int pitch, int2 coord, TYPE val, enum LSC_STCC cache_control ) \
963+
INLINE void FUNC_NAME( __global void* base_address, int width, int height, int pitch, int2 coord, TYPE val, intel_write_cache_control cache_control ) \
898964
{ \
899965
long baseoffset = as_long(base_address); \
900966
int width_minus_one = width - 1; \
901967
int height_minus_one = height - 1; \
902968
int pitch_minus_one = pitch - 1; \
903-
INTERNAL_FUNC(baseoffset, width_minus_one, height_minus_one, pitch_minus_one, coord, val, cache_control); \
969+
enum LSC_STCC cache_control_internal = mapToInternalWriteCacheControl(cache_control); \
970+
INTERNAL_FUNC(baseoffset, width_minus_one, height_minus_one, pitch_minus_one, coord, val, cache_control_internal); \
904971
}
905972
DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS(intel_subgroup_block_write_cacheopts_u8_m1k32v1, ushort, __builtin_IB_subgroup_block_write_cacheopts_u8_m1k32v1)
906973
DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS(intel_subgroup_block_write_cacheopts_u8_m2k32v1, ushort2, __builtin_IB_subgroup_block_write_cacheopts_u8_m2k32v1)
@@ -910,20 +977,7 @@ DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS(intel_subgroup_block_write_cacheo
910977
DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS(intel_subgroup_block_write_cacheopts_u16_m2k16v1, ushort2, __builtin_IB_subgroup_block_write_cacheopts_u16_m2k16v1)
911978
DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS(intel_subgroup_block_write_cacheopts_u16_m4k16v1, ushort4, __builtin_IB_subgroup_block_write_cacheopts_u16_m4k16v1)
912979
DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS(intel_subgroup_block_write_cacheopts_u16_m8k16v1, ushort8, __builtin_IB_subgroup_block_write_cacheopts_u16_m8k16v1)
913-
914-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u8_m1k32v2, void, __builtin_IB_subgroup_block_read_prefetch_u8_m1k32v2)
915-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u8_m2k32v2, void, __builtin_IB_subgroup_block_read_prefetch_u8_m2k32v2)
916-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u8_m4k32v2, void, __builtin_IB_subgroup_block_read_prefetch_u8_m4k32v2)
917-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u8_m8k32v2, void, __builtin_IB_subgroup_block_read_prefetch_u8_m8k32v2)
918-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u16_m1k16v2, void, __builtin_IB_subgroup_block_read_prefetch_u16_m1k16v2)
919-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u16_m2k16v2, void, __builtin_IB_subgroup_block_read_prefetch_u16_m2k16v2)
920-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u16_m4k16v2, void, __builtin_IB_subgroup_block_read_prefetch_u16_m4k16v2)
921-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_u16_m8k16v2, void, __builtin_IB_subgroup_block_read_prefetch_u16_m8k16v2)
922-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_transform_u8_k32, void, __builtin_IB_subgroup_block_read_prefetch_transform_u8_k32)
923-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_transform_u16_k16, void, __builtin_IB_subgroup_block_read_prefetch_transform_u16_k16)
924-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_transpose_u32_k8, void, __builtin_IB_subgroup_block_read_prefetch_transpose_u32_k8)
925-
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_prefetch_transpose_u64_k4, void, __builtin_IB_subgroup_block_read_prefetch_transpose_u64_k4)
926-
#endif // defined(cl_intel_subgroup_extended_block_read_cacheopts)
980+
#endif // defined(cl_intel_subgroup_extended_block_write_cacheopts)
927981

928982
#if defined(cl_khr_subgroup_shuffle)
929983
#define DEFN_SUB_GROUP_SHUFFLE(TYPE, SPV_TYPE, TYPE_ABBR) \

0 commit comments

Comments
 (0)