@@ -871,14 +871,40 @@ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_FLAT(intel_subgroup_block_read_transpose_u64
871
871
#endif // defined(cl_intel_subgroup_extended_block_read)
872
872
873
873
#if defined(cl_intel_subgroup_extended_block_read_cacheopts )
874
// Translates an intel_read_cache_control value into an enum LSC_LDCC value.
// Each named read_cache_control_*_intel case returns one LSC_LDCC_* constant;
// any value not listed in the switch returns LSC_LDCC_DEFAULT.
+ enum LSC_LDCC mapToInternalReadCacheControl (intel_read_cache_control cache_control )
875
+ {
876
+ switch (cache_control )
877
+ {
878
+ case read_cache_control_default_intel :
879
+ return LSC_LDCC_DEFAULT ;
880
+ case read_cache_control_l1_uncached_l3_uncached_intel :
881
+ return LSC_LDCC_L1UC_L3UC ;
882
+ case read_cache_control_l1_uncached_l3_cached_intel :
883
+ return LSC_LDCC_L1UC_L3C ;
884
+ case read_cache_control_l1_cached_l3_uncached_intel :
885
+ return LSC_LDCC_L1C_L3UC ;
886
+ case read_cache_control_l1_cached_l3_cached_intel :
887
+ return LSC_LDCC_L1C_L3C ;
888
+ case read_cache_control_l1_streaming_l3_uncached_intel :
889
+ return LSC_LDCC_L1S_L3UC ;
890
+ case read_cache_control_l1_streaming_l3_cached_intel :
891
+ return LSC_LDCC_L1S_L3C ;
892
+ case read_cache_control_l1_iar_l3_cached_intel :
893
+ return LSC_LDCC_L1IAR_L3C ;
894
+ default : // unrecognized values get the same result as read_cache_control_default_intel
895
+ return LSC_LDCC_DEFAULT ;
896
+ }
897
+ }
898
+
874
899
// Generates an INLINE wrapper named FUNC_NAME that returns TYPE.
// The wrapper reinterprets base_address with as_long, passes width, height and
// pitch each reduced by one, and forwards coord unchanged to INTERNAL_FUNC.
// In this diff the '-' lines are the earlier form, which accepted an
// enum LSC_LDCC and forwarded it as-is; the '+' lines accept an
// intel_read_cache_control and convert it with mapToInternalReadCacheControl
// before the call.
#define DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (FUNC_NAME , TYPE , INTERNAL_FUNC ) \
875
- INLINE TYPE FUNC_NAME( __global void* base_address, int width, int height, int pitch, int2 coord, enum LSC_LDCC cache_control ) \
900
+ INLINE TYPE FUNC_NAME( __global void* base_address, int width, int height, int pitch, int2 coord, intel_read_cache_control cache_control ) \
876
901
{ \
877
902
long baseoffset = as_long(base_address); \
878
903
int width_minus_one = width - 1; \
879
904
int height_minus_one = height - 1; \
880
905
int pitch_minus_one = pitch - 1; \
881
- return INTERNAL_FUNC(baseoffset, width_minus_one, height_minus_one, pitch_minus_one, coord, cache_control); \
906
+ enum LSC_LDCC cache_control_internal = mapToInternalReadCacheControl(cache_control); \
907
+ return INTERNAL_FUNC(baseoffset, width_minus_one, height_minus_one, pitch_minus_one, coord, cache_control_internal); \
882
908
}
883
909
// Instantiations of DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS: each line
// names the wrapper to define, its return type, and the __builtin_IB_* function it calls.
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_read_cacheopts_u8_m1k32v2 , ushort2 , __builtin_IB_subgroup_block_read_cacheopts_u8_m1k32v2 )
884
910
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_read_cacheopts_u8_m2k32v2 , ushort4 , __builtin_IB_subgroup_block_read_cacheopts_u8_m2k32v2 )
@@ -893,14 +919,55 @@ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS(intel_subgroup_block_read_cacheopt
893
919
// Transpose read wrappers, defined through the same read macro:
// the u32_k8 variant returns uint8 and the u64_k4 variant returns ulong4.
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_read_cacheopts_transpose_u32_k8 , uint8 , __builtin_IB_subgroup_block_read_cacheopts_transpose_u32_k8 )
894
920
DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_read_cacheopts_transpose_u64_k4 , ulong4 ,__builtin_IB_subgroup_block_read_cacheopts_transpose_u64_k4 )
895
921
922
// Prefetch wrappers, defined through the read macro with TYPE = void, so each
// generated function has a void return type and calls a
// __builtin_IB_subgroup_block_read_prefetch_* builtin.
// These twelve lines are additions ('+') in this diff; the same twelve
// invocations appear as removals ('-') after the write instantiations.
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u8_m1k32v2 , void , __builtin_IB_subgroup_block_read_prefetch_u8_m1k32v2 )
923
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u8_m2k32v2 , void , __builtin_IB_subgroup_block_read_prefetch_u8_m2k32v2 )
924
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u8_m4k32v2 , void , __builtin_IB_subgroup_block_read_prefetch_u8_m4k32v2 )
925
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u8_m8k32v2 , void , __builtin_IB_subgroup_block_read_prefetch_u8_m8k32v2 )
926
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u16_m1k16v2 , void , __builtin_IB_subgroup_block_read_prefetch_u16_m1k16v2 )
927
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u16_m2k16v2 , void , __builtin_IB_subgroup_block_read_prefetch_u16_m2k16v2 )
928
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u16_m4k16v2 , void , __builtin_IB_subgroup_block_read_prefetch_u16_m4k16v2 )
929
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u16_m8k16v2 , void , __builtin_IB_subgroup_block_read_prefetch_u16_m8k16v2 )
930
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_transform_u8_k32 , void , __builtin_IB_subgroup_block_read_prefetch_transform_u8_k32 )
931
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_transform_u16_k16 , void , __builtin_IB_subgroup_block_read_prefetch_transform_u16_k16 )
932
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_transpose_u32_k8 , void , __builtin_IB_subgroup_block_read_prefetch_transpose_u32_k8 )
933
+ DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_transpose_u64_k4 , void , __builtin_IB_subgroup_block_read_prefetch_transpose_u64_k4 )
934
+ #endif // defined(cl_intel_subgroup_extended_block_read_cacheopts)
935
+
936
+ #if defined(cl_intel_subgroup_extended_block_write_cacheopts )
937
// Translates an intel_write_cache_control value into an enum LSC_STCC value.
// Each named write_cache_control_*_intel case returns one LSC_STCC_* constant;
// any value not listed in the switch returns LSC_STCC_DEFAULT.
+ enum LSC_STCC mapToInternalWriteCacheControl (intel_write_cache_control cache_control )
938
+ {
939
+ switch (cache_control )
940
+ {
941
+ case write_cache_control_default_intel :
942
+ return LSC_STCC_DEFAULT ;
943
+ case write_cache_control_l1_uncached_l3_uncached_intel :
944
+ return LSC_STCC_L1UC_L3UC ;
945
+ case write_cache_control_l1_uncached_l3_writeback_intel :
946
+ return LSC_STCC_L1UC_L3WB ;
947
+ case write_cache_control_l1_writethrough_l3_uncached_intel :
948
+ return LSC_STCC_L1WT_L3UC ;
949
+ case write_cache_control_l1_writethrough_l3_writeback_intel :
950
+ return LSC_STCC_L1WT_L3WB ;
951
+ case write_cache_control_l1_streaming_l3_uncached_intel :
952
+ return LSC_STCC_L1S_L3UC ;
953
+ case write_cache_control_l1_streaming_l3_writeback_intel :
954
+ return LSC_STCC_L1S_L3WB ;
955
+ case write_cache_control_l1_writeback_l3_writeback_intel :
956
+ return LSC_STCC_L1WB_L3WB ;
957
+ default : // unrecognized values get the same result as write_cache_control_default_intel
958
+ return LSC_STCC_DEFAULT ;
959
+ }
960
+ }
961
+
896
962
// Generates an INLINE void wrapper named FUNC_NAME that writes a TYPE value.
// The wrapper reinterprets base_address with as_long, passes width, height and
// pitch each reduced by one, and forwards coord and val unchanged to INTERNAL_FUNC.
// In this diff the '-' lines are the earlier form, which accepted an
// enum LSC_STCC and forwarded it as-is; the '+' lines accept an
// intel_write_cache_control and convert it with mapToInternalWriteCacheControl
// before the call.
#define DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS (FUNC_NAME , TYPE , INTERNAL_FUNC ) \
897
- INLINE void FUNC_NAME( __global void* base_address, int width, int height, int pitch, int2 coord, TYPE val, enum LSC_STCC cache_control ) \
963
+ INLINE void FUNC_NAME( __global void* base_address, int width, int height, int pitch, int2 coord, TYPE val, intel_write_cache_control cache_control ) \
898
964
{ \
899
965
long baseoffset = as_long(base_address); \
900
966
int width_minus_one = width - 1; \
901
967
int height_minus_one = height - 1; \
902
968
int pitch_minus_one = pitch - 1; \
903
- INTERNAL_FUNC(baseoffset, width_minus_one, height_minus_one, pitch_minus_one, coord, val, cache_control); \
969
+ enum LSC_STCC cache_control_internal = mapToInternalWriteCacheControl(cache_control); \
970
+ INTERNAL_FUNC(baseoffset, width_minus_one, height_minus_one, pitch_minus_one, coord, val, cache_control_internal); \
904
971
}
905
972
// Instantiations of DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS: each line
// names the wrapper to define, the type of its val argument, and the
// __builtin_IB_* function it calls.
DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS (intel_subgroup_block_write_cacheopts_u8_m1k32v1 , ushort , __builtin_IB_subgroup_block_write_cacheopts_u8_m1k32v1 )
906
973
DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS (intel_subgroup_block_write_cacheopts_u8_m2k32v1 , ushort2 , __builtin_IB_subgroup_block_write_cacheopts_u8_m2k32v1 )
@@ -910,20 +977,7 @@ DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS(intel_subgroup_block_write_cacheo
910
977
// u16 write wrappers, defined through the write macro; the val type is
// ushort2, ushort4 and ushort8 for the m2, m4 and m8 variants respectively.
DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS (intel_subgroup_block_write_cacheopts_u16_m2k16v1 , ushort2 , __builtin_IB_subgroup_block_write_cacheopts_u16_m2k16v1 )
911
978
DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS (intel_subgroup_block_write_cacheopts_u16_m4k16v1 , ushort4 , __builtin_IB_subgroup_block_write_cacheopts_u16_m4k16v1 )
912
979
DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS (intel_subgroup_block_write_cacheopts_u16_m8k16v1 , ushort8 , __builtin_IB_subgroup_block_write_cacheopts_u16_m8k16v1 )
913
-
914
// Removed ('-') lines: the twelve prefetch instantiations and the #endif
// labelled for the read_cacheopts guard are deleted from this position; the
// same twelve invocations appear as '+' lines before the write section of this diff.
// The single '+' line at the end adds an #endif labelled for the
// write_cacheopts guard in their place.
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u8_m1k32v2 , void , __builtin_IB_subgroup_block_read_prefetch_u8_m1k32v2 )
915
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u8_m2k32v2 , void , __builtin_IB_subgroup_block_read_prefetch_u8_m2k32v2 )
916
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u8_m4k32v2 , void , __builtin_IB_subgroup_block_read_prefetch_u8_m4k32v2 )
917
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u8_m8k32v2 , void , __builtin_IB_subgroup_block_read_prefetch_u8_m8k32v2 )
918
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u16_m1k16v2 , void , __builtin_IB_subgroup_block_read_prefetch_u16_m1k16v2 )
919
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u16_m2k16v2 , void , __builtin_IB_subgroup_block_read_prefetch_u16_m2k16v2 )
920
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u16_m4k16v2 , void , __builtin_IB_subgroup_block_read_prefetch_u16_m4k16v2 )
921
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_u16_m8k16v2 , void , __builtin_IB_subgroup_block_read_prefetch_u16_m8k16v2 )
922
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_transform_u8_k32 , void , __builtin_IB_subgroup_block_read_prefetch_transform_u8_k32 )
923
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_transform_u16_k16 , void , __builtin_IB_subgroup_block_read_prefetch_transform_u16_k16 )
924
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_transpose_u32_k8 , void , __builtin_IB_subgroup_block_read_prefetch_transpose_u32_k8 )
925
- DEFN_INTEL_SUB_GROUP_BLOCK_READ_LSC_CACHEOPTS (intel_subgroup_block_prefetch_transpose_u64_k4 , void , __builtin_IB_subgroup_block_read_prefetch_transpose_u64_k4 )
926
- #endif // defined(cl_intel_subgroup_extended_block_read_cacheopts)
980
+ #endif // defined(cl_intel_subgroup_extended_block_write_cacheopts)
927
981
928
982
#if defined(cl_khr_subgroup_shuffle )
929
983
#define DEFN_SUB_GROUP_SHUFFLE (TYPE , SPV_TYPE , TYPE_ABBR ) \
0 commit comments