Skip to content

Commit b385195

Browse files
aratajewigcbot
authored and committed
Extend cl_intel_subgroup_2d_block_io extension with more variants
This change implements support for the following builtins: `intel_sub_group_2d_block_read_8b_16r16x4c`, `intel_sub_group_2d_block_read_8b_32r16x4c`, and `intel_sub_group_2d_block_prefetch_8b_16r16x4c`.
1 parent 052e15b commit b385195

File tree

5 files changed

+31
-2
lines changed

5 files changed

+31
-2
lines changed

IGC/BiFModule/Implementation/IGCBiF_Intrinsics_Lsc.cl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,8 @@ void __builtin_IB_subgroup_block_write_cacheopts_u16_m8k16v1(long base_address,
496496
#ifdef cl_intel_subgroup_2d_block_io
497497

498498
typedef uchar uchar32 __attribute__((ext_vector_type(32)));
499+
typedef uchar uchar64 __attribute__((ext_vector_type(64)));
500+
typedef uchar uchar128 __attribute__((ext_vector_type(128)));
499501

500502
typedef ushort ushort32 __attribute__((ext_vector_type(32)));
501503
typedef ushort ushort64 __attribute__((ext_vector_type(64)));
@@ -507,6 +509,8 @@ ushort32 __builtin_IB_subgroup_block_read_cacheopts_u8_m16k32v2(long baseoffset
507509
ushort64 __builtin_IB_subgroup_block_read_cacheopts_u8_m32k32v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
508510

509511
uchar32 __builtin_IB_subgroup_block_read_cacheopts_u8_m8k16v4(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
512+
uchar64 __builtin_IB_subgroup_block_read_cacheopts_u8_m16k16v4(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
513+
uchar128 __builtin_IB_subgroup_block_read_cacheopts_u8_m32k16v4(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
510514

511515
ushort32 __builtin_IB_subgroup_block_read_cacheopts_u16_m16k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
512516
ushort64 __builtin_IB_subgroup_block_read_cacheopts_u16_m32k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
@@ -515,6 +519,7 @@ void __builtin_IB_subgroup_block_read_prefetch_u8_m16k32v2(long baseoffset, int
515519
void __builtin_IB_subgroup_block_read_prefetch_u8_m32k32v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
516520

517521
void __builtin_IB_subgroup_block_read_prefetch_u8_m8k16v4(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
522+
void __builtin_IB_subgroup_block_read_prefetch_u8_m16k16v4(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
518523

519524
void __builtin_IB_subgroup_block_read_prefetch_u16_m16k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
520525
void __builtin_IB_subgroup_block_read_prefetch_u16_m32k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);

IGC/BiFModule/Languages/OpenCL/IBiF_Sub_Groups.cl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1049,7 +1049,9 @@ DEFN_INTEL_SUB_GROUP_2D_BLOCK_READ(intel_sub_group_2d_block_read_8b_32r32x1c,
10491049
DEFN_INTEL_SUB_GROUP_2D_BLOCK_READ(intel_sub_group_2d_block_read_8b_16r32x2c, ushort, ushort32, __builtin_IB_subgroup_block_read_cacheopts_u8_m16k32v2)
10501050
DEFN_INTEL_SUB_GROUP_2D_BLOCK_READ(intel_sub_group_2d_block_read_8b_32r32x2c, ushort, ushort64, __builtin_IB_subgroup_block_read_cacheopts_u8_m32k32v2)
10511051

1052-
DEFN_INTEL_SUB_GROUP_2D_BLOCK_READ(intel_sub_group_2d_block_read_8b_8r16x4c, uchar, uchar32, __builtin_IB_subgroup_block_read_cacheopts_u8_m8k16v4)
1052+
DEFN_INTEL_SUB_GROUP_2D_BLOCK_READ(intel_sub_group_2d_block_read_8b_8r16x4c, uchar, uchar32, __builtin_IB_subgroup_block_read_cacheopts_u8_m8k16v4)
1053+
DEFN_INTEL_SUB_GROUP_2D_BLOCK_READ(intel_sub_group_2d_block_read_8b_16r16x4c, uchar, uchar64, __builtin_IB_subgroup_block_read_cacheopts_u8_m16k16v4)
1054+
DEFN_INTEL_SUB_GROUP_2D_BLOCK_READ(intel_sub_group_2d_block_read_8b_32r16x4c, uchar, uchar128, __builtin_IB_subgroup_block_read_cacheopts_u8_m32k16v4)
10531055

10541056
DEFN_INTEL_SUB_GROUP_2D_BLOCK_READ(intel_sub_group_2d_block_read_16b_16r16x2c, ushort, ushort32, __builtin_IB_subgroup_block_read_cacheopts_u16_m16k16v2)
10551057
DEFN_INTEL_SUB_GROUP_2D_BLOCK_READ(intel_sub_group_2d_block_read_16b_32r16x2c, ushort, ushort64, __builtin_IB_subgroup_block_read_cacheopts_u16_m32k16v2)
@@ -1126,6 +1128,7 @@ DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH(intel_sub_group_2d_block_prefetch_8b_16r3
11261128
DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH(intel_sub_group_2d_block_prefetch_8b_32r32x2c, __builtin_IB_subgroup_block_read_prefetch_u8_m32k32v2)
11271129

11281130
DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH(intel_sub_group_2d_block_prefetch_8b_8r16x4c, __builtin_IB_subgroup_block_read_prefetch_u8_m8k16v4)
1131+
DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH(intel_sub_group_2d_block_prefetch_8b_16r16x4c, __builtin_IB_subgroup_block_read_prefetch_u8_m16k16v4)
11291132

11301133
DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH(intel_sub_group_2d_block_prefetch_16b_16r16x2c, __builtin_IB_subgroup_block_read_prefetch_u16_m16k16v2)
11311134
DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH(intel_sub_group_2d_block_prefetch_16b_32r16x2c, __builtin_IB_subgroup_block_read_prefetch_u16_m32k16v2)

IGC/BiFModule/Languages/OpenCL/PreRelease/opencl_cth_pre_release.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2676,8 +2676,11 @@ void __attribute__((overloadable)) intel_sub_group_2d_block_prefetch_8b_32r32x2c
26762676
// 2D Block Load / Prefetch, 8-bit data, Rows in [1*, 2*, 4*, 8, 16*, 32*], Columns in [16x4]:
26772677

26782678
void __attribute__((overloadable)) intel_sub_group_2d_block_read_8b_8r16x4c(__global void* base_address, int width, int height, int pitch, int2 coord, __private uchar* destination);
2679+
void __attribute__((overloadable)) intel_sub_group_2d_block_read_8b_16r16x4c(__global void* base_address, int width, int height, int pitch, int2 coord, __private uchar* destination);
2680+
void __attribute__((overloadable)) intel_sub_group_2d_block_read_8b_32r16x4c(__global void* base_address, int width, int height, int pitch, int2 coord, __private uchar* destination);
26792681

26802682
void __attribute__((overloadable)) intel_sub_group_2d_block_prefetch_8b_8r16x4c(__global void* base_address, int width, int height, int pitch, int2 coord);
2683+
void __attribute__((overloadable)) intel_sub_group_2d_block_prefetch_8b_16r16x4c(__global void* base_address, int width, int height, int pitch, int2 coord);
26812684

26822685
////////////////////////////////////////////////////////////////
26832686

IGC/ocloc_tests/Builtins/cl_intel_subgroup_2d_block_io/PVC/block_reads.cl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,18 @@ SPDX-License-Identifier: MIT
5050

5151
// CHECK-VISAASM-8B-8R-16X4C: lsc_load_block2d.ugm (M1, 1) V{{[0-9]+}}:d8.4x16x8nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
5252

53+
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
54+
// RUN: -DINPUT_TYPE=uchar -DOUTPUT_TYPE=uchar -DFUNCTION=intel_sub_group_2d_block_read_8b_16r16x4c -DDST_ARRAY_EL_TYPE=uchar -DDST_ARRAY_EL_NUM=64" \
55+
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-16R-16X4C
56+
57+
// CHECK-VISAASM-8B-16R-16X4C: lsc_load_block2d.ugm (M1, 1) V{{[0-9]+}}:d8.4x16x16nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
58+
59+
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
60+
// RUN: -DINPUT_TYPE=uchar -DOUTPUT_TYPE=uchar -DFUNCTION=intel_sub_group_2d_block_read_8b_32r16x4c -DDST_ARRAY_EL_TYPE=uchar -DDST_ARRAY_EL_NUM=128" \
61+
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-32R-16X4C
62+
63+
// CHECK-VISAASM-8B-32R-16X4C: lsc_load_block2d.ugm (M1, 1) V{{[0-9]+}}:d8.4x16x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
64+
5365
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
5466
// RUN: -DINPUT_TYPE=ushort -DOUTPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_read_16b_1r16x1c -DDST_ARRAY_EL_TYPE=ushort -DDST_ARRAY_EL_NUM=1" \
5567
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-16B-1R-16X1C

IGC/ocloc_tests/Builtins/cl_intel_subgroup_2d_block_io/PVC/prefetches.cl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,17 @@ SPDX-License-Identifier: MIT
4545
// CHECK-VISAASM-8B-32R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.64x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
4646

4747
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
48-
// RUN: -DINPUT_TYPE=uint -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_8r16x4c" \
48+
// RUN: -DINPUT_TYPE=uchar -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_8r16x4c" \
4949
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-8R-16X4C
5050

5151
// CHECK-VISAASM-8B-8R-16X4C: lsc_load_block2d.ugm (M1, 1) %null:d8.64x8nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
5252

53+
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
54+
// RUN: -DINPUT_TYPE=uchar -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_16r16x4c" \
55+
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-16R-16X4C
56+
57+
// CHECK-VISAASM-8B-16R-16X4C: lsc_load_block2d.ugm (M1, 1) %null:d8.64x16nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
58+
5359
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
5460
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_16b_1r16x1c" \
5561
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-16B-1R-16X1C

0 commit comments

Comments
 (0)