Skip to content

Commit dc33900

Browse files
YuriPlyakhin, igcbot
authored and committed
Changes in code.
1 parent 68d0c07 commit dc33900

File tree

6 files changed

+246
-230
lines changed

6 files changed

+246
-230
lines changed

IGC/BiFModule/Languages/OpenCL/PreRelease/IBiF_matrix.cl

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -352,6 +352,8 @@ DEFINE_LOAD(PackedA_RowMajor, _SG16, char, 8, short, 16, 2, 32, 2x32, ROW_MAJOR,
352352

353353
/* A load tf32 SG16 */
354354
DEFINE_LOAD(PackedA_RowMajor, _SG16, int, 32, int, 32, 8, 8, 8x8, ROW_MAJOR, , 4, 8)
355+
/* A load tf32 SG16 for sub group size 32*/
356+
DEFINE_LOAD(PackedA_RowMajor, _SG16, int, 32, int, 32, 8, 8, 8x8, ROW_MAJOR, , 2, 8)
355357

356358
/* PackedB load i16 */
357359
DEFINE_LOAD(PackedB_ColumnMajor, , short, 16, int, 32, 8, 16, 16x8, COL_MAJOR, , 8, -1)
@@ -574,6 +576,8 @@ DEFINE_STORE(PackedA_RowMajor, _SG16, short, 16, short, 16, 8, 16, 8x16, ROW_MAJ
574576

575577
/* A store tf32 SG16 */
576578
DEFINE_STORE(PackedA_RowMajor, _SG16, int, 32, int, 32, 8, 8, 8x8, ROW_MAJOR, , 4, 8, false)
579+
/* A store tf32 SG16 for sub group size 32*/
580+
DEFINE_STORE(PackedA_RowMajor, _SG16, int, 32, int, 32, 8, 8, 8x8, ROW_MAJOR, , 2, 8, false)
577581

578582
/* PackedB store i16*/
579583
DEFINE_STORE(PackedB_ColumnMajor, , short, 16, int, 32, 8, 16, 16x8, COL_MAJOR, , 8, -1, false)
@@ -601,6 +605,9 @@ DEFINE_STORE(PackedB_PackedB, _SG16, char, 8, int, 32, 8, 64, 32x16, ROW_MAJ
601605
/* B store tf32 SG16 */
602606
DEFINE_STORE(PackedB_RowMajor, _SG16, int, 32, int, 32, 8, 16, 8x16, ROW_MAJOR, , 8, 16, true)
603607

608+
/* B store tf32 SG16 for sub group size 32 */
609+
DEFINE_STORE(PackedB_RowMajor, _SG16, int, 32, int, 32, 8, 16, 8x16, ROW_MAJOR, , 4, 16, true)
610+
604611
/* Acc i32 */
605612
DEFINE_STORE(Accumulator_RowMajor, , int, 32, int, 32, 8, 8, 8x8, ROW_MAJOR, , 8, 8, true)
606613
DEFINE_STORE(Accumulator_RowMajor, , int, 32, int, 32, 7, 8, 7x8, ROW_MAJOR, , 7, 8, true)
@@ -874,7 +881,7 @@ DEFINE_LOAD_LARGE(Accumulator_RowMajor, _SG16, int, 32, int, 32, 16, 16, 16x16,
874881
DEFINE_LOAD_LARGE(PackedA_RowMajor, _SG16, short, 16, short, 16, 16, 16, 16x16, ROW_MAJOR, , 16)
875882

876883
#define DEFINE_ACC_ROW_MAJOR_32x64(address_space) \
877-
INLINE void __builtin_spriv_OpJointMatrixLoadINTEL_Accumulator_RowMajor_SG16_32x64_i32_32_##address_space##_v8i8_pi32_i32(__private char *dst, char *mem, long stride) { \
884+
INLINE void __builtin_spriv_OpJointMatrixLoadINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_##address_space##_v8i8_pi32_i32(__private char *dst, char *mem, long stride) { \
878885
__private char *c0 = dst + 0 * 16 * (sizeof (int)); \
879886
__private char *c1 = dst + 1 * 16 * (sizeof (int)); \
880887
__private char *c2 = dst + 2 * 16 * (sizeof (int)); \
@@ -924,7 +931,7 @@ DEFINE_A_ROW_MAJOR_32x16(global)
924931
DEFINE_A_ROW_MAJOR_32x16(local)
925932

926933
#define DEFINE_B_B_16x64(address_space) \
927-
INLINE void __builtin_spriv_OpJointMatrixLoadINTEL_PackedB_PackedB_SG16_16x64_i16_8_##address_space##_v8i8_pi32_i32(__private char *dst, char *mem, long stride) { \
934+
INLINE void __builtin_spriv_OpJointMatrixLoadINTEL_PackedB_PackedB_SG16_16x64_i16_32_##address_space##_v8i8_pi32_i32(__private char *dst, char *mem, long stride) { \
928935
__private char *b0 = dst; \
929936
__private char *b1 = dst + 1 * 16 * (sizeof (short)); \
930937
__private char *b2 = dst + 2 * 16 * (sizeof (short)); \
@@ -973,7 +980,7 @@ DEFINE_B_B_16x64(local)
973980
DEFINE_STORE_LARGE(Accumulator_RowMajor, _SG16, int, 32, int, 32, 16, 16, 16x16, ROW_MAJOR, , 16)
974981

975982
#define DEFINE_STORE_ACC_ROW_MAJOR_32x64(address_space) \
976-
INLINE void __builtin_spriv_OpJointMatrixStoreINTEL_Accumulator_RowMajor_SG16_32x64_i32_32_##address_space##_pi64_v8i8(char *mem, __private char *src, long stride) { \
983+
INLINE void __builtin_spriv_OpJointMatrixStoreINTEL_Accumulator_RowMajor_SG16_32x64_i32_128_##address_space##_pi64_v8i8(char *mem, __private char *src, long stride) { \
977984
__private char *c0 = src + 0 * 16 * (sizeof (int)); \
978985
__private char *c1 = src + 1 * 16 * (sizeof (int)); \
979986
__private char *c2 = src + 2 * 16 * (sizeof (int)); \

0 commit comments

Comments
 (0)