Skip to content

Commit 493e17c

Browse files
pkwasnie-intelfda0
authored and committed
rename intel_sub_group_matrix_mad (DPAS) for tf32 type
Renames the tf32 DPAS built-in from intel_sub_group_tf32_tf32_matrix_mad_k8_f32 to intel_sub_group_tf32_tf32_matrix_mad_k8, dropping the return-type suffix from the name. (cherry picked from commit 1b55151)
1 parent b245009 commit 493e17c

File tree

4 files changed

+71
-9
lines changed

4 files changed

+71
-9
lines changed

IGC/BiFModule/Languages/OpenCL/IBiF_dpas.cl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -489,10 +489,10 @@ DEFN_INTEL_CVT2( f32_to_bf16_packed, int16, float16, float16, 2fto2bf_16 )
489489
#ifdef cl_intel_subgroup_matrix_multiply_accumulate_tf32
490490
// PVC_B
491491

492-
DEFN_INTEL_SG16_FDPAS( tf32_tf32_matrix_mad_k8_f32, float, float, float, float8, fdpas_f_f_tf32_tf32_8_1 )
493-
DEFN_INTEL_SG16_FDPAS( tf32_tf32_matrix_mad_k8_f32, float2, float2, float, float8, fdpas_f_f_tf32_tf32_8_2 )
494-
DEFN_INTEL_SG16_FDPAS( tf32_tf32_matrix_mad_k8_f32, float4, float4, float2, float8, fdpas_f_f_tf32_tf32_8_4 )
495-
DEFN_INTEL_SG16_FDPAS( tf32_tf32_matrix_mad_k8_f32, float8, float8, float4, float8, fdpas_f_f_tf32_tf32_8_8 )
492+
DEFN_INTEL_SG16_FDPAS( tf32_tf32_matrix_mad_k8, float, float, float, float8, fdpas_f_f_tf32_tf32_8_1 )
493+
DEFN_INTEL_SG16_FDPAS( tf32_tf32_matrix_mad_k8, float2, float2, float, float8, fdpas_f_f_tf32_tf32_8_2 )
494+
DEFN_INTEL_SG16_FDPAS( tf32_tf32_matrix_mad_k8, float4, float4, float2, float8, fdpas_f_f_tf32_tf32_8_4 )
495+
DEFN_INTEL_SG16_FDPAS( tf32_tf32_matrix_mad_k8, float8, float8, float4, float8, fdpas_f_f_tf32_tf32_8_8 )
496496

497497
DEFN_INTEL_CVT_NO_OVERLOAD( tfloat32_as_float, float, float, ftotf32_1 )
498498
DEFN_INTEL_CVT_NO_OVERLOAD( tfloat322_as_float2, float2, float2, ftotf32_2 )

IGC/BiFModule/Languages/OpenCL/PreRelease/opencl_cth_pre_release.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2386,16 +2386,16 @@ int16 __attribute__((overloadable)) intel_convert_f32_to_bf16_packed(float16 a,
23862386
// DST: float
23872387

23882388
// M = 1, K = 8, N = 16, upper 8 channels of a ignored
2389-
float __attribute__((overloadable)) intel_sub_group_tf32_tf32_matrix_mad_k8_f32(float a, float8 b, float acc);
2389+
float __attribute__((overloadable)) intel_sub_group_tf32_tf32_matrix_mad_k8(float a, float8 b, float acc);
23902390

23912391
// M = 2, K = 8, N = 16, all channels of a are used
2392-
float2 __attribute__((overloadable)) intel_sub_group_tf32_tf32_matrix_mad_k8_f32(float a, float8 b, float2 acc);
2392+
float2 __attribute__((overloadable)) intel_sub_group_tf32_tf32_matrix_mad_k8(float a, float8 b, float2 acc);
23932393

23942394
// M = 4, K = 8, N = 16
2395-
float4 __attribute__((overloadable)) intel_sub_group_tf32_tf32_matrix_mad_k8_f32(float2 a, float8 b, float4 acc);
2395+
float4 __attribute__((overloadable)) intel_sub_group_tf32_tf32_matrix_mad_k8(float2 a, float8 b, float4 acc);
23962396

23972397
// M = 8, K = 8, N = 16
2398-
float8 __attribute__((overloadable)) intel_sub_group_tf32_tf32_matrix_mad_k8_f32(float4 a, float8 b, float8 acc);
2398+
float8 __attribute__((overloadable)) intel_sub_group_tf32_tf32_matrix_mad_k8(float4 a, float8 b, float8 acc);
23992399

24002400
// Conversions
24012401
float intel_convert_tfloat32_as_float( float source);

IGC/common/Types.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ namespace IGC
3636
{
3737
PRECISION_UNUSED, U8, U4, U2, S8, S4, S2,
3838
BF8,
39-
TF32,
39+
TF32 = 10,
4040
BF16, FP16
4141
};
4242

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: llvm-spirv, regkeys, pvc-supported
10+
11+
; RUN: llvm-as %s -o %t.bc
12+
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_cache_controls -o %t.spv
13+
; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'PrintToConsole=1 PrintAfter=Layout'" 2>&1 | FileCheck %s
14+
15+
target triple = "spir64-unknown-unknown"
16+
17+
declare spir_func float @_Z39intel_sub_group_tf32_tf32_matrix_mad_k8fDv8_ff(float, <8 x float>, float)
18+
declare spir_func <2 x float> @_Z39intel_sub_group_tf32_tf32_matrix_mad_k8fDv8_fDv2_f(float, <8 x float>, <2 x float>)
19+
declare spir_func <4 x float> @_Z39intel_sub_group_tf32_tf32_matrix_mad_k8Dv2_fDv8_fDv4_f(<2 x float>, <8 x float>, <4 x float>)
20+
declare spir_func <8 x float> @_Z39intel_sub_group_tf32_tf32_matrix_mad_k8Dv4_fDv8_fS0_(<4 x float>, <8 x float>, <8 x float>)
21+
22+
define spir_kernel void @test_v1(float %a, <8 x float> %b, float %acc, float addrspace(1)* %c) !intel_reqd_sub_group_size !100 {
23+
entry:
24+
; CHECK-LABEL: @test_v1(
25+
; CHECK: call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.f32.v8i32(float %acc, float %a, <8 x i32> %{{.+}}, i32 10, i32 10, i32 8, i32 1, i1 false)
26+
%call = call spir_func float @_Z39intel_sub_group_tf32_tf32_matrix_mad_k8fDv8_ff(float %a, <8 x float> %b, float %acc)
27+
%arrayidx = getelementptr inbounds float, float addrspace(1)* %c, i64 0
28+
store float %call, float addrspace(1)* %arrayidx, align 4
29+
ret void
30+
}
31+
32+
define spir_kernel void @test_v2(float %a, <8 x float> %b, <2 x float> %acc, <2 x float> addrspace(1)* %c) !intel_reqd_sub_group_size !100 {
33+
entry:
34+
; CHECK-LABEL: @test_v2(
35+
; CHECK: call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.f32.v8i32(<2 x float> %acc, float %a, <8 x i32> %{{.+}}, i32 10, i32 10, i32 8, i32 2, i1 false)
36+
%call = call spir_func <2 x float> @_Z39intel_sub_group_tf32_tf32_matrix_mad_k8fDv8_fDv2_f(float %a, <8 x float> %b, <2 x float> %acc)
37+
%arrayidx = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %c, i64 0
38+
store <2 x float> %call, <2 x float> addrspace(1)* %arrayidx, align 4
39+
ret void
40+
}
41+
42+
define spir_kernel void @test_v4(<2 x float> %a, <8 x float> %b, <4 x float> %acc, <4 x float> addrspace(1)* %c) !intel_reqd_sub_group_size !100 {
43+
entry:
44+
; CHECK-LABEL: @test_v4(
45+
; CHECK: call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v2f32.v8i32(<4 x float> %acc, <2 x float> %a, <8 x i32> %{{.+}}, i32 10, i32 10, i32 8, i32 4, i1 false)
46+
%call = call spir_func <4 x float> @_Z39intel_sub_group_tf32_tf32_matrix_mad_k8Dv2_fDv8_fDv4_f(<2 x float> %a, <8 x float> %b, <4 x float> %acc)
47+
%arrayidx = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %c, i64 0
48+
store <4 x float> %call, <4 x float> addrspace(1)* %arrayidx, align 4
49+
ret void
50+
}
51+
52+
define spir_kernel void @test_v8(<4 x float> %a, <8 x float> %b, <8 x float> %acc, <8 x float> addrspace(1)* %c) !intel_reqd_sub_group_size !100 {
53+
entry:
54+
; CHECK-LABEL: @test_v8(
55+
; CHECK: call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v4f32.v8i32(<8 x float> %acc, <4 x float> %a, <8 x i32> %{{.+}}, i32 10, i32 10, i32 8, i32 8, i1 false)
56+
%call = call spir_func <8 x float> @_Z39intel_sub_group_tf32_tf32_matrix_mad_k8Dv4_fDv8_fS0_(<4 x float> %a, <8 x float> %b, <8 x float> %acc)
57+
%arrayidx = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %c, i64 0
58+
store <8 x float> %call, <8 x float> addrspace(1)* %arrayidx, align 4
59+
ret void
60+
}
61+
62+
!100 = !{i32 16}

0 commit comments

Comments
 (0)