Skip to content

Commit 3a61081

Browse files
[SYCL][Joint Matrix] Implement code split for joint_matrix and joint_matrix_mad (#11836)
This patch implements device code split for `joint_matrix` and `joint_matrix_mad`. Note: the data inside metadata values is sorted, so the order of matrix initialization in the C++ code is not relevant.
1 parent 960d898 commit 3a61081

File tree

9 files changed

+807
-0
lines changed

9 files changed

+807
-0
lines changed
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
; This test has 3 kernels:
2+
; Kernel1 and Kernel2 have the same joint_matrix parameters
3+
; Kernel3 has different joint_matrix parameters
4+
5+
; The test is intended to check that sycl-post-link correctly separates kernels
6+
; that use different sycl_joint_matrix metadata
7+
8+
; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table
9+
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \
10+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
11+
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K2 \
12+
; RUN: --implicit-check-not Kernel3
13+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYMS-K3 \
14+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
15+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K2 \
16+
; RUN: --implicit-check-not Kernel3
17+
18+
; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table
19+
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \
20+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
21+
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K2 \
22+
; RUN: --implicit-check-not Kernel3
23+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYMS-K3 \
24+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
25+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K2 \
26+
; RUN: --implicit-check-not Kernel3
27+
28+
; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table
29+
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \
30+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
31+
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K2 \
32+
; RUN: --implicit-check-not Kernel3 --implicit-check-not Kernel1
33+
; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-IR-K1 \
34+
; RUN: --implicit-check-not Kernel3 --implicit-check-not Kernel2
35+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYMS-K3 \
36+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
37+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYMS-K2 \
38+
; RUN: --implicit-check-not Kernel3 --implicit-check-not Kernel1
39+
; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-SYMS-K1 \
40+
; RUN: --implicit-check-not Kernel3 --implicit-check-not Kernel2
41+
42+
; CHECK-IR-K1: define {{.*}} @Kernel1
43+
; CHECK-IR-K2: define {{.*}} @Kernel2
44+
; CHECK-IR-K3: define {{.*}} @Kernel3
45+
; CHECK-SYMS-K1: Kernel1
46+
; CHECK-SYMS-K2: Kernel2
47+
; CHECK-SYMS-K3: Kernel3
48+
49+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
50+
target triple = "spir64-unknown-unknown"
51+
52+
$Kernel1 = comdat any
53+
54+
$Kernel2 = comdat any
55+
56+
$Kernel3 = comdat any
57+
58+
; Kernel1: empty kernel tagged with !sycl_joint_matrix !7, the same metadata
; node Kernel2 uses, so the -split=auto and -split=source RUN lines expect
; Kernel1 and Kernel2 to land in the same output module (%t_1).
; Function Attrs: mustprogress norecurse nounwind
59+
define weak_odr dso_local spir_kernel void @Kernel1() local_unnamed_addr #0 comdat !srcloc !5 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_joint_matrix !7 !sycl_kernel_omit_args !6 {
60+
entry:
61+
ret void
62+
}
63+
64+
; Kernel2: empty kernel tagged with !sycl_joint_matrix !7, identical to
; Kernel1's attachment; only -split=kernel is expected to put it in a module
; of its own.
; Function Attrs: mustprogress norecurse nounwind
65+
define weak_odr dso_local spir_kernel void @Kernel2() local_unnamed_addr #0 comdat !srcloc !8 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_joint_matrix !7 !sycl_kernel_omit_args !6 {
66+
entry:
67+
ret void
68+
}
69+
70+
; Kernel3: empty kernel tagged with !sycl_joint_matrix !10. The !10 string
; differs from !7 only in the first matrix's dimension (13 instead of 12), and
; every RUN line expects Kernel3 alone in %t_0.
; Function Attrs: mustprogress norecurse nounwind
71+
define weak_odr dso_local spir_kernel void @Kernel3() local_unnamed_addr #0 comdat !srcloc !9 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_joint_matrix !10 !sycl_kernel_omit_args !6 {
72+
entry:
73+
ret void
74+
}
75+
76+
attributes #0 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cpp" "sycl-optlevel"="2" "sycl-single-task" "uniform-work-group-size"="true" }
77+
78+
!llvm.module.flags = !{!0, !1}
79+
!opencl.spir.version = !{!2}
80+
!spirv.Source = !{!3}
81+
!llvm.ident = !{!4}
82+
83+
!0 = !{i32 1, !"wchar_size", i32 4}
84+
!1 = !{i32 7, !"frame-pointer", i32 2}
85+
!2 = !{i32 1, i32 2}
86+
!3 = !{i32 4, i32 100000}
87+
!4 = !{!""}
88+
!5 = !{i32 1037}
89+
!6 = !{}
90+
!7 = !{!"matrix_type::fp32,use::a,12,32;matrix_type::fp64,use::b,67,21"}
91+
!8 = !{i32 1301}
92+
!9 = !{i32 1859}
93+
!10 = !{!"matrix_type::fp32,use::a,13,32;matrix_type::fp64,use::b,67,21"}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
; This test has 3 kernels:
2+
; Kernel1 doesn't have joint_matrix parameters
3+
; Kernel2 has joint_matrix parameters
4+
; Kernel3 doesn't have joint_matrix parameters
5+
6+
; The test is intended to check that sycl-post-link correctly separates kernels
7+
; that carry sycl_joint_matrix metadata from kernels without that metadata
8+
9+
; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table
10+
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K2 \
11+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel3
12+
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K3 \
13+
; RUN: --implicit-check-not Kernel2
14+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYMS-K2 \
15+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel3
16+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K3 \
17+
; RUN: --implicit-check-not Kernel2
18+
19+
; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table
20+
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K2 \
21+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel3
22+
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K3 \
23+
; RUN: --implicit-check-not Kernel2
24+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYMS-K2 \
25+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel3
26+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K3 \
27+
; RUN: --implicit-check-not Kernel2
28+
29+
; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table
30+
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \
31+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
32+
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K2 \
33+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel3
34+
; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-IR-K1 \
35+
; RUN: --implicit-check-not Kernel2 --implicit-check-not Kernel3
36+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYMS-K3 \
37+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
38+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYMS-K2 \
39+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel3
40+
; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-SYMS-K1 \
41+
; RUN: --implicit-check-not Kernel2 --implicit-check-not Kernel3
42+
43+
; CHECK-IR-K1: define {{.*}} @Kernel1
44+
; CHECK-IR-K2: define {{.*}} @Kernel2
45+
; CHECK-IR-K3: define {{.*}} @Kernel3
46+
; CHECK-SYMS-K1: Kernel1
47+
; CHECK-SYMS-K2: Kernel2
48+
; CHECK-SYMS-K3: Kernel3
49+
50+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
51+
target triple = "spir64-unknown-unknown"
52+
53+
$Kernel1 = comdat any
54+
55+
$Kernel2 = comdat any
56+
57+
$Kernel3 = comdat any
58+
59+
; Kernel1: empty kernel with no !sycl_joint_matrix attachment; the
; -split=auto and -split=source RUN lines expect it to share a module (%t_1)
; with Kernel3, which also has none.
; Function Attrs: mustprogress norecurse nounwind
60+
define weak_odr dso_local spir_kernel void @Kernel1() local_unnamed_addr #0 comdat !srcloc !5 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_kernel_omit_args !6 {
61+
entry:
62+
ret void
63+
}
64+
65+
; Kernel2: the only kernel in this test that carries !sycl_joint_matrix (!7);
; the -split=auto and -split=source RUN lines expect it alone in %t_0.
; Function Attrs: mustprogress norecurse nounwind
66+
define weak_odr dso_local spir_kernel void @Kernel2() local_unnamed_addr #0 comdat !srcloc !8 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_joint_matrix !7 !sycl_kernel_omit_args !6 {
67+
entry:
68+
ret void
69+
}
70+
71+
; Kernel3: empty kernel with no !sycl_joint_matrix attachment, like Kernel1;
; only -split=kernel is expected to separate the two.
; Function Attrs: mustprogress norecurse nounwind
72+
define weak_odr dso_local spir_kernel void @Kernel3() local_unnamed_addr #0 comdat !srcloc !9 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_kernel_omit_args !6 {
73+
entry:
74+
ret void
75+
}
76+
77+
attributes #0 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cpp" "sycl-optlevel"="2" "sycl-single-task" "uniform-work-group-size"="true" }
78+
79+
!llvm.module.flags = !{!0, !1}
80+
!opencl.spir.version = !{!2}
81+
!spirv.Source = !{!3}
82+
!llvm.ident = !{!4}
83+
84+
!0 = !{i32 1, !"wchar_size", i32 4}
85+
!1 = !{i32 7, !"frame-pointer", i32 2}
86+
!2 = !{i32 1, i32 2}
87+
!3 = !{i32 4, i32 100000}
88+
!4 = !{!""}
89+
!5 = !{i32 1037}
90+
!6 = !{}
91+
!7 = !{!"matrix_type::fp32,use::a,12,32;matrix_type::fp64,use::b,67,21"}
92+
!8 = !{i32 1301}
93+
!9 = !{i32 1859}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
; This test is intended to check that we do not perform per-joint-matrix
2+
; split when it is not enabled: either no -split option is passed, or -ir-output-only is used
3+
4+
; RUN: sycl-post-link -symbols -S %s -o %t.table
5+
; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE
6+
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK-IR
7+
;
8+
; -lower-esimd is needed so sycl-post-link does not complain about no actions
9+
; specified
10+
; RUN: sycl-post-link -lower-esimd -ir-output-only -S %s -o %t.ll
11+
; RUN: FileCheck %s -input-file=%t.ll --check-prefix CHECK-IR
12+
13+
; We expect to see only one module generated:
14+
;
15+
; CHECK-TABLE: Code
16+
; CHECK-TABLE-NEXT: _0.ll
17+
; CHECK-TABLE-EMPTY:
18+
19+
; CHECK-IR-DAG: define weak_odr dso_local spir_kernel void @Kernel1
20+
; CHECK-IR-DAG: define weak_odr dso_local spir_kernel void @Kernel2
21+
; CHECK-IR-DAG: define weak_odr dso_local spir_kernel void @Kernel3
22+
23+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
24+
target triple = "spir64-unknown-unknown"
25+
26+
$Kernel1 = comdat any
27+
28+
$Kernel2 = comdat any
29+
30+
$Kernel3 = comdat any
31+
32+
; Kernel1: empty kernel tagged with !sycl_joint_matrix !7. In this test the
; CHECK-IR-DAG lines expect all three kernels in one module regardless of
; their metadata.
; Function Attrs: mustprogress norecurse nounwind
33+
define weak_odr dso_local spir_kernel void @Kernel1() local_unnamed_addr #0 comdat !srcloc !5 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_joint_matrix !7 !sycl_kernel_omit_args !6 {
34+
entry:
35+
ret void
36+
}
37+
38+
; Kernel2: empty kernel tagged with the same !sycl_joint_matrix !7 node as
; Kernel1.
; Function Attrs: mustprogress norecurse nounwind
39+
define weak_odr dso_local spir_kernel void @Kernel2() local_unnamed_addr #0 comdat !srcloc !8 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_joint_matrix !7 !sycl_kernel_omit_args !6 {
40+
entry:
41+
ret void
42+
}
43+
44+
; Kernel3: empty kernel tagged with !sycl_joint_matrix !10, whose string
; differs from !7 (13 instead of 12 in the first matrix). The test still
; expects it in the same single module as Kernel1 and Kernel2.
; Function Attrs: mustprogress norecurse nounwind
45+
define weak_odr dso_local spir_kernel void @Kernel3() local_unnamed_addr #0 comdat !srcloc !9 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_joint_matrix !10 !sycl_kernel_omit_args !6 {
46+
entry:
47+
ret void
48+
}
49+
50+
attributes #0 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cpp" "sycl-optlevel"="2" "sycl-single-task" "uniform-work-group-size"="true" }
51+
52+
!llvm.module.flags = !{!0, !1}
53+
!opencl.spir.version = !{!2}
54+
!spirv.Source = !{!3}
55+
!llvm.ident = !{!4}
56+
57+
!0 = !{i32 1, !"wchar_size", i32 4}
58+
!1 = !{i32 7, !"frame-pointer", i32 2}
59+
!2 = !{i32 1, i32 2}
60+
!3 = !{i32 4, i32 100000}
61+
!4 = !{!""}
62+
!5 = !{i32 1037}
63+
!6 = !{}
64+
!7 = !{!"matrix_type::fp32,use::a,12,32;matrix_type::fp64,use::b,67,21"}
65+
!8 = !{i32 1301}
66+
!9 = !{i32 1859}
67+
!10 = !{!"matrix_type::fp32,use::a,13,32;matrix_type::fp64,use::b,67,21"}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
; This test has 3 kernels:
2+
; Kernel1 and Kernel2 have the same joint_matrix_mad parameters
3+
; Kernel3 has different joint_matrix_mad parameters
4+
5+
; The test is intended to check that sycl-post-link correctly separates kernels
6+
; that use different sycl_joint_matrix_mad metadata
7+
8+
; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table
9+
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \
10+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
11+
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K2 \
12+
; RUN: --implicit-check-not Kernel3
13+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYMS-K3 \
14+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
15+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K2 \
16+
; RUN: --implicit-check-not Kernel3
17+
18+
; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table
19+
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \
20+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
21+
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K1,CHECK-IR-K2 \
22+
; RUN: --implicit-check-not Kernel3
23+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYMS-K3 \
24+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
25+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYMS-K1,CHECK-SYMS-K2 \
26+
; RUN: --implicit-check-not Kernel3
27+
28+
; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table
29+
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-IR-K3 \
30+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
31+
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-IR-K2 \
32+
; RUN: --implicit-check-not Kernel3 --implicit-check-not Kernel1
33+
; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-IR-K1 \
34+
; RUN: --implicit-check-not Kernel3 --implicit-check-not Kernel2
35+
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYMS-K3 \
36+
; RUN: --implicit-check-not Kernel1 --implicit-check-not Kernel2
37+
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYMS-K2 \
38+
; RUN: --implicit-check-not Kernel3 --implicit-check-not Kernel1
39+
; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-SYMS-K1 \
40+
; RUN: --implicit-check-not Kernel3 --implicit-check-not Kernel2
41+
42+
; CHECK-IR-K1: define {{.*}} @Kernel1
43+
; CHECK-IR-K2: define {{.*}} @Kernel2
44+
; CHECK-IR-K3: define {{.*}} @Kernel3
45+
; CHECK-SYMS-K1: Kernel1
46+
; CHECK-SYMS-K2: Kernel2
47+
; CHECK-SYMS-K3: Kernel3
48+
49+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
50+
target triple = "spir64-unknown-unknown"
51+
52+
$Kernel1 = comdat any
53+
54+
$Kernel2 = comdat any
55+
56+
$Kernel3 = comdat any
57+
58+
; Kernel1: empty kernel tagged with !sycl_joint_matrix_mad !7, the same
; metadata node Kernel2 uses, so the -split=auto and -split=source RUN lines
; expect Kernel1 and Kernel2 together in %t_1.
; Function Attrs: mustprogress norecurse nounwind
59+
define weak_odr dso_local spir_kernel void @Kernel1() local_unnamed_addr #0 comdat !srcloc !5 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_joint_matrix_mad !7 !sycl_kernel_omit_args !6 {
60+
entry:
61+
ret void
62+
}
63+
64+
; Kernel2: empty kernel tagged with !sycl_joint_matrix_mad !7, identical to
; Kernel1's attachment; only -split=kernel is expected to put it in a module
; of its own.
; Function Attrs: mustprogress norecurse nounwind
65+
define weak_odr dso_local spir_kernel void @Kernel2() local_unnamed_addr #0 comdat !srcloc !8 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_joint_matrix_mad !7 !sycl_kernel_omit_args !6 {
66+
entry:
67+
ret void
68+
}
69+
70+
; Kernel3: empty kernel tagged with !sycl_joint_matrix_mad !10. The !10
; string differs from !7 only in its last number (28 instead of 12), and
; every RUN line expects Kernel3 alone in %t_0.
; Function Attrs: mustprogress norecurse nounwind
71+
define weak_odr dso_local spir_kernel void @Kernel3() local_unnamed_addr #0 comdat !srcloc !9 !kernel_arg_buffer_location !6 !sycl_fixed_targets !6 !sycl_joint_matrix_mad !10 !sycl_kernel_omit_args !6 {
72+
entry:
73+
ret void
74+
}
75+
76+
attributes #0 = { mustprogress norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cpp" "sycl-optlevel"="2" "sycl-single-task" "uniform-work-group-size"="true" }
77+
78+
!llvm.module.flags = !{!0, !1}
79+
!opencl.spir.version = !{!2}
80+
!spirv.Source = !{!3}
81+
!llvm.ident = !{!4}
82+
83+
!0 = !{i32 1, !"wchar_size", i32 4}
84+
!1 = !{i32 7, !"frame-pointer", i32 2}
85+
!2 = !{i32 1, i32 2}
86+
!3 = !{i32 4, i32 100000}
87+
!4 = !{!""}
88+
!5 = !{i32 1037}
89+
!6 = !{}
90+
!7 = !{!"matrix_type::sint8,matrix_type::sint8,matrix_type::sint32,matrix_type::sint32,12,48,12"}
91+
!8 = !{i32 1301}
92+
!9 = !{i32 1859}
93+
!10 = !{!"matrix_type::sint8,matrix_type::sint8,matrix_type::sint32,matrix_type::sint32,12,48,28"}

0 commit comments

Comments
 (0)