@@ -19,6 +19,7 @@ func.func @zero_za_b() {
19
19
func.func @zero_za_h () {
20
20
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 85 : i32}> : () -> ()
21
21
%zero_za0h = arm_sme.zero : vector <[8 ]x[8 ]xi16 >
22
+ " test.prevent_zero_merge" () : () -> ()
22
23
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 170 : i32}> : () -> ()
23
24
%zero_za1h = arm_sme.zero : vector <[8 ]x[8 ]xf16 >
24
25
" test.some_use" (%zero_za0h ) : (vector <[8 ]x[8 ]xi16 >) -> ()
@@ -32,10 +33,13 @@ func.func @zero_za_h() {
32
33
func.func @zero_za_s () {
33
34
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 17 : i32}> : () -> ()
34
35
%zero_za0s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
36
+ " test.prevent_zero_merge" () : () -> ()
35
37
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 34 : i32}> : () -> ()
36
38
%zero_za1s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
39
+ " test.prevent_zero_merge" () : () -> ()
37
40
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 68 : i32}> : () -> ()
38
41
%zero_za2s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
42
+ " test.prevent_zero_merge" () : () -> ()
39
43
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 136 : i32}> : () -> ()
40
44
%zero_za3s = arm_sme.zero : vector <[4 ]x[4 ]xf32 >
41
45
" test.some_use" (%zero_za0s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
@@ -51,18 +55,25 @@ func.func @zero_za_s() {
51
55
func.func @zero_za_d () {
52
56
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 1 : i32}> : () -> ()
53
57
%zero_za0d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
58
+ " test.prevent_zero_merge" () : () -> ()
54
59
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 2 : i32}> : () -> ()
55
60
%zero_za1d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
61
+ " test.prevent_zero_merge" () : () -> ()
56
62
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 4 : i32}> : () -> ()
57
63
%zero_za2d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
64
+ " test.prevent_zero_merge" () : () -> ()
58
65
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 8 : i32}> : () -> ()
59
66
%zero_za3d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
67
+ " test.prevent_zero_merge" () : () -> ()
60
68
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 16 : i32}> : () -> ()
61
69
%zero_za4d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
70
+ " test.prevent_zero_merge" () : () -> ()
62
71
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 32 : i32}> : () -> ()
63
72
%zero_za5d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
73
+ " test.prevent_zero_merge" () : () -> ()
64
74
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 64 : i32}> : () -> ()
65
75
%zero_za6d = arm_sme.zero : vector <[2 ]x[2 ]xi64 >
76
+ " test.prevent_zero_merge" () : () -> ()
66
77
// CHECK: "arm_sme.intr.zero"() <{tile_mask = 128 : i32}> : () -> ()
67
78
%zero_za7d = arm_sme.zero : vector <[2 ]x[2 ]xf64 >
68
79
" test.some_use" (%zero_za0d ) : (vector <[2 ]x[2 ]xi64 >) -> ()
@@ -75,3 +86,45 @@ func.func @zero_za_d() {
75
86
" test.some_use" (%zero_za7d ) : (vector <[2 ]x[2 ]xf64 >) -> ()
76
87
return
77
88
}
89
+
90
+ // -----
91
+
92
+ // CHECK-LABEL: merge_consecutive_tile_zero_ops
93
+ func.func @merge_consecutive_tile_zero_ops () {
+ // Four back-to-back arm_sme.zero ops with no other op between them. The
+ // directives below expect exactly one zero intrinsic in the output, with
+ // tile_mask = 255. Note 255 == 17 | 34 | 68 | 136, the four masks that
+ // @zero_za_s expects when the same ops are kept separate.
94
+ // CHECK-NOT: arm_sme.intr.zero
95
+ // CHECK: "arm_sme.intr.zero"() <{tile_mask = 255 : i32}> : () -> ()
96
+ // CHECK-NOT: arm_sme.intr.zero
97
+ %zero_za0s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
98
+ %zero_za1s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
99
+ %zero_za2s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
100
+ %zero_za3s = arm_sme.zero : vector <[4 ]x[4 ]xf32 >
+ // Each zeroed tile value is passed to a test op so that it has a user.
101
+ " test.some_use" (%zero_za0s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
102
+ " test.some_use" (%zero_za1s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
103
+ " test.some_use" (%zero_za2s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
104
+ " test.some_use" (%zero_za3s ) : (vector <[4 ]x[4 ]xf32 >) -> ()
105
+ return
106
+ }
107
+
108
+ // -----
109
+
110
+ /// arm_sme.intr.zero intrinsics are not merged when there is an op other than
111
+ /// arm_sme.intr.zero between them.
112
+
113
+ // CHECK-LABEL: merge_consecutive_tile_zero_ops_with_barrier
114
+ func.func @merge_consecutive_tile_zero_ops_with_barrier () {
+ // Two pairs of arm_sme.zero ops separated by a test.prevent_zero_merge op.
+ // The directives below expect one zero intrinsic per pair and no others:
+ // tile_mask = 51 (== 17 | 34) for the first pair and tile_mask = 204
+ // (== 68 | 136) for the second, using the per-op masks seen in @zero_za_s.
115
+ // CHECK-NOT: arm_sme.intr.zero
116
+ // CHECK: "arm_sme.intr.zero"() <{tile_mask = 51 : i32}> : () -> ()
117
+ // CHECK-NOT: arm_sme.intr.zero
118
+ %zero_za0s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
119
+ %zero_za1s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
+ // Non-zero op placed between the two pairs of zero ops.
120
+ " test.prevent_zero_merge" () : () -> ()
121
+ // CHECK: "arm_sme.intr.zero"() <{tile_mask = 204 : i32}> : () -> ()
122
+ // CHECK-NOT: arm_sme.intr.zero
123
+ %zero_za2s = arm_sme.zero : vector <[4 ]x[4 ]xi32 >
124
+ %zero_za3s = arm_sme.zero : vector <[4 ]x[4 ]xf32 >
+ // Each zeroed tile value is passed to a test op so that it has a user.
125
+ " test.some_use" (%zero_za0s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
126
+ " test.some_use" (%zero_za1s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
127
+ " test.some_use" (%zero_za2s ) : (vector <[4 ]x[4 ]xi32 >) -> ()
128
+ " test.some_use" (%zero_za3s ) : (vector <[4 ]x[4 ]xf32 >) -> ()
129
+ return
130
+ }
0 commit comments