@@ -20,7 +20,7 @@ define void @extract_row_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
20
20
; CHECK-NEXT: mov z5.b, p0/m, za0h.b[w12, 10]
21
21
; CHECK-NEXT: mov z6.b, p0/m, za0h.b[w12, 12]
22
22
; CHECK-NEXT: mov z7.b, p0/m, za0h.b[w12, 14]
23
- ; CHECK-NEXT: b dummy_use_8_nxv16i8
23
+ ; CHECK-NEXT: b use
24
24
%z0 = call <vscale x 16 x i8 > @llvm.aarch64.sme.read.horiz.nxv16i8 (<vscale x 16 x i8 > %zd , <vscale x 16 x i1 > %pg , i32 0 , i32 %tileslice )
25
25
%tileslice.2 = add i32 %tileslice , 2
26
26
%z1 = call <vscale x 16 x i8 > @llvm.aarch64.sme.read.horiz.nxv16i8 (<vscale x 16 x i8 > %zd , <vscale x 16 x i1 > %pg , i32 0 , i32 %tileslice.2 )
@@ -38,8 +38,8 @@ define void @extract_row_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
38
38
%z7 = call <vscale x 16 x i8 > @llvm.aarch64.sme.read.horiz.nxv16i8 (<vscale x 16 x i8 > %zd , <vscale x 16 x i1 > %pg , i32 0 , i32 %tileslice.14 )
39
39
40
40
; Force retention of z0..z7
41
- tail call void @dummy_use_8_nxv16i8 (<vscale x 16 x i8 > %z0 , <vscale x 16 x i8 > %z1 , <vscale x 16 x i8 > %z2 , <vscale x 16 x i8 > %z3 ,
42
- <vscale x 16 x i8 > %z4 , <vscale x 16 x i8 > %z5 , <vscale x 16 x i8 > %z6 , <vscale x 16 x i8 > %z7 )
41
+ tail call void @use (<vscale x 16 x i8 > %z0 , <vscale x 16 x i8 > %z1 , <vscale x 16 x i8 > %z2 , <vscale x 16 x i8 > %z3 ,
42
+ <vscale x 16 x i8 > %z4 , <vscale x 16 x i8 > %z5 , <vscale x 16 x i8 > %z6 , <vscale x 16 x i8 > %z7 )
43
43
ret void
44
44
}
45
45
@@ -62,7 +62,7 @@ define void @extract_col_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
62
62
; CHECK-NEXT: mov z5.b, p0/m, za0v.b[w12, 11]
63
63
; CHECK-NEXT: mov z6.b, p0/m, za0v.b[w12, 13]
64
64
; CHECK-NEXT: mov z7.b, p0/m, za0v.b[w12, 15]
65
- ; CHECK-NEXT: b dummy_use_8_nxv16i8
65
+ ; CHECK-NEXT: b use
66
66
%tileslice.1 = add i32 %tileslice , 1
67
67
%z0 = call <vscale x 16 x i8 > @llvm.aarch64.sme.read.vert.nxv16i8 (<vscale x 16 x i8 > %zd , <vscale x 16 x i1 > %pg , i32 0 , i32 %tileslice.1 )
68
68
%tileslice.3 = add i32 %tileslice , 3
@@ -80,8 +80,8 @@ define void @extract_col_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
80
80
%tileslice.15 = add i32 %tileslice , 15
81
81
%z7 = call <vscale x 16 x i8 > @llvm.aarch64.sme.read.vert.nxv16i8 (<vscale x 16 x i8 > %zd , <vscale x 16 x i1 > %pg , i32 0 , i32 %tileslice.15 )
82
82
83
- tail call void @dummy_use_8_nxv16i8 (<vscale x 16 x i8 > %z0 , <vscale x 16 x i8 > %z1 , <vscale x 16 x i8 > %z2 , <vscale x 16 x i8 > %z3 ,
84
- <vscale x 16 x i8 > %z4 , <vscale x 16 x i8 > %z5 , <vscale x 16 x i8 > %z6 , <vscale x 16 x i8 > %z7 )
83
+ tail call void @use (<vscale x 16 x i8 > %z0 , <vscale x 16 x i8 > %z1 , <vscale x 16 x i8 > %z2 , <vscale x 16 x i8 > %z3 ,
84
+ <vscale x 16 x i8 > %z4 , <vscale x 16 x i8 > %z5 , <vscale x 16 x i8 > %z6 , <vscale x 16 x i8 > %z7 )
85
85
ret void
86
86
}
87
87
@@ -96,7 +96,7 @@ define void @extract_row_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
96
96
; CHECK-NEXT: mov z1.h, p0/m, za0h.h[w12, 2]
97
97
; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 4]
98
98
; CHECK-NEXT: mov z3.h, p0/m, za0h.h[w12, 6]
99
- ; CHECK-NEXT: b dummy_use_4_nxv8i16
99
+ ; CHECK-NEXT: b use
100
100
%z0 = call <vscale x 8 x i16 > @llvm.aarch64.sme.read.horiz.nxv8i16 (<vscale x 8 x i16 > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice )
101
101
%tileslice.2 = add i32 %tileslice , 2
102
102
%z1 = call <vscale x 8 x i16 > @llvm.aarch64.sme.read.horiz.nxv8i16 (<vscale x 8 x i16 > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.2 )
@@ -105,7 +105,7 @@ define void @extract_row_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
105
105
%tileslice.6 = add i32 %tileslice , 6
106
106
%z3 = call <vscale x 8 x i16 > @llvm.aarch64.sme.read.horiz.nxv8i16 (<vscale x 8 x i16 > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.6 )
107
107
108
- tail call void @dummy_use_4_nxv8i16 (<vscale x 8 x i16 > %z0 , <vscale x 8 x i16 > %z1 , <vscale x 8 x i16 > %z2 , <vscale x 8 x i16 > %z3 )
108
+ tail call void @use (<vscale x 8 x i16 > %z0 , <vscale x 8 x i16 > %z1 , <vscale x 8 x i16 > %z2 , <vscale x 8 x i16 > %z3 )
109
109
ret void
110
110
}
111
111
@@ -120,7 +120,7 @@ define void @extract_col_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
120
120
; CHECK-NEXT: mov z1.h, p0/m, za1v.h[w12, 3]
121
121
; CHECK-NEXT: mov z2.h, p0/m, za1v.h[w12, 5]
122
122
; CHECK-NEXT: mov z3.h, p0/m, za1v.h[w12, 7]
123
- ; CHECK-NEXT: b dummy_use_4_nxv8i16
123
+ ; CHECK-NEXT: b use
124
124
%tileslice.1 = add i32 %tileslice , 1
125
125
%z0 = call <vscale x 8 x i16 > @llvm.aarch64.sme.read.vert.nxv8i16 (<vscale x 8 x i16 > %zd , <vscale x 8 x i1 > %pg , i32 1 , i32 %tileslice.1 )
126
126
%tileslice.3 = add i32 %tileslice , 3
@@ -130,7 +130,7 @@ define void @extract_col_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
130
130
%tileslice.7 = add i32 %tileslice , 7
131
131
%z3 = call <vscale x 8 x i16 > @llvm.aarch64.sme.read.vert.nxv8i16 (<vscale x 8 x i16 > %zd , <vscale x 8 x i1 > %pg , i32 1 , i32 %tileslice.7 )
132
132
133
- tail call void @dummy_use_4_nxv8i16 (<vscale x 8 x i16 > %z0 , <vscale x 8 x i16 > %z1 , <vscale x 8 x i16 > %z2 , <vscale x 8 x i16 > %z3 )
133
+ tail call void @use (<vscale x 8 x i16 > %z0 , <vscale x 8 x i16 > %z1 , <vscale x 8 x i16 > %z2 , <vscale x 8 x i16 > %z3 )
134
134
ret void
135
135
}
136
136
@@ -153,7 +153,7 @@ define void @extract_f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 %ti
153
153
; CHECK-NEXT: mov z5.h, p0/m, za0h.h[w12, 5]
154
154
; CHECK-NEXT: mov z6.h, p0/m, za0v.h[w12, 6]
155
155
; CHECK-NEXT: mov z7.h, p0/m, za0v.h[w12, 7]
156
- ; CHECK-NEXT: b dummy_use_8_nxv8f16
156
+ ; CHECK-NEXT: b use
157
157
%z0 = call <vscale x 8 x half > @llvm.aarch64.sme.read.horiz.nxv8f16 (<vscale x 8 x half > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice )
158
158
%tileslice.1 = add i32 %tileslice , 1
159
159
%z1 = call <vscale x 8 x half > @llvm.aarch64.sme.read.horiz.nxv8f16 (<vscale x 8 x half > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.1 )
@@ -170,8 +170,8 @@ define void @extract_f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 %ti
170
170
%tileslice.7 = add i32 %tileslice , 7
171
171
%z7 = call <vscale x 8 x half > @llvm.aarch64.sme.read.vert.nxv8f16 (<vscale x 8 x half > %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.7 )
172
172
173
- tail call void @dummy_use_8_nxv8f16 (<vscale x 8 x half > %z0 , <vscale x 8 x half > %z1 , <vscale x 8 x half > %z2 , <vscale x 8 x half > %z3 ,
174
- <vscale x 8 x half > %z4 , <vscale x 8 x half > %z5 , <vscale x 8 x half > %z6 , <vscale x 8 x half > %z7 )
173
+ tail call void @use (<vscale x 8 x half > %z0 , <vscale x 8 x half > %z1 , <vscale x 8 x half > %z2 , <vscale x 8 x half > %z3 ,
174
+ <vscale x 8 x half > %z4 , <vscale x 8 x half > %z5 , <vscale x 8 x half > %z6 , <vscale x 8 x half > %z7 )
175
175
ret void
176
176
}
177
177
@@ -194,7 +194,7 @@ define void @extract_bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32
194
194
; CHECK-NEXT: mov z5.h, p0/m, za0h.h[w12, 5]
195
195
; CHECK-NEXT: mov z6.h, p0/m, za0v.h[w12, 6]
196
196
; CHECK-NEXT: mov z7.h, p0/m, za0v.h[w12, 7]
197
- ; CHECK-NEXT: b dummy_use_8_nxv8bf16
197
+ ; CHECK-NEXT: b use
198
198
%z0 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16 (<vscale x 8 x bfloat> %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice )
199
199
%tileslice.1 = add i32 %tileslice , 1
200
200
%z1 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16 (<vscale x 8 x bfloat> %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.1 )
@@ -211,8 +211,8 @@ define void @extract_bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32
211
211
%tileslice.7 = add i32 %tileslice , 7
212
212
%z7 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16 (<vscale x 8 x bfloat> %zd , <vscale x 8 x i1 > %pg , i32 0 , i32 %tileslice.7 )
213
213
214
- tail call void @dummy_use_8_nxv8bf16 (<vscale x 8 x bfloat> %z0 , <vscale x 8 x bfloat> %z1 , <vscale x 8 x bfloat> %z2 , <vscale x 8 x bfloat> %z3 ,
215
- <vscale x 8 x bfloat> %z4 , <vscale x 8 x bfloat> %z5 , <vscale x 8 x bfloat> %z6 , <vscale x 8 x bfloat> %z7 )
214
+ tail call void @use (<vscale x 8 x bfloat> %z0 , <vscale x 8 x bfloat> %z1 , <vscale x 8 x bfloat> %z2 , <vscale x 8 x bfloat> %z3 ,
215
+ <vscale x 8 x bfloat> %z4 , <vscale x 8 x bfloat> %z5 , <vscale x 8 x bfloat> %z6 , <vscale x 8 x bfloat> %z7 )
216
216
ret void
217
217
}
218
218
@@ -223,12 +223,12 @@ define void @extract_row_s(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 %t
223
223
; CHECK-NEXT: mov w12, w0
224
224
; CHECK-NEXT: mov z0.s, p0/m, za0h.s[w12, 0]
225
225
; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 2]
226
- ; CHECK-NEXT: b dummy_use_2_nxv4i32
226
+ ; CHECK-NEXT: b use
227
227
%z0 = call <vscale x 4 x i32 > @llvm.aarch64.sme.read.horiz.nxv4i32 (<vscale x 4 x i32 > %zd , <vscale x 4 x i1 > %pg , i32 0 , i32 %tileslice )
228
228
%tileslice.2 = add i32 %tileslice , 2
229
229
%z1 = call <vscale x 4 x i32 > @llvm.aarch64.sme.read.horiz.nxv4i32 (<vscale x 4 x i32 > %zd , <vscale x 4 x i1 > %pg , i32 0 , i32 %tileslice.2 )
230
230
231
- tail call void @dummy_use_2_nxv4i32 (<vscale x 4 x i32 > %z0 , <vscale x 4 x i32 > %z1 )
231
+ tail call void @use (<vscale x 4 x i32 > %z0 , <vscale x 4 x i32 > %z1 )
232
232
ret void
233
233
}
234
234
@@ -239,13 +239,13 @@ define void @extract_col_s(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 %t
239
239
; CHECK-NEXT: mov w12, w0
240
240
; CHECK-NEXT: mov z0.s, p0/m, za3v.s[w12, 1]
241
241
; CHECK-NEXT: mov z1.s, p0/m, za3v.s[w12, 3]
242
- ; CHECK-NEXT: b dummy_use_2_nxv4i32
242
+ ; CHECK-NEXT: b use
243
243
%tileslice.1 = add i32 %tileslice , 1
244
244
%z0 = call <vscale x 4 x i32 > @llvm.aarch64.sme.read.vert.nxv4i32 (<vscale x 4 x i32 > %zd , <vscale x 4 x i1 > %pg , i32 3 , i32 %tileslice.1 )
245
245
%tileslice.3 = add i32 %tileslice , 3
246
246
%z1 = call <vscale x 4 x i32 > @llvm.aarch64.sme.read.vert.nxv4i32 (<vscale x 4 x i32 > %zd , <vscale x 4 x i1 > %pg , i32 3 , i32 %tileslice.3 )
247
247
248
- tail call void @dummy_use_2_nxv4i32 (<vscale x 4 x i32 > %z0 , <vscale x 4 x i32 > %z1 )
248
+ tail call void @use (<vscale x 4 x i32 > %z0 , <vscale x 4 x i32 > %z1 )
249
249
ret void
250
250
}
251
251
@@ -260,7 +260,7 @@ define void @extract_f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 %t
260
260
; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 1]
261
261
; CHECK-NEXT: mov z2.s, p0/m, za0v.s[w12, 2]
262
262
; CHECK-NEXT: mov z3.s, p0/m, za0v.s[w12, 3]
263
- ; CHECK-NEXT: b dummy_use_4_nxv4f32
263
+ ; CHECK-NEXT: b use
264
264
%z0 = call <vscale x 4 x float > @llvm.aarch64.sme.read.horiz.nxv4f32 (<vscale x 4 x float > %zd , <vscale x 4 x i1 > %pg , i32 0 , i32 %tileslice )
265
265
%tileslice.1 = add i32 %tileslice , 1
266
266
%z1 = call <vscale x 4 x float > @llvm.aarch64.sme.read.horiz.nxv4f32 (<vscale x 4 x float > %zd , <vscale x 4 x i1 > %pg , i32 0 , i32 %tileslice.1 )
@@ -269,7 +269,7 @@ define void @extract_f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 %t
269
269
%tileslice.3 = add i32 %tileslice , 3
270
270
%z3 = call <vscale x 4 x float > @llvm.aarch64.sme.read.vert.nxv4f32 (<vscale x 4 x float > %zd , <vscale x 4 x i1 > %pg , i32 0 , i32 %tileslice.3 )
271
271
272
- tail call void @dummy_use_4_nxv4f32 (<vscale x 4 x float > %z0 , <vscale x 4 x float > %z1 , <vscale x 4 x float > %z2 , <vscale x 4 x float > %z3 )
272
+ tail call void @use (<vscale x 4 x float > %z0 , <vscale x 4 x float > %z1 , <vscale x 4 x float > %z2 , <vscale x 4 x float > %z3 )
273
273
ret void
274
274
}
275
275
@@ -301,12 +301,12 @@ define void @extract_f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i32 %
301
301
; CHECK-NEXT: mov w12, w0
302
302
; CHECK-NEXT: mov z0.d, p0/m, za0h.d[w12, 0]
303
303
; CHECK-NEXT: mov z1.d, p0/m, za0v.d[w12, 1]
304
- ; CHECK-NEXT: b dummy_use_2_nxv2f64
304
+ ; CHECK-NEXT: b use
305
305
%z0 = call <vscale x 2 x double > @llvm.aarch64.sme.read.horiz.nxv2f64 (<vscale x 2 x double > %zd , <vscale x 2 x i1 > %pg , i32 0 , i32 %tileslice )
306
306
%tileslice.1 = add i32 %tileslice , 1
307
307
%z1 = call <vscale x 2 x double > @llvm.aarch64.sme.read.vert.nxv2f64 (<vscale x 2 x double > %zd , <vscale x 2 x i1 > %pg , i32 0 , i32 %tileslice.1 )
308
308
309
- tail call void @dummy_use_2_nxv2f64 (<vscale x 2 x double > %z0 , <vscale x 2 x double > %z1 )
309
+ tail call void @use (<vscale x 2 x double > %z0 , <vscale x 2 x double > %z1 )
310
310
ret void
311
311
}
312
312
@@ -485,6 +485,7 @@ for.body:
485
485
exit:
486
486
%tmp1 = add <vscale x 4 x i32 > %z0 , %z1
487
487
%res = add <vscale x 4 x i32 > %tmp1 , %z2
488
+ tail call void @use (<vscale x 4 x i32 > %z0 , <vscale x 4 x i32 > %z1 , <vscale x 4 x i32 > %z2 )
488
489
ret <vscale x 4 x i32 > %res
489
490
}
490
491
@@ -523,33 +524,7 @@ declare <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i6
523
524
declare <vscale x 2 x double > @llvm.aarch64.sme.readq.vert.nxv2f64 (<vscale x 2 x double >, <vscale x 2 x i1 >, i32 , i32 )
524
525
525
526
; ------------------------------------------------------------------------------
526
- ; Dummy external functions to force code retention.
527
- ; The compiler does not see their implementations, so it must keep the calls.
527
+ ; Dummy external function to force code retention: it is only declared here, so the compiler cannot see its body and must keep the calls.
528
528
; ------------------------------------------------------------------------------
529
529
530
- declare void @dummy_use_8_nxv16i8 (
531
- <vscale x 16 x i8 >, <vscale x 16 x i8 >, <vscale x 16 x i8 >, <vscale x 16 x i8 >,
532
- <vscale x 16 x i8 >, <vscale x 16 x i8 >, <vscale x 16 x i8 >, <vscale x 16 x i8 >
533
- )
534
-
535
- declare void @dummy_use_4_nxv8i16 (
536
- <vscale x 8 x i16 >, <vscale x 8 x i16 >, <vscale x 8 x i16 >, <vscale x 8 x i16 >
537
- )
538
-
539
- declare void @dummy_use_8_nxv8f16 (
540
- <vscale x 8 x half >, <vscale x 8 x half >, <vscale x 8 x half >, <vscale x 8 x half >,
541
- <vscale x 8 x half >, <vscale x 8 x half >, <vscale x 8 x half >, <vscale x 8 x half >
542
- )
543
-
544
- declare void @dummy_use_8_nxv8bf16 (
545
- <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
546
- <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>
547
- )
548
-
549
- declare void @dummy_use_2_nxv4i32 (<vscale x 4 x i32 >, <vscale x 4 x i32 >)
550
-
551
- declare void @dummy_use_4_nxv4f32 (
552
- <vscale x 4 x float >, <vscale x 4 x float >, <vscale x 4 x float >, <vscale x 4 x float >
553
- )
554
-
555
- declare void @dummy_use_2_nxv2f64 (<vscale x 2 x double >, <vscale x 2 x double >)
530
+ declare void @use (...)
0 commit comments