Skip to content

Commit cec3fbe

Browse files
committed
Simplify dummy functions
1 parent d27701b commit cec3fbe

File tree

1 file changed

+27
-52
lines changed

1 file changed

+27
-52
lines changed

llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll

Lines changed: 27 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ define void @extract_row_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
2020
; CHECK-NEXT: mov z5.b, p0/m, za0h.b[w12, 10]
2121
; CHECK-NEXT: mov z6.b, p0/m, za0h.b[w12, 12]
2222
; CHECK-NEXT: mov z7.b, p0/m, za0h.b[w12, 14]
23-
; CHECK-NEXT: b dummy_use_8_nxv16i8
23+
; CHECK-NEXT: b use
2424
%z0 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice)
2525
%tileslice.2 = add i32 %tileslice, 2
2626
%z1 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.2)
@@ -38,8 +38,8 @@ define void @extract_row_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
3838
%z7 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.14)
3939

4040
; Force retention of z0..z7
41-
tail call void @dummy_use_8_nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3,
42-
<vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5, <vscale x 16 x i8> %z6, <vscale x 16 x i8> %z7)
41+
tail call void @use(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3,
42+
<vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5, <vscale x 16 x i8> %z6, <vscale x 16 x i8> %z7)
4343
ret void
4444
}
4545

@@ -62,7 +62,7 @@ define void @extract_col_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
6262
; CHECK-NEXT: mov z5.b, p0/m, za0v.b[w12, 11]
6363
; CHECK-NEXT: mov z6.b, p0/m, za0v.b[w12, 13]
6464
; CHECK-NEXT: mov z7.b, p0/m, za0v.b[w12, 15]
65-
; CHECK-NEXT: b dummy_use_8_nxv16i8
65+
; CHECK-NEXT: b use
6666
%tileslice.1 = add i32 %tileslice, 1
6767
%z0 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.1)
6868
%tileslice.3 = add i32 %tileslice, 3
@@ -80,8 +80,8 @@ define void @extract_col_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %
8080
%tileslice.15 = add i32 %tileslice, 15
8181
%z7 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.15)
8282

83-
tail call void @dummy_use_8_nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3,
84-
<vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5, <vscale x 16 x i8> %z6, <vscale x 16 x i8> %z7)
83+
tail call void @use(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3,
84+
<vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5, <vscale x 16 x i8> %z6, <vscale x 16 x i8> %z7)
8585
ret void
8686
}
8787

@@ -96,7 +96,7 @@ define void @extract_row_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
9696
; CHECK-NEXT: mov z1.h, p0/m, za0h.h[w12, 2]
9797
; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 4]
9898
; CHECK-NEXT: mov z3.h, p0/m, za0h.h[w12, 6]
99-
; CHECK-NEXT: b dummy_use_4_nxv8i16
99+
; CHECK-NEXT: b use
100100
%z0 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice)
101101
%tileslice.2 = add i32 %tileslice, 2
102102
%z1 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.2)
@@ -105,7 +105,7 @@ define void @extract_row_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
105105
%tileslice.6 = add i32 %tileslice, 6
106106
%z3 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.6)
107107

108-
tail call void @dummy_use_4_nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
108+
tail call void @use(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
109109
ret void
110110
}
111111

@@ -120,7 +120,7 @@ define void @extract_col_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
120120
; CHECK-NEXT: mov z1.h, p0/m, za1v.h[w12, 3]
121121
; CHECK-NEXT: mov z2.h, p0/m, za1v.h[w12, 5]
122122
; CHECK-NEXT: mov z3.h, p0/m, za1v.h[w12, 7]
123-
; CHECK-NEXT: b dummy_use_4_nxv8i16
123+
; CHECK-NEXT: b use
124124
%tileslice.1 = add i32 %tileslice, 1
125125
%z0 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 1, i32 %tileslice.1)
126126
%tileslice.3 = add i32 %tileslice, 3
@@ -130,7 +130,7 @@ define void @extract_col_h(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 %t
130130
%tileslice.7 = add i32 %tileslice, 7
131131
%z3 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 1, i32 %tileslice.7)
132132

133-
tail call void @dummy_use_4_nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
133+
tail call void @use(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
134134
ret void
135135
}
136136

@@ -153,7 +153,7 @@ define void @extract_f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 %ti
153153
; CHECK-NEXT: mov z5.h, p0/m, za0h.h[w12, 5]
154154
; CHECK-NEXT: mov z6.h, p0/m, za0v.h[w12, 6]
155155
; CHECK-NEXT: mov z7.h, p0/m, za0v.h[w12, 7]
156-
; CHECK-NEXT: b dummy_use_8_nxv8f16
156+
; CHECK-NEXT: b use
157157
%z0 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice)
158158
%tileslice.1 = add i32 %tileslice, 1
159159
%z1 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.1)
@@ -170,8 +170,8 @@ define void @extract_f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 %ti
170170
%tileslice.7 = add i32 %tileslice, 7
171171
%z7 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.7)
172172

173-
tail call void @dummy_use_8_nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3,
174-
<vscale x 8 x half> %z4, <vscale x 8 x half> %z5, <vscale x 8 x half> %z6, <vscale x 8 x half> %z7)
173+
tail call void @use(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3,
174+
<vscale x 8 x half> %z4, <vscale x 8 x half> %z5, <vscale x 8 x half> %z6, <vscale x 8 x half> %z7)
175175
ret void
176176
}
177177

@@ -194,7 +194,7 @@ define void @extract_bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32
194194
; CHECK-NEXT: mov z5.h, p0/m, za0h.h[w12, 5]
195195
; CHECK-NEXT: mov z6.h, p0/m, za0v.h[w12, 6]
196196
; CHECK-NEXT: mov z7.h, p0/m, za0v.h[w12, 7]
197-
; CHECK-NEXT: b dummy_use_8_nxv8bf16
197+
; CHECK-NEXT: b use
198198
%z0 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice)
199199
%tileslice.1 = add i32 %tileslice, 1
200200
%z1 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.1)
@@ -211,8 +211,8 @@ define void @extract_bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32
211211
%tileslice.7 = add i32 %tileslice, 7
212212
%z7 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.7)
213213

214-
tail call void @dummy_use_8_nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3,
215-
<vscale x 8 x bfloat> %z4, <vscale x 8 x bfloat> %z5, <vscale x 8 x bfloat> %z6, <vscale x 8 x bfloat> %z7)
214+
tail call void @use(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3,
215+
<vscale x 8 x bfloat> %z4, <vscale x 8 x bfloat> %z5, <vscale x 8 x bfloat> %z6, <vscale x 8 x bfloat> %z7)
216216
ret void
217217
}
218218

@@ -223,12 +223,12 @@ define void @extract_row_s(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 %t
223223
; CHECK-NEXT: mov w12, w0
224224
; CHECK-NEXT: mov z0.s, p0/m, za0h.s[w12, 0]
225225
; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 2]
226-
; CHECK-NEXT: b dummy_use_2_nxv4i32
226+
; CHECK-NEXT: b use
227227
%z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice)
228228
%tileslice.2 = add i32 %tileslice, 2
229229
%z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice.2)
230230

231-
tail call void @dummy_use_2_nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
231+
tail call void @use(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
232232
ret void
233233
}
234234

@@ -239,13 +239,13 @@ define void @extract_col_s(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 %t
239239
; CHECK-NEXT: mov w12, w0
240240
; CHECK-NEXT: mov z0.s, p0/m, za3v.s[w12, 1]
241241
; CHECK-NEXT: mov z1.s, p0/m, za3v.s[w12, 3]
242-
; CHECK-NEXT: b dummy_use_2_nxv4i32
242+
; CHECK-NEXT: b use
243243
%tileslice.1 = add i32 %tileslice, 1
244244
%z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 3, i32 %tileslice.1)
245245
%tileslice.3 = add i32 %tileslice, 3
246246
%z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 3, i32 %tileslice.3)
247247

248-
tail call void @dummy_use_2_nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
248+
tail call void @use(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
249249
ret void
250250
}
251251

@@ -260,7 +260,7 @@ define void @extract_f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 %t
260260
; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 1]
261261
; CHECK-NEXT: mov z2.s, p0/m, za0v.s[w12, 2]
262262
; CHECK-NEXT: mov z3.s, p0/m, za0v.s[w12, 3]
263-
; CHECK-NEXT: b dummy_use_4_nxv4f32
263+
; CHECK-NEXT: b use
264264
%z0 = call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice)
265265
%tileslice.1 = add i32 %tileslice, 1
266266
%z1 = call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice.1)
@@ -269,7 +269,7 @@ define void @extract_f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 %t
269269
%tileslice.3 = add i32 %tileslice, 3
270270
%z3 = call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice.3)
271271

272-
tail call void @dummy_use_4_nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
272+
tail call void @use(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
273273
ret void
274274
}
275275

@@ -301,12 +301,12 @@ define void @extract_f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i32 %
301301
; CHECK-NEXT: mov w12, w0
302302
; CHECK-NEXT: mov z0.d, p0/m, za0h.d[w12, 0]
303303
; CHECK-NEXT: mov z1.d, p0/m, za0v.d[w12, 1]
304-
; CHECK-NEXT: b dummy_use_2_nxv2f64
304+
; CHECK-NEXT: b use
305305
%z0 = call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i32 0, i32 %tileslice)
306306
%tileslice.1 = add i32 %tileslice, 1
307307
%z1 = call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> %zd, <vscale x 2 x i1> %pg, i32 0, i32 %tileslice.1)
308308

309-
tail call void @dummy_use_2_nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1)
309+
tail call void @use(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1)
310310
ret void
311311
}
312312

@@ -485,6 +485,7 @@ for.body:
485485
exit:
486486
%tmp1 = add <vscale x 4 x i32> %z0, %z1
487487
%res = add <vscale x 4 x i32> %tmp1, %z2
488+
tail call void @use(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
488489
ret <vscale x 4 x i32> %res
489490
}
490491

@@ -523,33 +524,7 @@ declare <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i6
523524
declare <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32, i32)
524525

525526
; ------------------------------------------------------------------------------
526-
; Dummy external functions to force code retention.
527-
; The compiler does not see their implementations, so it must keep the calls.
527+
; Dummy external function to force code retention: the compiler does not see its implementation, so it must keep the calls.
528528
; ------------------------------------------------------------------------------
529529

530-
declare void @dummy_use_8_nxv16i8(
531-
<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
532-
<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>
533-
)
534-
535-
declare void @dummy_use_4_nxv8i16(
536-
<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>
537-
)
538-
539-
declare void @dummy_use_8_nxv8f16(
540-
<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>,
541-
<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>
542-
)
543-
544-
declare void @dummy_use_8_nxv8bf16(
545-
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
546-
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>
547-
)
548-
549-
declare void @dummy_use_2_nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
550-
551-
declare void @dummy_use_4_nxv4f32(
552-
<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>
553-
)
554-
555-
declare void @dummy_use_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
530+
declare void @use(...) ; variadic so this one declaration serves every call in the tests, whatever the vector type or argument count

0 commit comments

Comments
 (0)