1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc -o - %s | FileCheck %s
3
-
4
- target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5
- target triple = "arm64-apple-ios"
2
+ ; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
3
+ ; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s
6
4
7
5
; It's profitable to convert the zext to a shuffle, which in turn will be
8
6
; lowered to 4 tbl instructions. The masks are materialized outside the loop.
@@ -26,6 +24,33 @@ define void @zext_v16i8_to_v16i32_in_loop(i8* %src, i32* %dst) {
26
24
; CHECK-NEXT: b.ne LBB0_1
27
25
; CHECK-NEXT: ; %bb.2: ; %exit
28
26
; CHECK-NEXT: ret
27
+ ;
28
+ ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop:
29
+ ; CHECK-BE: // %bb.0: // %entry
30
+ ; CHECK-BE-NEXT: mov x8, xzr
31
+ ; CHECK-BE-NEXT: .LBB0_1: // %loop
32
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
33
+ ; CHECK-BE-NEXT: add x9, x0, x8
34
+ ; CHECK-BE-NEXT: add x10, x1, #32
35
+ ; CHECK-BE-NEXT: add x8, x8, #16
36
+ ; CHECK-BE-NEXT: cmp x8, #128
37
+ ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
38
+ ; CHECK-BE-NEXT: add x9, x1, #48
39
+ ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
40
+ ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
41
+ ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
42
+ ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
43
+ ; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
44
+ ; CHECK-BE-NEXT: add x9, x1, #16
45
+ ; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0
46
+ ; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
47
+ ; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
48
+ ; CHECK-BE-NEXT: st1 { v2.4s }, [x1]
49
+ ; CHECK-BE-NEXT: add x1, x1, #64
50
+ ; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
51
+ ; CHECK-BE-NEXT: b.ne .LBB0_1
52
+ ; CHECK-BE-NEXT: // %bb.2: // %exit
53
+ ; CHECK-BE-NEXT: ret
29
54
entry:
30
55
br label %loop
31
56
@@ -74,6 +99,40 @@ define void @zext_v16i8_to_v16i32_in_loop_not_header(i8* %src, i32* %dst, i1 %c)
74
99
; CHECK-NEXT: b LBB1_1
75
100
; CHECK-NEXT: LBB1_4: ; %exit
76
101
; CHECK-NEXT: ret
102
+ ;
103
+ ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_not_header:
104
+ ; CHECK-BE: // %bb.0: // %entry
105
+ ; CHECK-BE-NEXT: mov x8, xzr
106
+ ; CHECK-BE-NEXT: b .LBB1_2
107
+ ; CHECK-BE-NEXT: .LBB1_1: // %loop.latch
108
+ ; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1
109
+ ; CHECK-BE-NEXT: add x8, x8, #16
110
+ ; CHECK-BE-NEXT: add x1, x1, #64
111
+ ; CHECK-BE-NEXT: cmp x8, #128
112
+ ; CHECK-BE-NEXT: b.eq .LBB1_4
113
+ ; CHECK-BE-NEXT: .LBB1_2: // %loop
114
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
115
+ ; CHECK-BE-NEXT: tbz w2, #0, .LBB1_1
116
+ ; CHECK-BE-NEXT: // %bb.3: // %then
117
+ ; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1
118
+ ; CHECK-BE-NEXT: add x9, x0, x8
119
+ ; CHECK-BE-NEXT: add x10, x1, #32
120
+ ; CHECK-BE-NEXT: add x11, x1, #16
121
+ ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
122
+ ; CHECK-BE-NEXT: add x9, x1, #48
123
+ ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
124
+ ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
125
+ ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
126
+ ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
127
+ ; CHECK-BE-NEXT: ushll2 v3.4s, v0.8h, #0
128
+ ; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
129
+ ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
130
+ ; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
131
+ ; CHECK-BE-NEXT: st1 { v3.4s }, [x11]
132
+ ; CHECK-BE-NEXT: st1 { v0.4s }, [x1]
133
+ ; CHECK-BE-NEXT: b .LBB1_1
134
+ ; CHECK-BE-NEXT: .LBB1_4: // %exit
135
+ ; CHECK-BE-NEXT: ret
77
136
entry:
78
137
br label %loop
79
138
@@ -115,6 +174,24 @@ define void @zext_v16i8_to_v16i32_no_loop(i8* %src, i32* %dst) {
115
174
; CHECK-NEXT: stp q1, q2, [x1, #32]
116
175
; CHECK-NEXT: stp q0, q3, [x1]
117
176
; CHECK-NEXT: ret
177
+ ;
178
+ ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_no_loop:
179
+ ; CHECK-BE: // %bb.0: // %entry
180
+ ; CHECK-BE-NEXT: ld1 { v0.16b }, [x0]
181
+ ; CHECK-BE-NEXT: add x8, x1, #48
182
+ ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
183
+ ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
184
+ ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
185
+ ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
186
+ ; CHECK-BE-NEXT: st1 { v2.4s }, [x8]
187
+ ; CHECK-BE-NEXT: add x8, x1, #32
188
+ ; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
189
+ ; CHECK-BE-NEXT: add x8, x1, #16
190
+ ; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0
191
+ ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
192
+ ; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
193
+ ; CHECK-BE-NEXT: st1 { v0.4s }, [x1]
194
+ ; CHECK-BE-NEXT: ret
118
195
entry:
119
196
%src.cast = bitcast i8* %src to <16 x i8 >*
120
197
%load = load <16 x i8 >, <16 x i8 >* %src.cast
@@ -145,6 +222,33 @@ define void @zext_v16i8_to_v16i32_in_loop_optsize(i8* %src, i32* %dst) optsize {
145
222
; CHECK-NEXT: b.ne LBB3_1
146
223
; CHECK-NEXT: ; %bb.2: ; %exit
147
224
; CHECK-NEXT: ret
225
+ ;
226
+ ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_optsize:
227
+ ; CHECK-BE: // %bb.0: // %entry
228
+ ; CHECK-BE-NEXT: mov x8, xzr
229
+ ; CHECK-BE-NEXT: .LBB3_1: // %loop
230
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
231
+ ; CHECK-BE-NEXT: add x9, x0, x8
232
+ ; CHECK-BE-NEXT: add x10, x1, #32
233
+ ; CHECK-BE-NEXT: add x8, x8, #16
234
+ ; CHECK-BE-NEXT: cmp x8, #128
235
+ ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
236
+ ; CHECK-BE-NEXT: add x9, x1, #48
237
+ ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
238
+ ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
239
+ ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
240
+ ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
241
+ ; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
242
+ ; CHECK-BE-NEXT: add x9, x1, #16
243
+ ; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0
244
+ ; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
245
+ ; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
246
+ ; CHECK-BE-NEXT: st1 { v2.4s }, [x1]
247
+ ; CHECK-BE-NEXT: add x1, x1, #64
248
+ ; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
249
+ ; CHECK-BE-NEXT: b.ne .LBB3_1
250
+ ; CHECK-BE-NEXT: // %bb.2: // %exit
251
+ ; CHECK-BE-NEXT: ret
148
252
entry:
149
253
br label %loop
150
254
@@ -186,6 +290,33 @@ define void @zext_v16i8_to_v16i32_in_loop_minsize(i8* %src, i32* %dst) minsize {
186
290
; CHECK-NEXT: b.ne LBB4_1
187
291
; CHECK-NEXT: ; %bb.2: ; %exit
188
292
; CHECK-NEXT: ret
293
+ ;
294
+ ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_minsize:
295
+ ; CHECK-BE: // %bb.0: // %entry
296
+ ; CHECK-BE-NEXT: mov x8, xzr
297
+ ; CHECK-BE-NEXT: .LBB4_1: // %loop
298
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
299
+ ; CHECK-BE-NEXT: add x9, x0, x8
300
+ ; CHECK-BE-NEXT: add x10, x1, #32
301
+ ; CHECK-BE-NEXT: add x8, x8, #16
302
+ ; CHECK-BE-NEXT: cmp x8, #128
303
+ ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
304
+ ; CHECK-BE-NEXT: add x9, x1, #48
305
+ ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
306
+ ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
307
+ ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
308
+ ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
309
+ ; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
310
+ ; CHECK-BE-NEXT: add x9, x1, #16
311
+ ; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0
312
+ ; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
313
+ ; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
314
+ ; CHECK-BE-NEXT: st1 { v2.4s }, [x1]
315
+ ; CHECK-BE-NEXT: add x1, x1, #64
316
+ ; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
317
+ ; CHECK-BE-NEXT: b.ne .LBB4_1
318
+ ; CHECK-BE-NEXT: // %bb.2: // %exit
319
+ ; CHECK-BE-NEXT: ret
189
320
entry:
190
321
br label %loop
191
322
@@ -223,6 +354,25 @@ define void @zext_v16i8_to_v16i16_in_loop(i8* %src, i16* %dst) {
223
354
; CHECK-NEXT: b.ne LBB5_1
224
355
; CHECK-NEXT: ; %bb.2: ; %exit
225
356
; CHECK-NEXT: ret
357
+ ;
358
+ ; CHECK-BE-LABEL: zext_v16i8_to_v16i16_in_loop:
359
+ ; CHECK-BE: // %bb.0: // %entry
360
+ ; CHECK-BE-NEXT: mov x8, xzr
361
+ ; CHECK-BE-NEXT: .LBB5_1: // %loop
362
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
363
+ ; CHECK-BE-NEXT: add x9, x0, x8
364
+ ; CHECK-BE-NEXT: add x8, x8, #16
365
+ ; CHECK-BE-NEXT: cmp x8, #128
366
+ ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
367
+ ; CHECK-BE-NEXT: add x9, x1, #16
368
+ ; CHECK-BE-NEXT: ushll v1.8h, v0.8b, #0
369
+ ; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0
370
+ ; CHECK-BE-NEXT: st1 { v1.8h }, [x1]
371
+ ; CHECK-BE-NEXT: add x1, x1, #32
372
+ ; CHECK-BE-NEXT: st1 { v0.8h }, [x9]
373
+ ; CHECK-BE-NEXT: b.ne .LBB5_1
374
+ ; CHECK-BE-NEXT: // %bb.2: // %exit
375
+ ; CHECK-BE-NEXT: ret
226
376
entry:
227
377
br label %loop
228
378
@@ -259,6 +409,26 @@ define void @zext_v8i8_to_v8i32_in_loop(i8* %src, i32* %dst) {
259
409
; CHECK-NEXT: b.ne LBB6_1
260
410
; CHECK-NEXT: ; %bb.2: ; %exit
261
411
; CHECK-NEXT: ret
412
+ ;
413
+ ; CHECK-BE-LABEL: zext_v8i8_to_v8i32_in_loop:
414
+ ; CHECK-BE: // %bb.0: // %entry
415
+ ; CHECK-BE-NEXT: mov x8, xzr
416
+ ; CHECK-BE-NEXT: .LBB6_1: // %loop
417
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
418
+ ; CHECK-BE-NEXT: add x9, x0, x8
419
+ ; CHECK-BE-NEXT: add x8, x8, #16
420
+ ; CHECK-BE-NEXT: cmp x8, #128
421
+ ; CHECK-BE-NEXT: ld1 { v0.8b }, [x9]
422
+ ; CHECK-BE-NEXT: add x9, x1, #16
423
+ ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
424
+ ; CHECK-BE-NEXT: ushll v1.4s, v0.4h, #0
425
+ ; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
426
+ ; CHECK-BE-NEXT: st1 { v1.4s }, [x1]
427
+ ; CHECK-BE-NEXT: add x1, x1, #64
428
+ ; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
429
+ ; CHECK-BE-NEXT: b.ne .LBB6_1
430
+ ; CHECK-BE-NEXT: // %bb.2: // %exit
431
+ ; CHECK-BE-NEXT: ret
262
432
entry:
263
433
br label %loop
264
434
@@ -309,6 +479,49 @@ define void @zext_v16i8_to_v16i64_in_loop(i8* %src, i64* %dst) {
309
479
; CHECK-NEXT: b.ne LBB7_1
310
480
; CHECK-NEXT: ; %bb.2: ; %exit
311
481
; CHECK-NEXT: ret
482
+ ;
483
+ ; CHECK-BE-LABEL: zext_v16i8_to_v16i64_in_loop:
484
+ ; CHECK-BE: // %bb.0: // %entry
485
+ ; CHECK-BE-NEXT: mov x8, xzr
486
+ ; CHECK-BE-NEXT: .LBB7_1: // %loop
487
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
488
+ ; CHECK-BE-NEXT: add x9, x0, x8
489
+ ; CHECK-BE-NEXT: add x10, x1, #96
490
+ ; CHECK-BE-NEXT: add x8, x8, #16
491
+ ; CHECK-BE-NEXT: cmp x8, #128
492
+ ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
493
+ ; CHECK-BE-NEXT: add x9, x1, #112
494
+ ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
495
+ ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
496
+ ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
497
+ ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
498
+ ; CHECK-BE-NEXT: ushll2 v3.2d, v2.4s, #0
499
+ ; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0
500
+ ; CHECK-BE-NEXT: st1 { v3.2d }, [x9]
501
+ ; CHECK-BE-NEXT: add x9, x1, #80
502
+ ; CHECK-BE-NEXT: ushll2 v3.2d, v1.4s, #0
503
+ ; CHECK-BE-NEXT: st1 { v2.2d }, [x10]
504
+ ; CHECK-BE-NEXT: ushll2 v2.4s, v0.8h, #0
505
+ ; CHECK-BE-NEXT: add x10, x1, #48
506
+ ; CHECK-BE-NEXT: st1 { v3.2d }, [x9]
507
+ ; CHECK-BE-NEXT: add x9, x1, #64
508
+ ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0
509
+ ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
510
+ ; CHECK-BE-NEXT: ushll2 v4.2d, v2.4s, #0
511
+ ; CHECK-BE-NEXT: st1 { v1.2d }, [x9]
512
+ ; CHECK-BE-NEXT: ushll v1.2d, v0.2s, #0
513
+ ; CHECK-BE-NEXT: add x9, x1, #16
514
+ ; CHECK-BE-NEXT: st1 { v4.2d }, [x10]
515
+ ; CHECK-BE-NEXT: add x10, x1, #32
516
+ ; CHECK-BE-NEXT: st1 { v1.2d }, [x1]
517
+ ; CHECK-BE-NEXT: add x1, x1, #128
518
+ ; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #0
519
+ ; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0
520
+ ; CHECK-BE-NEXT: st1 { v0.2d }, [x9]
521
+ ; CHECK-BE-NEXT: st1 { v2.2d }, [x10]
522
+ ; CHECK-BE-NEXT: b.ne .LBB7_1
523
+ ; CHECK-BE-NEXT: // %bb.2: // %exit
524
+ ; CHECK-BE-NEXT: ret
312
525
entry:
313
526
br label %loop
314
527
0 commit comments