Skip to content

Commit 8f19de8

Browse files
committed
[AArch64] Add big-endian tests for zext-to-tbl.ll
Extra tests for D120571.
1 parent a65557d commit 8f19de8

File tree

1 file changed

+217
-4
lines changed

1 file changed

+217
-4
lines changed

llvm/test/CodeGen/AArch64/zext-to-tbl.ll

Lines changed: 217 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
1 1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -o - %s | FileCheck %s
3-
4-
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5-
target triple = "arm64-apple-ios"
2+
; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
3+
; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s
6 4

7 5
; It's profitable to convert the zext to a shuffle, which in turn will be
8 6
; lowered to 4 tbl instructions. The masks are materialized outside the loop.
@@ -26,6 +24,33 @@ define void @zext_v16i8_to_v16i32_in_loop(i8* %src, i32* %dst) {
26 24
; CHECK-NEXT: b.ne LBB0_1
27 25
; CHECK-NEXT: ; %bb.2: ; %exit
28 26
; CHECK-NEXT: ret
27+
;
28+
; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop:
29+
; CHECK-BE: // %bb.0: // %entry
30+
; CHECK-BE-NEXT: mov x8, xzr
31+
; CHECK-BE-NEXT: .LBB0_1: // %loop
32+
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
33+
; CHECK-BE-NEXT: add x9, x0, x8
34+
; CHECK-BE-NEXT: add x10, x1, #32
35+
; CHECK-BE-NEXT: add x8, x8, #16
36+
; CHECK-BE-NEXT: cmp x8, #128
37+
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
38+
; CHECK-BE-NEXT: add x9, x1, #48
39+
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
40+
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
41+
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
42+
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
43+
; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
44+
; CHECK-BE-NEXT: add x9, x1, #16
45+
; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0
46+
; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
47+
; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
48+
; CHECK-BE-NEXT: st1 { v2.4s }, [x1]
49+
; CHECK-BE-NEXT: add x1, x1, #64
50+
; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
51+
; CHECK-BE-NEXT: b.ne .LBB0_1
52+
; CHECK-BE-NEXT: // %bb.2: // %exit
53+
; CHECK-BE-NEXT: ret
29 54
entry:
30 55
br label %loop
31 56

@@ -74,6 +99,40 @@ define void @zext_v16i8_to_v16i32_in_loop_not_header(i8* %src, i32* %dst, i1 %c)
74 99
; CHECK-NEXT: b LBB1_1
75 100
; CHECK-NEXT: LBB1_4: ; %exit
76 101
; CHECK-NEXT: ret
102+
;
103+
; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_not_header:
104+
; CHECK-BE: // %bb.0: // %entry
105+
; CHECK-BE-NEXT: mov x8, xzr
106+
; CHECK-BE-NEXT: b .LBB1_2
107+
; CHECK-BE-NEXT: .LBB1_1: // %loop.latch
108+
; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1
109+
; CHECK-BE-NEXT: add x8, x8, #16
110+
; CHECK-BE-NEXT: add x1, x1, #64
111+
; CHECK-BE-NEXT: cmp x8, #128
112+
; CHECK-BE-NEXT: b.eq .LBB1_4
113+
; CHECK-BE-NEXT: .LBB1_2: // %loop
114+
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
115+
; CHECK-BE-NEXT: tbz w2, #0, .LBB1_1
116+
; CHECK-BE-NEXT: // %bb.3: // %then
117+
; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1
118+
; CHECK-BE-NEXT: add x9, x0, x8
119+
; CHECK-BE-NEXT: add x10, x1, #32
120+
; CHECK-BE-NEXT: add x11, x1, #16
121+
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
122+
; CHECK-BE-NEXT: add x9, x1, #48
123+
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
124+
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
125+
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
126+
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
127+
; CHECK-BE-NEXT: ushll2 v3.4s, v0.8h, #0
128+
; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
129+
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
130+
; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
131+
; CHECK-BE-NEXT: st1 { v3.4s }, [x11]
132+
; CHECK-BE-NEXT: st1 { v0.4s }, [x1]
133+
; CHECK-BE-NEXT: b .LBB1_1
134+
; CHECK-BE-NEXT: .LBB1_4: // %exit
135+
; CHECK-BE-NEXT: ret
77 136
entry:
78 137
br label %loop
79 138

@@ -115,6 +174,24 @@ define void @zext_v16i8_to_v16i32_no_loop(i8* %src, i32* %dst) {
115 174
; CHECK-NEXT: stp q1, q2, [x1, #32]
116 175
; CHECK-NEXT: stp q0, q3, [x1]
117 176
; CHECK-NEXT: ret
177+
;
178+
; CHECK-BE-LABEL: zext_v16i8_to_v16i32_no_loop:
179+
; CHECK-BE: // %bb.0: // %entry
180+
; CHECK-BE-NEXT: ld1 { v0.16b }, [x0]
181+
; CHECK-BE-NEXT: add x8, x1, #48
182+
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
183+
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
184+
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
185+
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
186+
; CHECK-BE-NEXT: st1 { v2.4s }, [x8]
187+
; CHECK-BE-NEXT: add x8, x1, #32
188+
; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
189+
; CHECK-BE-NEXT: add x8, x1, #16
190+
; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0
191+
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
192+
; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
193+
; CHECK-BE-NEXT: st1 { v0.4s }, [x1]
194+
; CHECK-BE-NEXT: ret
118 195
entry:
119 196
%src.cast = bitcast i8* %src to <16 x i8>*
120 197
%load = load <16 x i8>, <16 x i8>* %src.cast
@@ -145,6 +222,33 @@ define void @zext_v16i8_to_v16i32_in_loop_optsize(i8* %src, i32* %dst) optsize {
145 222
; CHECK-NEXT: b.ne LBB3_1
146 223
; CHECK-NEXT: ; %bb.2: ; %exit
147 224
; CHECK-NEXT: ret
225+
;
226+
; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_optsize:
227+
; CHECK-BE: // %bb.0: // %entry
228+
; CHECK-BE-NEXT: mov x8, xzr
229+
; CHECK-BE-NEXT: .LBB3_1: // %loop
230+
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
231+
; CHECK-BE-NEXT: add x9, x0, x8
232+
; CHECK-BE-NEXT: add x10, x1, #32
233+
; CHECK-BE-NEXT: add x8, x8, #16
234+
; CHECK-BE-NEXT: cmp x8, #128
235+
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
236+
; CHECK-BE-NEXT: add x9, x1, #48
237+
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
238+
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
239+
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
240+
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
241+
; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
242+
; CHECK-BE-NEXT: add x9, x1, #16
243+
; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0
244+
; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
245+
; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
246+
; CHECK-BE-NEXT: st1 { v2.4s }, [x1]
247+
; CHECK-BE-NEXT: add x1, x1, #64
248+
; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
249+
; CHECK-BE-NEXT: b.ne .LBB3_1
250+
; CHECK-BE-NEXT: // %bb.2: // %exit
251+
; CHECK-BE-NEXT: ret
148 252
entry:
149 253
br label %loop
150 254

@@ -186,6 +290,33 @@ define void @zext_v16i8_to_v16i32_in_loop_minsize(i8* %src, i32* %dst) minsize {
186 290
; CHECK-NEXT: b.ne LBB4_1
187 291
; CHECK-NEXT: ; %bb.2: ; %exit
188 292
; CHECK-NEXT: ret
293+
;
294+
; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_minsize:
295+
; CHECK-BE: // %bb.0: // %entry
296+
; CHECK-BE-NEXT: mov x8, xzr
297+
; CHECK-BE-NEXT: .LBB4_1: // %loop
298+
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
299+
; CHECK-BE-NEXT: add x9, x0, x8
300+
; CHECK-BE-NEXT: add x10, x1, #32
301+
; CHECK-BE-NEXT: add x8, x8, #16
302+
; CHECK-BE-NEXT: cmp x8, #128
303+
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
304+
; CHECK-BE-NEXT: add x9, x1, #48
305+
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
306+
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
307+
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
308+
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
309+
; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
310+
; CHECK-BE-NEXT: add x9, x1, #16
311+
; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0
312+
; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
313+
; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
314+
; CHECK-BE-NEXT: st1 { v2.4s }, [x1]
315+
; CHECK-BE-NEXT: add x1, x1, #64
316+
; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
317+
; CHECK-BE-NEXT: b.ne .LBB4_1
318+
; CHECK-BE-NEXT: // %bb.2: // %exit
319+
; CHECK-BE-NEXT: ret
189 320
entry:
190 321
br label %loop
191 322

@@ -223,6 +354,25 @@ define void @zext_v16i8_to_v16i16_in_loop(i8* %src, i16* %dst) {
223 354
; CHECK-NEXT: b.ne LBB5_1
224 355
; CHECK-NEXT: ; %bb.2: ; %exit
225 356
; CHECK-NEXT: ret
357+
;
358+
; CHECK-BE-LABEL: zext_v16i8_to_v16i16_in_loop:
359+
; CHECK-BE: // %bb.0: // %entry
360+
; CHECK-BE-NEXT: mov x8, xzr
361+
; CHECK-BE-NEXT: .LBB5_1: // %loop
362+
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
363+
; CHECK-BE-NEXT: add x9, x0, x8
364+
; CHECK-BE-NEXT: add x8, x8, #16
365+
; CHECK-BE-NEXT: cmp x8, #128
366+
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
367+
; CHECK-BE-NEXT: add x9, x1, #16
368+
; CHECK-BE-NEXT: ushll v1.8h, v0.8b, #0
369+
; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0
370+
; CHECK-BE-NEXT: st1 { v1.8h }, [x1]
371+
; CHECK-BE-NEXT: add x1, x1, #32
372+
; CHECK-BE-NEXT: st1 { v0.8h }, [x9]
373+
; CHECK-BE-NEXT: b.ne .LBB5_1
374+
; CHECK-BE-NEXT: // %bb.2: // %exit
375+
; CHECK-BE-NEXT: ret
226 376
entry:
227 377
br label %loop
228 378

@@ -259,6 +409,26 @@ define void @zext_v8i8_to_v8i32_in_loop(i8* %src, i32* %dst) {
259 409
; CHECK-NEXT: b.ne LBB6_1
260 410
; CHECK-NEXT: ; %bb.2: ; %exit
261 411
; CHECK-NEXT: ret
412+
;
413+
; CHECK-BE-LABEL: zext_v8i8_to_v8i32_in_loop:
414+
; CHECK-BE: // %bb.0: // %entry
415+
; CHECK-BE-NEXT: mov x8, xzr
416+
; CHECK-BE-NEXT: .LBB6_1: // %loop
417+
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
418+
; CHECK-BE-NEXT: add x9, x0, x8
419+
; CHECK-BE-NEXT: add x8, x8, #16
420+
; CHECK-BE-NEXT: cmp x8, #128
421+
; CHECK-BE-NEXT: ld1 { v0.8b }, [x9]
422+
; CHECK-BE-NEXT: add x9, x1, #16
423+
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
424+
; CHECK-BE-NEXT: ushll v1.4s, v0.4h, #0
425+
; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
426+
; CHECK-BE-NEXT: st1 { v1.4s }, [x1]
427+
; CHECK-BE-NEXT: add x1, x1, #64
428+
; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
429+
; CHECK-BE-NEXT: b.ne .LBB6_1
430+
; CHECK-BE-NEXT: // %bb.2: // %exit
431+
; CHECK-BE-NEXT: ret
262 432
entry:
263 433
br label %loop
264 434

@@ -309,6 +479,49 @@ define void @zext_v16i8_to_v16i64_in_loop(i8* %src, i64* %dst) {
309 479
; CHECK-NEXT: b.ne LBB7_1
310 480
; CHECK-NEXT: ; %bb.2: ; %exit
311 481
; CHECK-NEXT: ret
482+
;
483+
; CHECK-BE-LABEL: zext_v16i8_to_v16i64_in_loop:
484+
; CHECK-BE: // %bb.0: // %entry
485+
; CHECK-BE-NEXT: mov x8, xzr
486+
; CHECK-BE-NEXT: .LBB7_1: // %loop
487+
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
488+
; CHECK-BE-NEXT: add x9, x0, x8
489+
; CHECK-BE-NEXT: add x10, x1, #96
490+
; CHECK-BE-NEXT: add x8, x8, #16
491+
; CHECK-BE-NEXT: cmp x8, #128
492+
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
493+
; CHECK-BE-NEXT: add x9, x1, #112
494+
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
495+
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
496+
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
497+
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
498+
; CHECK-BE-NEXT: ushll2 v3.2d, v2.4s, #0
499+
; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0
500+
; CHECK-BE-NEXT: st1 { v3.2d }, [x9]
501+
; CHECK-BE-NEXT: add x9, x1, #80
502+
; CHECK-BE-NEXT: ushll2 v3.2d, v1.4s, #0
503+
; CHECK-BE-NEXT: st1 { v2.2d }, [x10]
504+
; CHECK-BE-NEXT: ushll2 v2.4s, v0.8h, #0
505+
; CHECK-BE-NEXT: add x10, x1, #48
506+
; CHECK-BE-NEXT: st1 { v3.2d }, [x9]
507+
; CHECK-BE-NEXT: add x9, x1, #64
508+
; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0
509+
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
510+
; CHECK-BE-NEXT: ushll2 v4.2d, v2.4s, #0
511+
; CHECK-BE-NEXT: st1 { v1.2d }, [x9]
512+
; CHECK-BE-NEXT: ushll v1.2d, v0.2s, #0
513+
; CHECK-BE-NEXT: add x9, x1, #16
514+
; CHECK-BE-NEXT: st1 { v4.2d }, [x10]
515+
; CHECK-BE-NEXT: add x10, x1, #32
516+
; CHECK-BE-NEXT: st1 { v1.2d }, [x1]
517+
; CHECK-BE-NEXT: add x1, x1, #128
518+
; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #0
519+
; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0
520+
; CHECK-BE-NEXT: st1 { v0.2d }, [x9]
521+
; CHECK-BE-NEXT: st1 { v2.2d }, [x10]
522+
; CHECK-BE-NEXT: b.ne .LBB7_1
523+
; CHECK-BE-NEXT: // %bb.2: // %exit
524+
; CHECK-BE-NEXT: ret
312 525
entry:
313 526
br label %loop
314 527

0 commit comments

Comments
 (0)