1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc -o - %s | FileCheck %s
3
-
4
- target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5
- target triple = "arm64-apple-ios"
2
+ ; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
3
+ ; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s
6
4
7
5
; It's profitable to use a single tbl.4 instruction to lower the truncate.
8
6
define void @trunc_v16i32_to_v16i8_in_loop (ptr %A , ptr %dst ) {
@@ -23,6 +21,30 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
23
21
; CHECK-NEXT: b.eq LBB0_1
24
22
; CHECK-NEXT: ; %bb.2: ; %exit
25
23
; CHECK-NEXT: ret
24
+ ;
25
+ ; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_in_loop:
26
+ ; CHECK-BE: // %bb.0: // %entry
27
+ ; CHECK-BE-NEXT: mov x8, xzr
28
+ ; CHECK-BE-NEXT: .LBB0_1: // %loop
29
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
30
+ ; CHECK-BE-NEXT: add x9, x0, x8, lsl #6
31
+ ; CHECK-BE-NEXT: add x10, x9, #48
32
+ ; CHECK-BE-NEXT: add x11, x9, #32
33
+ ; CHECK-BE-NEXT: ld1 { v0.4s }, [x9]
34
+ ; CHECK-BE-NEXT: add x9, x9, #16
35
+ ; CHECK-BE-NEXT: ld1 { v1.4s }, [x10]
36
+ ; CHECK-BE-NEXT: ld1 { v2.4s }, [x11]
37
+ ; CHECK-BE-NEXT: ld1 { v3.4s }, [x9]
38
+ ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
39
+ ; CHECK-BE-NEXT: add x8, x8, #1
40
+ ; CHECK-BE-NEXT: cmp x8, #1000
41
+ ; CHECK-BE-NEXT: uzp1 v1.8h, v2.8h, v1.8h
42
+ ; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v3.8h
43
+ ; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
44
+ ; CHECK-BE-NEXT: st1 { v0.16b }, [x9]
45
+ ; CHECK-BE-NEXT: b.eq .LBB0_1
46
+ ; CHECK-BE-NEXT: // %bb.2: // %exit
47
+ ; CHECK-BE-NEXT: ret
26
48
entry:
27
49
br label %loop
28
50
@@ -53,6 +75,21 @@ define void @trunc_v16i32_to_v16i8_no_loop(ptr %A, ptr %dst) {
53
75
; CHECK-NEXT: uzp1.16b v0, v1, v0
54
76
; CHECK-NEXT: str q0, [x1]
55
77
; CHECK-NEXT: ret
78
+ ;
79
+ ; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_no_loop:
80
+ ; CHECK-BE: // %bb.0: // %entry
81
+ ; CHECK-BE-NEXT: add x8, x0, #48
82
+ ; CHECK-BE-NEXT: add x9, x0, #32
83
+ ; CHECK-BE-NEXT: ld1 { v0.4s }, [x0]
84
+ ; CHECK-BE-NEXT: ld1 { v1.4s }, [x8]
85
+ ; CHECK-BE-NEXT: add x8, x0, #16
86
+ ; CHECK-BE-NEXT: ld1 { v2.4s }, [x9]
87
+ ; CHECK-BE-NEXT: ld1 { v3.4s }, [x8]
88
+ ; CHECK-BE-NEXT: uzp1 v1.8h, v2.8h, v1.8h
89
+ ; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v3.8h
90
+ ; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
91
+ ; CHECK-BE-NEXT: st1 { v0.16b }, [x1]
92
+ ; CHECK-BE-NEXT: ret
56
93
entry:
57
94
%l.A = load <16 x i32 >, ptr %A
58
95
%trunc = trunc <16 x i32 > %l.A to <16 x i8 >
@@ -77,6 +114,25 @@ define void @trunc_v8i32_to_v8i8_in_loop(ptr %A, ptr %dst) {
77
114
; CHECK-NEXT: b.eq LBB2_1
78
115
; CHECK-NEXT: ; %bb.2: ; %exit
79
116
; CHECK-NEXT: ret
117
+ ;
118
+ ; CHECK-BE-LABEL: trunc_v8i32_to_v8i8_in_loop:
119
+ ; CHECK-BE: // %bb.0: // %entry
120
+ ; CHECK-BE-NEXT: mov x8, xzr
121
+ ; CHECK-BE-NEXT: .LBB2_1: // %loop
122
+ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
123
+ ; CHECK-BE-NEXT: add x9, x0, x8, lsl #5
124
+ ; CHECK-BE-NEXT: add x10, x9, #16
125
+ ; CHECK-BE-NEXT: ld1 { v0.4s }, [x9]
126
+ ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
127
+ ; CHECK-BE-NEXT: add x8, x8, #1
128
+ ; CHECK-BE-NEXT: ld1 { v1.4s }, [x10]
129
+ ; CHECK-BE-NEXT: cmp x8, #1000
130
+ ; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
131
+ ; CHECK-BE-NEXT: xtn v0.8b, v0.8h
132
+ ; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
133
+ ; CHECK-BE-NEXT: b.eq .LBB2_1
134
+ ; CHECK-BE-NEXT: // %bb.2: // %exit
135
+ ; CHECK-BE-NEXT: ret
80
136
entry:
81
137
br label %loop
82
138
0 commit comments