Skip to content

Commit 791a7ae

Browse files
committed
[AArch64] Add big-endian tests for trunc-to-tbl.ll
Extra tests for D133495.
1 parent 0e868af commit 791a7ae

File tree

1 file changed: 60 additions and 4 deletions.

llvm/test/CodeGen/AArch64/trunc-to-tbl.ll

Lines changed: 60 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -o - %s | FileCheck %s
3-
4-
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5-
target triple = "arm64-apple-ios"
2+
; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
3+
; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s
64

75
; It's profitable to use a single tbl.4 instruction to lower the truncate.
86
define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
@@ -23,6 +21,30 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
2321
; CHECK-NEXT: b.eq LBB0_1
2422
; CHECK-NEXT: ; %bb.2: ; %exit
2523
; CHECK-NEXT: ret
24+
;
25+
; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_in_loop:
26+
; CHECK-BE: // %bb.0: // %entry
27+
; CHECK-BE-NEXT: mov x8, xzr
28+
; CHECK-BE-NEXT: .LBB0_1: // %loop
29+
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
30+
; CHECK-BE-NEXT: add x9, x0, x8, lsl #6
31+
; CHECK-BE-NEXT: add x10, x9, #48
32+
; CHECK-BE-NEXT: add x11, x9, #32
33+
; CHECK-BE-NEXT: ld1 { v0.4s }, [x9]
34+
; CHECK-BE-NEXT: add x9, x9, #16
35+
; CHECK-BE-NEXT: ld1 { v1.4s }, [x10]
36+
; CHECK-BE-NEXT: ld1 { v2.4s }, [x11]
37+
; CHECK-BE-NEXT: ld1 { v3.4s }, [x9]
38+
; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
39+
; CHECK-BE-NEXT: add x8, x8, #1
40+
; CHECK-BE-NEXT: cmp x8, #1000
41+
; CHECK-BE-NEXT: uzp1 v1.8h, v2.8h, v1.8h
42+
; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v3.8h
43+
; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
44+
; CHECK-BE-NEXT: st1 { v0.16b }, [x9]
45+
; CHECK-BE-NEXT: b.eq .LBB0_1
46+
; CHECK-BE-NEXT: // %bb.2: // %exit
47+
; CHECK-BE-NEXT: ret
2648
entry:
2749
br label %loop
2850

@@ -53,6 +75,21 @@ define void @trunc_v16i32_to_v16i8_no_loop(ptr %A, ptr %dst) {
5375
; CHECK-NEXT: uzp1.16b v0, v1, v0
5476
; CHECK-NEXT: str q0, [x1]
5577
; CHECK-NEXT: ret
78+
;
79+
; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_no_loop:
80+
; CHECK-BE: // %bb.0: // %entry
81+
; CHECK-BE-NEXT: add x8, x0, #48
82+
; CHECK-BE-NEXT: add x9, x0, #32
83+
; CHECK-BE-NEXT: ld1 { v0.4s }, [x0]
84+
; CHECK-BE-NEXT: ld1 { v1.4s }, [x8]
85+
; CHECK-BE-NEXT: add x8, x0, #16
86+
; CHECK-BE-NEXT: ld1 { v2.4s }, [x9]
87+
; CHECK-BE-NEXT: ld1 { v3.4s }, [x8]
88+
; CHECK-BE-NEXT: uzp1 v1.8h, v2.8h, v1.8h
89+
; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v3.8h
90+
; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
91+
; CHECK-BE-NEXT: st1 { v0.16b }, [x1]
92+
; CHECK-BE-NEXT: ret
5693
entry:
5794
%l.A = load <16 x i32>, ptr %A
5895
%trunc = trunc <16 x i32> %l.A to <16 x i8>
@@ -77,6 +114,25 @@ define void @trunc_v8i32_to_v8i8_in_loop(ptr %A, ptr %dst) {
77114
; CHECK-NEXT: b.eq LBB2_1
78115
; CHECK-NEXT: ; %bb.2: ; %exit
79116
; CHECK-NEXT: ret
117+
;
118+
; CHECK-BE-LABEL: trunc_v8i32_to_v8i8_in_loop:
119+
; CHECK-BE: // %bb.0: // %entry
120+
; CHECK-BE-NEXT: mov x8, xzr
121+
; CHECK-BE-NEXT: .LBB2_1: // %loop
122+
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
123+
; CHECK-BE-NEXT: add x9, x0, x8, lsl #5
124+
; CHECK-BE-NEXT: add x10, x9, #16
125+
; CHECK-BE-NEXT: ld1 { v0.4s }, [x9]
126+
; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
127+
; CHECK-BE-NEXT: add x8, x8, #1
128+
; CHECK-BE-NEXT: ld1 { v1.4s }, [x10]
129+
; CHECK-BE-NEXT: cmp x8, #1000
130+
; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
131+
; CHECK-BE-NEXT: xtn v0.8b, v0.8h
132+
; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
133+
; CHECK-BE-NEXT: b.eq .LBB2_1
134+
; CHECK-BE-NEXT: // %bb.2: // %exit
135+
; CHECK-BE-NEXT: ret
80136
entry:
81137
br label %loop
82138

0 commit comments

Comments (0)