Skip to content

Commit d20afbd

Browse files
committed
[AArch64] Additional testing for uqshl and regenerate arm64-vshift.ll. NFC
This fills in some missing test coverage for the NEON shift intrinsics and regenerates the existing tests. See D148309 and D148311.
1 parent a423b7f commit d20afbd

File tree

3 files changed

+2611
-1487
lines changed

3 files changed

+2611
-1487
lines changed
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s
3+
4+
; Adds element 0 extracted from the <1 x i64> argument %A to an i64 loaded
; from %B and returns the sum. The expected code moves the vector lane into a
; general-purpose register with fmov and performs a scalar integer add.
define i64 @add_i64_ext_load(<1 x i64> %A, ptr %B) nounwind {
5+
; CHECK-LABEL: add_i64_ext_load:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
8+
; CHECK-NEXT: ldr x8, [x0]
9+
; CHECK-NEXT: fmov x9, d0
10+
; CHECK-NEXT: add x0, x9, x8
11+
; CHECK-NEXT: ret
12+
%a = extractelement <1 x i64> %A, i32 0
13+
%b = load i64, ptr %B
14+
%c = add i64 %a, %b
15+
ret i64 %c
16+
}
17+
18+
; Subtracts an i64 loaded from %B from element 0 of the <1 x i64> argument %A
; and returns the difference. The expected code moves the vector lane into a
; general-purpose register with fmov and performs a scalar integer sub.
define i64 @sub_i64_ext_load(<1 x i64> %A, ptr %B) nounwind {
19+
; CHECK-LABEL: sub_i64_ext_load:
20+
; CHECK: // %bb.0:
21+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
22+
; CHECK-NEXT: ldr x8, [x0]
23+
; CHECK-NEXT: fmov x9, d0
24+
; CHECK-NEXT: sub x0, x9, x8
25+
; CHECK-NEXT: ret
26+
%a = extractelement <1 x i64> %A, i32 0
27+
%b = load i64, ptr %B
28+
%c = sub i64 %a, %b
29+
ret i64 %c
30+
}
31+
32+
; Adds element 0 of the <1 x i64> argument %A to the i64 loaded from %B and
; stores the sum back to %B; nothing is returned. The expected code does the
; add in general-purpose registers and writes the result with a scalar str.
define void @add_i64_ext_load_store(<1 x i64> %A, ptr %B) nounwind {
33+
; CHECK-LABEL: add_i64_ext_load_store:
34+
; CHECK: // %bb.0:
35+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
36+
; CHECK-NEXT: ldr x8, [x0]
37+
; CHECK-NEXT: fmov x9, d0
38+
; CHECK-NEXT: add x8, x9, x8
39+
; CHECK-NEXT: str x8, [x0]
40+
; CHECK-NEXT: ret
41+
%a = extractelement <1 x i64> %A, i32 0
42+
%b = load i64, ptr %B
43+
%c = add i64 %a, %b
44+
store i64 %c, ptr %B
45+
ret void
46+
}
47+
48+
; Adds element 0 of the <2 x i64> argument %A to an i64 loaded from %B and
; returns the sum. Only lane 0 is read, so the expected code takes it from d0
; with fmov and performs a scalar integer add.
define i64 @add_v2i64_ext_load(<2 x i64> %A, ptr %B) nounwind {
49+
; CHECK-LABEL: add_v2i64_ext_load:
50+
; CHECK: // %bb.0:
51+
; CHECK-NEXT: ldr x8, [x0]
52+
; CHECK-NEXT: fmov x9, d0
53+
; CHECK-NEXT: add x0, x9, x8
54+
; CHECK-NEXT: ret
55+
%a = extractelement <2 x i64> %A, i32 0
56+
%b = load i64, ptr %B
57+
%c = add i64 %a, %b
58+
ret i64 %c
59+
}
60+
61+
; Adds element 0 of %A to element 0 of %B, both <1 x i64> arguments, and
; returns the sum. The expected code moves each lane (d0, d1) into a
; general-purpose register with fmov before a scalar integer add.
define i64 @add_i64_ext_ext(<1 x i64> %A, <1 x i64> %B) nounwind {
62+
; CHECK-LABEL: add_i64_ext_ext:
63+
; CHECK: // %bb.0:
64+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
65+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
66+
; CHECK-NEXT: fmov x8, d0
67+
; CHECK-NEXT: fmov x9, d1
68+
; CHECK-NEXT: add x0, x8, x9
69+
; CHECK-NEXT: ret
70+
%a = extractelement <1 x i64> %A, i32 0
71+
%b = extractelement <1 x i64> %B, i32 0
72+
%c = add i64 %a, %b
73+
ret i64 %c
74+
}
75+
76+
; 32-bit variant: adds element 0 of the <1 x i32> argument %A to an i32 loaded
; from %B and returns the sum. The expected code uses the 32-bit register
; views (s0 for the lane, w8/w9/w0 for the integer add).
define i32 @add_i32_ext_load(<1 x i32> %A, ptr %B) nounwind {
77+
; CHECK-LABEL: add_i32_ext_load:
78+
; CHECK: // %bb.0:
79+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
80+
; CHECK-NEXT: ldr w8, [x0]
81+
; CHECK-NEXT: fmov w9, s0
82+
; CHECK-NEXT: add w0, w9, w8
83+
; CHECK-NEXT: ret
84+
%a = extractelement <1 x i32> %A, i32 0
85+
%b = load i32, ptr %B
86+
%c = add i32 %a, %b
87+
ret i32 %c
88+
}
89+
90+
; Computes (%A[0] + %B[0]) + %B[1], where %A is <1 x i64> and %B is <2 x i64>,
; and returns the result. The expected code reads lane 1 of %B with
; "mov x8, v1.d[1]", reads the two lane-0 values with fmov, and chains two
; scalar integer adds.
define i64 @add_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind {
91+
; CHECK-LABEL: add_i64_ext_ext_test1:
92+
; CHECK: // %bb.0:
93+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
94+
; CHECK-NEXT: mov x8, v1.d[1]
95+
; CHECK-NEXT: fmov x9, d0
96+
; CHECK-NEXT: fmov x10, d1
97+
; CHECK-NEXT: add x9, x9, x10
98+
; CHECK-NEXT: add x0, x9, x8
99+
; CHECK-NEXT: ret
100+
%a = extractelement <1 x i64> %A, i32 0
101+
%b = extractelement <2 x i64> %B, i32 0
102+
%c = extractelement <2 x i64> %B, i32 1
103+
%d = add i64 %a, %b
104+
%e = add i64 %d, %c
105+
ret i64 %e
106+
}
107+
108+
; Computes (%A[0] - %B[0]) - %B[1], where %A is <1 x i64> and %B is <2 x i64>,
; and returns the result. The expected code reads lane 1 of %B with
; "mov x8, v1.d[1]", reads the two lane-0 values with fmov, and chains two
; scalar integer subs.
define i64 @sub_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind {
109+
; CHECK-LABEL: sub_i64_ext_ext_test1:
110+
; CHECK: // %bb.0:
111+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
112+
; CHECK-NEXT: mov x8, v1.d[1]
113+
; CHECK-NEXT: fmov x9, d0
114+
; CHECK-NEXT: fmov x10, d1
115+
; CHECK-NEXT: sub x9, x9, x10
116+
; CHECK-NEXT: sub x0, x9, x8
117+
; CHECK-NEXT: ret
118+
%a = extractelement <1 x i64> %A, i32 0
119+
%b = extractelement <2 x i64> %B, i32 0
120+
%c = extractelement <2 x i64> %B, i32 1
121+
%d = sub i64 %a, %b
122+
%e = sub i64 %d, %c
123+
ret i64 %e
124+
}
Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,53 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
12
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi | FileCheck %s
23

3-
; Check if sqshl/uqshl with constant shift amout can be selected.
4+
; Check if sqshl/uqshl with constant shift amount can be selected.
45
; Calls the scalar i64 llvm.aarch64.neon.sqshl intrinsic on %a with a constant
; shift amount of 36 and returns the result. The expected code moves %a into
; d0, uses the immediate form "sqshl d0, d0, #36", and moves the result back
; to x0.
define i64 @test_vqshld_s64_i(i64 %a) {
56
; CHECK-LABEL: test_vqshld_s64_i:
6-
; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #36
7+
; CHECK: // %bb.0:
8+
; CHECK-NEXT: fmov d0, x0
9+
; CHECK-NEXT: sqshl d0, d0, #36
10+
; CHECK-NEXT: fmov x0, d0
11+
; CHECK-NEXT: ret
712
%1 = tail call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 36)
813
ret i64 %1
914
}
1015

1116
; Calls the scalar i64 llvm.aarch64.neon.uqshl intrinsic on %a with a constant
; shift amount of 36 and returns the result. The expected code moves %a into
; d0, uses the immediate form "uqshl d0, d0, #36", and moves the result back
; to x0.
define i64 @test_vqshld_u64_i(i64 %a) {
1217
; CHECK-LABEL: test_vqshld_u64_i:
13-
; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #36
18+
; CHECK: // %bb.0:
19+
; CHECK-NEXT: fmov d0, x0
20+
; CHECK-NEXT: uqshl d0, d0, #36
21+
; CHECK-NEXT: fmov x0, d0
22+
; CHECK-NEXT: ret
1423
%1 = tail call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 36)
1524
ret i64 %1
1625
}
1726

27+
; 32-bit variant: calls the scalar i32 llvm.aarch64.neon.sqshl intrinsic on %a
; with a constant shift amount of 16 and returns the result. The expected code
; moves %a into s0, uses the immediate form "sqshl s0, s0, #16", and moves the
; result back to w0.
define i32 @test_vqshld_s32_i(i32 %a) {
28+
; CHECK-LABEL: test_vqshld_s32_i:
29+
; CHECK: // %bb.0:
30+
; CHECK-NEXT: fmov s0, w0
31+
; CHECK-NEXT: sqshl s0, s0, #16
32+
; CHECK-NEXT: fmov w0, s0
33+
; CHECK-NEXT: ret
34+
%1 = tail call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 16)
35+
ret i32 %1
36+
}
37+
38+
; 32-bit variant: calls the scalar i32 llvm.aarch64.neon.uqshl intrinsic on %a
; with a constant shift amount of 16 and returns the result. The expected code
; moves %a into s0, uses the immediate form "uqshl s0, s0, #16", and moves the
; result back to w0.
define i32 @test_vqshld_u32_i(i32 %a) {
39+
; CHECK-LABEL: test_vqshld_u32_i:
40+
; CHECK: // %bb.0:
41+
; CHECK-NEXT: fmov s0, w0
42+
; CHECK-NEXT: uqshl s0, s0, #16
43+
; CHECK-NEXT: fmov w0, s0
44+
; CHECK-NEXT: ret
45+
%1 = tail call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 16)
46+
ret i32 %1
47+
}
48+
1849
declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64)
1950
declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64)
51+
52+
declare i32 @llvm.aarch64.neon.uqshl.i32(i32, i32)
53+
declare i32 @llvm.aarch64.neon.sqshl.i32(i32, i32)

0 commit comments

Comments
 (0)