Skip to content

Commit c74aea4

Browse files
author
Sjoerd Meijer
committed
[AArch64] Precommit extending load tests for D104782. NFC.
1 parent cc05418 commit c74aea4

File tree

2 files changed

+145
-27
lines changed

2 files changed

+145
-27
lines changed
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=LE
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=BE
4+
5+
; fsext_v4i32: load a <4 x i8> vector from %a and sign-extend it to <4 x i32>.
; The autogenerated assertions below record the code llc emits for both RUN
; configurations: four scalar ldrsb loads (byte offsets 0..3) whose results
; are inserted into the .s lanes of v0. The big-endian run additionally
; expects a rev64 + ext pair before the return.
define <4 x i32> @fsext_v4i32(<4 x i8>* %a) {
6+
; LE-LABEL: fsext_v4i32:
7+
; LE: // %bb.0:
8+
; LE-NEXT: ldrsb w8, [x0]
9+
; LE-NEXT: ldrsb w9, [x0, #1]
10+
; LE-NEXT: ldrsb w10, [x0, #2]
11+
; LE-NEXT: ldrsb w11, [x0, #3]
12+
; LE-NEXT: fmov s0, w8
13+
; LE-NEXT: mov v0.s[1], w9
14+
; LE-NEXT: mov v0.s[2], w10
15+
; LE-NEXT: mov v0.s[3], w11
16+
; LE-NEXT: ret
17+
;
18+
; BE-LABEL: fsext_v4i32:
19+
; BE: // %bb.0:
20+
; BE-NEXT: ldrsb w8, [x0]
21+
; BE-NEXT: ldrsb w9, [x0, #1]
22+
; BE-NEXT: ldrsb w10, [x0, #2]
23+
; BE-NEXT: ldrsb w11, [x0, #3]
24+
; BE-NEXT: fmov s0, w8
25+
; BE-NEXT: mov v0.s[1], w9
26+
; BE-NEXT: mov v0.s[2], w10
27+
; BE-NEXT: mov v0.s[3], w11
28+
; BE-NEXT: rev64 v0.4s, v0.4s
29+
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
30+
; BE-NEXT: ret
31+
%x = load <4 x i8>, <4 x i8>* %a
32+
%y = sext <4 x i8> %x to <4 x i32>
33+
ret <4 x i32> %y
34+
}
35+
36+
; fzext_v4i32: load a <4 x i8> vector from %a and zero-extend it to <4 x i32>.
; The autogenerated assertions below expect four scalar ldrb loads (byte
; offsets 0..3) inserted into the .s lanes of v0. The big-endian run
; additionally expects a rev64 + ext pair before the return.
define <4 x i32> @fzext_v4i32(<4 x i8>* %a) {
37+
; LE-LABEL: fzext_v4i32:
38+
; LE: // %bb.0:
39+
; LE-NEXT: ldrb w8, [x0]
40+
; LE-NEXT: ldrb w9, [x0, #1]
41+
; LE-NEXT: ldrb w10, [x0, #2]
42+
; LE-NEXT: ldrb w11, [x0, #3]
43+
; LE-NEXT: fmov s0, w8
44+
; LE-NEXT: mov v0.s[1], w9
45+
; LE-NEXT: mov v0.s[2], w10
46+
; LE-NEXT: mov v0.s[3], w11
47+
; LE-NEXT: ret
48+
;
49+
; BE-LABEL: fzext_v4i32:
50+
; BE: // %bb.0:
51+
; BE-NEXT: ldrb w8, [x0]
52+
; BE-NEXT: ldrb w9, [x0, #1]
53+
; BE-NEXT: ldrb w10, [x0, #2]
54+
; BE-NEXT: ldrb w11, [x0, #3]
55+
; BE-NEXT: fmov s0, w8
56+
; BE-NEXT: mov v0.s[1], w9
57+
; BE-NEXT: mov v0.s[2], w10
58+
; BE-NEXT: mov v0.s[3], w11
59+
; BE-NEXT: rev64 v0.4s, v0.4s
60+
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
61+
; BE-NEXT: ret
62+
%x = load <4 x i8>, <4 x i8>* %a
63+
%y = zext <4 x i8> %x to <4 x i32>
64+
ret <4 x i32> %y
65+
}
66+
67+
; loadExt.i32: load a <4 x i8> vector from %ref, extract element 0 and
; zero-extend that byte to i32. Both the little-endian and the big-endian
; run expect the whole sequence to be selected as a single
; `ldrb w0, [x0]` followed by the return.
define i32 @loadExt.i32(<4 x i8>* %ref) {
70+
; LE-LABEL: loadExt.i32:
71+
; LE: // %bb.0:
72+
; LE-NEXT: ldrb w0, [x0]
73+
; LE-NEXT: ret
74+
;
75+
; BE-LABEL: loadExt.i32:
76+
; BE: // %bb.0:
77+
; BE-NEXT: ldrb w0, [x0]
78+
; BE-NEXT: ret
79+
%a = load <4 x i8>, <4 x i8>* %ref
80+
%vecext = extractelement <4 x i8> %a, i32 0
81+
%conv = zext i8 %vecext to i32
82+
ret i32 %conv
83+
}
84+
85+
; fsext_v4i16: load a <4 x i8> vector from %a and sign-extend it to <4 x i16>.
; The autogenerated assertions below expect four scalar ldrsb loads (byte
; offsets 0..3) inserted into the .h lanes of v0. The big-endian run
; additionally expects a rev64 on the 4h arrangement before the return.
define <4 x i16> @fsext_v4i16(<4 x i8>* %a) {
86+
; LE-LABEL: fsext_v4i16:
87+
; LE: // %bb.0:
88+
; LE-NEXT: ldrsb w8, [x0]
89+
; LE-NEXT: ldrsb w9, [x0, #1]
90+
; LE-NEXT: ldrsb w10, [x0, #2]
91+
; LE-NEXT: ldrsb w11, [x0, #3]
92+
; LE-NEXT: fmov s0, w8
93+
; LE-NEXT: mov v0.h[1], w9
94+
; LE-NEXT: mov v0.h[2], w10
95+
; LE-NEXT: mov v0.h[3], w11
96+
; LE-NEXT: // kill: def $d0 killed $d0 killed $q0
97+
; LE-NEXT: ret
98+
;
99+
; BE-LABEL: fsext_v4i16:
100+
; BE: // %bb.0:
101+
; BE-NEXT: ldrsb w8, [x0]
102+
; BE-NEXT: ldrsb w9, [x0, #1]
103+
; BE-NEXT: ldrsb w10, [x0, #2]
104+
; BE-NEXT: ldrsb w11, [x0, #3]
105+
; BE-NEXT: fmov s0, w8
106+
; BE-NEXT: mov v0.h[1], w9
107+
; BE-NEXT: mov v0.h[2], w10
108+
; BE-NEXT: mov v0.h[3], w11
109+
; BE-NEXT: rev64 v0.4h, v0.4h
110+
; BE-NEXT: ret
111+
%x = load <4 x i8>, <4 x i8>* %a
112+
%y = sext <4 x i8> %x to <4 x i16>
113+
ret <4 x i16> %y
114+
}
115+
116+
; fzext_v4i16: load a <4 x i8> vector from %a and zero-extend it to <4 x i16>.
; The autogenerated assertions below expect four scalar ldrb loads (byte
; offsets 0..3) inserted into the .h lanes of v0. The big-endian run
; additionally expects a rev64 on the 4h arrangement before the return.
define <4 x i16> @fzext_v4i16(<4 x i8>* %a) {
117+
; LE-LABEL: fzext_v4i16:
118+
; LE: // %bb.0:
119+
; LE-NEXT: ldrb w8, [x0]
120+
; LE-NEXT: ldrb w9, [x0, #1]
121+
; LE-NEXT: ldrb w10, [x0, #2]
122+
; LE-NEXT: ldrb w11, [x0, #3]
123+
; LE-NEXT: fmov s0, w8
124+
; LE-NEXT: mov v0.h[1], w9
125+
; LE-NEXT: mov v0.h[2], w10
126+
; LE-NEXT: mov v0.h[3], w11
127+
; LE-NEXT: // kill: def $d0 killed $d0 killed $q0
128+
; LE-NEXT: ret
129+
;
130+
; BE-LABEL: fzext_v4i16:
131+
; BE: // %bb.0:
132+
; BE-NEXT: ldrb w8, [x0]
133+
; BE-NEXT: ldrb w9, [x0, #1]
134+
; BE-NEXT: ldrb w10, [x0, #2]
135+
; BE-NEXT: ldrb w11, [x0, #3]
136+
; BE-NEXT: fmov s0, w8
137+
; BE-NEXT: mov v0.h[1], w9
138+
; BE-NEXT: mov v0.h[2], w10
139+
; BE-NEXT: mov v0.h[3], w11
140+
; BE-NEXT: rev64 v0.4h, v0.4h
141+
; BE-NEXT: ret
142+
%x = load <4 x i8>, <4 x i8>* %a
143+
%y = zext <4 x i8> %x to <4 x i16>
144+
ret <4 x i16> %y
145+
}

llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll renamed to llvm/test/CodeGen/AArch64/neon-truncstore.ll

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,30 +38,3 @@ define void @truncStore.v8i16(<8 x i16> %a, <8 x i8>* %result) {
3838
store <8 x i8> %b, <8 x i8>* %result
3939
ret void
4040
}
41-
42-
; A vector LoadExt can not be selected.
43-
; Test a vector load IR and a sext/zext IR can be selected correctly.
44-
define <4 x i32> @loadSExt.v4i8(<4 x i8>* %ref) {
45-
; CHECK-LABEL: loadSExt.v4i8:
46-
; CHECK: ldrsb
47-
%a = load <4 x i8>, <4 x i8>* %ref
48-
%conv = sext <4 x i8> %a to <4 x i32>
49-
ret <4 x i32> %conv
50-
}
51-
52-
define <4 x i32> @loadZExt.v4i8(<4 x i8>* %ref) {
53-
; CHECK-LABEL: loadZExt.v4i8:
54-
; CHECK: ldrb
55-
%a = load <4 x i8>, <4 x i8>* %ref
56-
%conv = zext <4 x i8> %a to <4 x i32>
57-
ret <4 x i32> %conv
58-
}
59-
60-
define i32 @loadExt.i32(<4 x i8>* %ref) {
61-
; CHECK-LABEL: loadExt.i32:
62-
; CHECK: ldrb
63-
%a = load <4 x i8>, <4 x i8>* %ref
64-
%vecext = extractelement <4 x i8> %a, i32 0
65-
%conv = zext i8 %vecext to i32
66-
ret i32 %conv
67-
}

0 commit comments

Comments
 (0)