@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+fullfp16,+bf16 | FileCheck %s
 
 ; Check that building a vector from floats doesn't insert an unnecessary
 ; copy for lane zero.
@@ -10,9 +10,9 @@ define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
 ; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3
-; CHECK-NEXT: mov.s v0[1], v1[0]
-; CHECK-NEXT: mov.s v0[2], v2[0]
-; CHECK-NEXT: mov.s v0[3], v3[0]
+; CHECK-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-NEXT: mov v0.s[3], v3.s[0]
 ; CHECK-NEXT: ret
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float %b, i32 1
@@ -26,7 +26,7 @@ define <8 x i16> @build_all_zero(<8 x i16> %a) #1 {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #44672
 ; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mul.8h v0, v0, v1
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT: ret
   %b = add <8 x i16> %a, <i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
   %c = mul <8 x i16> %b, <i16 -20864, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
@@ -41,7 +41,7 @@ define <8 x i16> @build_all_zero(<8 x i16> %a) #1 {
 define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) {
 ; CHECK-LABEL: concat_2_build_vector:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi v0.2d, #0000000000000000
 ; CHECK-NEXT: ret
   %vshl_n = shl <4 x i16> %in0, <i16 8, i16 8, i16 8, i16 8>
   %vshl_n2 = shl <4 x i16> %vshl_n, <i16 9, i16 9, i16 9, i16 9>
@@ -98,9 +98,165 @@ define <1 x double> @convert_single_fp_vector_constant(i1 %cmp) {
 ; CHECK-NEXT: csetm x9, ne
 ; CHECK-NEXT: fmov d0, x8
 ; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: and.8b v0, v0, v1
+; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT: ret
 entry:
   %sel = select i1 %cmp, <1 x double> <double 1.000000e+00>, <1 x double> zeroinitializer
   ret <1 x double> %sel
 }
+
+; All Zero and All -Zero tests.
+
+define <2 x double> @poszero_v2f64(<2 x double> %a) {
+; CHECK-LABEL: poszero_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+  %b = fadd <2 x double> %a, <double 0.0, double 0.0>
+  ret <2 x double> %b
+}
+
+define <2 x double> @negzero_v2f64(<2 x double> %a) {
+; CHECK-LABEL: negzero_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-9223372036854775808
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+  %b = fmul <2 x double> %a, <double -0.0, double -0.0>
+  ret <2 x double> %b
+}
+
+define <1 x double> @poszero_v1f64(<1 x double> %a) {
+; CHECK-LABEL: poszero_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: ret
+  %b = fadd <1 x double> %a, <double 0.0>
+  ret <1 x double> %b
+}
+
+define <1 x double> @negzero_v1f64(<1 x double> %a) {
+; CHECK-LABEL: negzero_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-9223372036854775808
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+  %b = fmul <1 x double> %a, <double -0.0>
+  ret <1 x double> %b
+}
+
+define <4 x float> @poszero_v4f32(<4 x float> %a) {
+; CHECK-LABEL: poszero_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+  %b = fadd <4 x float> %a, <float 0.0, float 0.0, float 0.0, float 0.0>
+  ret <4 x float> %b
+}
+
+define <4 x float> @negzero_v4f32(<4 x float> %a) {
+; CHECK-LABEL: negzero_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4s, #128, lsl #24
+; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+  %b = fmul <4 x float> %a, <float -0.0, float -0.0, float -0.0, float -0.0>
+  ret <4 x float> %b
+}
+
+define <2 x float> @poszero_v2f32(<2 x float> %a) {
+; CHECK-LABEL: poszero_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+  %b = fadd <2 x float> %a, <float 0.0, float 0.0>
+  ret <2 x float> %b
+}
+
+define <2 x float> @negzero_v2f32(<2 x float> %a) {
+; CHECK-LABEL: negzero_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2s, #128, lsl #24
+; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+  %b = fmul <2 x float> %a, <float -0.0, float -0.0>
+  ret <2 x float> %b
+}
+
+define <8 x half> @poszero_v8f16(<8 x half> %a) {
+; CHECK-LABEL: poszero_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+  %b = fadd <8 x half> %a, <half 0.0, half 0.0, half 0.0, half 0.0, half 0.0, half 0.0, half 0.0, half 0.0>
+  ret <8 x half> %b
+}
+
+define <8 x half> @negzero_v8f16(<8 x half> %a) {
+; CHECK-LABEL: negzero_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.8h, #128, lsl #8
+; CHECK-NEXT: fmul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+  %b = fmul <8 x half> %a, <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>
+  ret <8 x half> %b
+}
+
+define <4 x half> @poszero_v4f16(<4 x half> %a) {
+; CHECK-LABEL: poszero_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+  %b = fadd <4 x half> %a, <half 0.0, half 0.0, half 0.0, half 0.0>
+  ret <4 x half> %b
+}
+
+define <4 x half> @negzero_v4f16(<4 x half> %a) {
+; CHECK-LABEL: negzero_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4h, #128, lsl #8
+; CHECK-NEXT: fmul v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+  %b = fmul <4 x half> %a, <half -0.0, half -0.0, half -0.0, half -0.0>
+  ret <4 x half> %b
+}
+
+define <8 x bfloat> @poszero_v8bf16(<8 x bfloat> %a) {
+; CHECK-LABEL: poszero_v8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+  ret <8 x bfloat> <bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0>
+}
+
+define <8 x bfloat> @negzero_v8bf16(<8 x bfloat> %a) {
+; CHECK-LABEL: negzero_v8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.8h, #128, lsl #8
+; CHECK-NEXT: ret
+  ret <8 x bfloat> <bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0>
+}
+
+define <4 x bfloat> @poszero_v4bf16(<4 x bfloat> %a) {
+; CHECK-LABEL: poszero_v4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+  ret <4 x bfloat> <bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0>
+}
+
+define <4 x bfloat> @negzero_v4bf16(<4 x bfloat> %a) {
+; CHECK-LABEL: negzero_v4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4h, #128, lsl #8
+; CHECK-NEXT: ret
+  ret <4 x bfloat> <bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0>
+}