1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -o - | FileCheck --check-prefixes=CHECK,CHECK-NO-EXTEND-ROUND %s
3
- ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve --combiner-vector-fcopysign-extend-round -o - | FileCheck --check-prefixes=CHECK,CHECK-EXTEND-ROUND %s
2
+ ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -o - | FileCheck %s
4
3
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5
4
6
5
;============ v2f32
@@ -47,32 +46,16 @@ define <vscale x 4 x float> @test_copysign_v4f32_v4f32(<vscale x 4 x float> %a,
47
46
48
47
; SplitVecOp #1
49
48
define <vscale x 4 x float > @test_copysign_v4f32_v4f64 (<vscale x 4 x float > %a , <vscale x 4 x double > %b ) #0 {
50
- ; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64:
51
- ; CHECK-NO-EXTEND-ROUND: // %bb.0:
52
- ; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
53
- ; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
54
- ; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.s, p0/m, z2.d
55
- ; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
56
- ; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.s, z1.s, z2.s
57
- ; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
58
- ; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
59
- ; CHECK-NO-EXTEND-ROUND-NEXT: ret
60
- ;
61
- ; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64:
62
- ; CHECK-EXTEND-ROUND: // %bb.0:
63
- ; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
64
- ; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.d, z0.s
65
- ; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s
66
- ; CHECK-EXTEND-ROUND-NEXT: fcvt z2.s, p0/m, z2.d
67
- ; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
68
- ; CHECK-EXTEND-ROUND-NEXT: and z3.s, z3.s, #0x7fffffff
69
- ; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
70
- ; CHECK-EXTEND-ROUND-NEXT: and z2.s, z2.s, #0x80000000
71
- ; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
72
- ; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
73
- ; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
74
- ; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.s, z0.s, z2.s
75
- ; CHECK-EXTEND-ROUND-NEXT: ret
49
+ ; CHECK-LABEL: test_copysign_v4f32_v4f64:
50
+ ; CHECK: // %bb.0:
51
+ ; CHECK-NEXT: ptrue p0.d
52
+ ; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
53
+ ; CHECK-NEXT: fcvt z2.s, p0/m, z2.d
54
+ ; CHECK-NEXT: fcvt z1.s, p0/m, z1.d
55
+ ; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s
56
+ ; CHECK-NEXT: and z1.s, z1.s, #0x80000000
57
+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
58
+ ; CHECK-NEXT: ret
76
59
%tmp0 = fptrunc <vscale x 4 x double > %b to <vscale x 4 x float >
77
60
%r = call <vscale x 4 x float > @llvm.copysign.v4f32 (<vscale x 4 x float > %a , <vscale x 4 x float > %tmp0 )
78
61
ret <vscale x 4 x float > %r
@@ -177,32 +160,16 @@ define <vscale x 4 x half> @test_copysign_v4f16_v4f32(<vscale x 4 x half> %a, <v
177
160
}
178
161
179
162
define <vscale x 4 x half > @test_copysign_v4f16_v4f64 (<vscale x 4 x half > %a , <vscale x 4 x double > %b ) #0 {
180
- ; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64:
181
- ; CHECK-NO-EXTEND-ROUND: // %bb.0:
182
- ; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
183
- ; CHECK-NO-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
184
- ; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.d
185
- ; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.d
186
- ; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.s, z1.s, z2.s
187
- ; CHECK-NO-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
188
- ; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
189
- ; CHECK-NO-EXTEND-ROUND-NEXT: ret
190
- ;
191
- ; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64:
192
- ; CHECK-EXTEND-ROUND: // %bb.0:
193
- ; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
194
- ; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.d, z0.s
195
- ; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s
196
- ; CHECK-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.d
197
- ; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.d
198
- ; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff
199
- ; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
200
- ; CHECK-EXTEND-ROUND-NEXT: and z2.h, z2.h, #0x8000
201
- ; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
202
- ; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
203
- ; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
204
- ; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.s, z0.s, z2.s
205
- ; CHECK-EXTEND-ROUND-NEXT: ret
163
+ ; CHECK-LABEL: test_copysign_v4f16_v4f64:
164
+ ; CHECK: // %bb.0:
165
+ ; CHECK-NEXT: ptrue p0.d
166
+ ; CHECK-NEXT: and z0.h, z0.h, #0x7fff
167
+ ; CHECK-NEXT: fcvt z2.h, p0/m, z2.d
168
+ ; CHECK-NEXT: fcvt z1.h, p0/m, z1.d
169
+ ; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s
170
+ ; CHECK-NEXT: and z1.h, z1.h, #0x8000
171
+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
172
+ ; CHECK-NEXT: ret
206
173
%tmp0 = fptrunc <vscale x 4 x double > %b to <vscale x 4 x half >
207
174
%r = call <vscale x 4 x half > @llvm.copysign.v4f16 (<vscale x 4 x half > %a , <vscale x 4 x half > %tmp0 )
208
175
ret <vscale x 4 x half > %r
@@ -224,32 +191,16 @@ define <vscale x 8 x half> @test_copysign_v8f16_v8f16(<vscale x 8 x half> %a, <v
224
191
}
225
192
226
193
define <vscale x 8 x half > @test_copysign_v8f16_v8f32 (<vscale x 8 x half > %a , <vscale x 8 x float > %b ) #0 {
227
- ; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32:
228
- ; CHECK-NO-EXTEND-ROUND: // %bb.0:
229
- ; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s
230
- ; CHECK-NO-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
231
- ; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.s
232
- ; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
233
- ; CHECK-NO-EXTEND-ROUND-NEXT: uzp1 z1.h, z1.h, z2.h
234
- ; CHECK-NO-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
235
- ; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
236
- ; CHECK-NO-EXTEND-ROUND-NEXT: ret
237
- ;
238
- ; CHECK-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32:
239
- ; CHECK-EXTEND-ROUND: // %bb.0:
240
- ; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s
241
- ; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.s, z0.h
242
- ; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.s, z0.h
243
- ; CHECK-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.s
244
- ; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
245
- ; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff
246
- ; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
247
- ; CHECK-EXTEND-ROUND-NEXT: and z2.h, z2.h, #0x8000
248
- ; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
249
- ; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
250
- ; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
251
- ; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.h, z0.h, z2.h
252
- ; CHECK-EXTEND-ROUND-NEXT: ret
194
+ ; CHECK-LABEL: test_copysign_v8f16_v8f32:
195
+ ; CHECK: // %bb.0:
196
+ ; CHECK-NEXT: ptrue p0.s
197
+ ; CHECK-NEXT: and z0.h, z0.h, #0x7fff
198
+ ; CHECK-NEXT: fcvt z2.h, p0/m, z2.s
199
+ ; CHECK-NEXT: fcvt z1.h, p0/m, z1.s
200
+ ; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
201
+ ; CHECK-NEXT: and z1.h, z1.h, #0x8000
202
+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
203
+ ; CHECK-NEXT: ret
253
204
%tmp0 = fptrunc <vscale x 8 x float > %b to <vscale x 8 x half >
254
205
%r = call <vscale x 8 x half > @llvm.copysign.v8f16 (<vscale x 8 x half > %a , <vscale x 8 x half > %tmp0 )
255
206
ret <vscale x 8 x half > %r
@@ -259,48 +210,28 @@ define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <v
259
210
;========== FCOPYSIGN_EXTEND_ROUND
260
211
261
212
; Test: llvm.copysign on two <vscale x 4 x float> operands (%a, %b), with the
; result then fptrunc'ed to <vscale x 4 x half> and returned.
; The '+' CHECK lines expect the sign to be merged on the .s elements first
; (and #0x80000000 / and #0x7fffffff / orr) followed by a single fcvt to .h.
; NOTE(review): this block appears in diff form -- '-' lines carry the old
; CHECK-NO-EXTEND-ROUND / CHECK-EXTEND-ROUND prefixes and '+' lines the single
; CHECK prefix that replaces them; the bare numbers look like diff gutter line
; numbers rather than test content. Confirm against the real test file.
define <vscale x 4 x half > @test_copysign_nxv4f32_nxv4f16 (<vscale x 4 x float > %a , <vscale x 4 x float > %b ) #0 {
262
- ; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
263
- ; CHECK-NO-EXTEND-ROUND: // %bb.0:
264
- ; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
265
- ; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
266
- ; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s
267
- ; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
268
- ; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s
269
- ; CHECK-NO-EXTEND-ROUND-NEXT: ret
270
- ;
271
- ; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
272
- ; CHECK-EXTEND-ROUND: // %bb.0:
273
- ; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s
274
- ; CHECK-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s
275
- ; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
276
- ; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
277
- ; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
278
- ; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
279
- ; CHECK-EXTEND-ROUND-NEXT: ret
213
+ ; CHECK-LABEL: test_copysign_nxv4f32_nxv4f16:
214
+ ; CHECK: // %bb.0:
215
+ ; CHECK-NEXT: and z1.s, z1.s, #0x80000000
216
+ ; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
217
+ ; CHECK-NEXT: ptrue p0.s
218
+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
219
+ ; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
220
+ ; CHECK-NEXT: ret
280
221
%t1 = call <vscale x 4 x float > @llvm.copysign.v4f32 (<vscale x 4 x float > %a , <vscale x 4 x float > %b )
281
222
%t2 = fptrunc <vscale x 4 x float > %t1 to <vscale x 4 x half >
282
223
ret <vscale x 4 x half > %t2
283
224
}
284
225
285
226
define <vscale x 2 x float > @test_copysign_nxv2f64_nxv2f32 (<vscale x 2 x double > %a , <vscale x 2 x double > %b ) #0 {
286
- ; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
287
- ; CHECK-NO-EXTEND-ROUND: // %bb.0:
288
- ; CHECK-NO-EXTEND-ROUND-NEXT: and z1.d, z1.d, #0x8000000000000000
289
- ; CHECK-NO-EXTEND-ROUND-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
290
- ; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
291
- ; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
292
- ; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d
293
- ; CHECK-NO-EXTEND-ROUND-NEXT: ret
294
- ;
295
- ; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
296
- ; CHECK-EXTEND-ROUND: // %bb.0:
297
- ; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
298
- ; CHECK-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d
299
- ; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
300
- ; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
301
- ; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
302
- ; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
303
- ; CHECK-EXTEND-ROUND-NEXT: ret
227
+ ; CHECK-LABEL: test_copysign_nxv2f64_nxv2f32:
228
+ ; CHECK: // %bb.0:
229
+ ; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000
230
+ ; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
231
+ ; CHECK-NEXT: ptrue p0.d
232
+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
233
+ ; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
234
+ ; CHECK-NEXT: ret
304
235
%t1 = call <vscale x 2 x double > @llvm.copysign.v2f64 (<vscale x 2 x double > %a , <vscale x 2 x double > %b )
305
236
%t2 = fptrunc <vscale x 2 x double > %t1 to <vscale x 2 x float >
306
237
ret <vscale x 2 x float > %t2
0 commit comments