@@ -277,21 +277,64 @@ define <8 x i16> @bitcast_h_to_i(float, <8 x half> %a) {
277
277
ret <8 x i16 > %2
278
278
}
279
279
280
-
281
- define <8 x half > @sitofp_i8 (<8 x i8 > %a ) #0 {
282
- ; CHECK-LABEL: sitofp_i8:
283
- ; CHECK-NEXT: sshll v[[REG1:[0-9]+]].8h, v0.8b, #0
284
- ; CHECK-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v[[REG1]].8h, #0
285
- ; CHECK-NEXT: sshll [[HI:v[0-9]+\.4s]], v[[REG1]].4h, #0
286
- ; CHECK-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]]
287
- ; CHECK-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]]
288
- ; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
289
- ; CHECK-DAG: fcvtn v0.4h, [[HIF]]
290
- ; CHECK: mov v0.d[1], v[[LOREG]].d[0]
280
+ define <4 x half > @sitofp_v4i8 (<4 x i8 > %a ) #0 {
281
+ ; CHECK-CVT-LABEL: sitofp_v4i8:
282
+ ; CHECK-CVT-NEXT: shl v0.4h, v0.4h, #8
283
+ ; CHECK-CVT-NEXT: sshr v0.4h, v0.4h, #8
284
+ ; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0
285
+ ; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s
286
+ ; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
287
+ ; CHECK-CVT-NEXT: ret
288
+ ;
289
+ ; CHECK-FP16-LABEL: sitofp_v4i8:
290
+ ; CHECK-FP16-NEXT: shl v0.4h, v0.4h, #8
291
+ ; CHECK-FP16-NEXT: sshr v0.4h, v0.4h, #8
292
+ ; CHECK-FP16-NEXT: scvtf v0.4h, v0.4h
293
+ ; CHECK-FP16-NEXT: ret
294
+ ;
295
+ %1 = sitofp <4 x i8 > %a to <4 x half >
296
+ ret <4 x half > %1
297
+ }
298
+
299
+ define <8 x half > @sitofp_v8i8 (<8 x i8 > %a ) #0 {
300
+ ; CHECK-LABEL: sitofp_v8i8:
301
+ ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
302
+ ; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0
303
+ ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
304
+ ; CHECK-NEXT: scvtf v1.4s, v1.4s
305
+ ; CHECK-NEXT: scvtf v0.4s, v0.4s
306
+ ; CHECK-NEXT: fcvtn v1.4h, v1.4s
307
+ ; CHECK-NEXT: fcvtn v0.4h, v0.4s
308
+ ; CHECK-NEXT: mov v0.d[1], v1.d[0]
309
+ ; CHECK-NEXT: ret
310
+ ;
291
311
%1 = sitofp <8 x i8 > %a to <8 x half >
292
312
ret <8 x half > %1
293
313
}
294
314
315
+ define <16 x half > @sitofp_v16i8 (<16 x i8 > %a ) #0 {
316
+ ; CHECK-LABEL: sitofp_v16i8:
317
+ ; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
318
+ ; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0
319
+ ; CHECK-NEXT: sshll v1.4s, v1.4h, #0
320
+ ; CHECK-NEXT: scvtf v2.4s, v2.4s
321
+ ; CHECK-NEXT: scvtf v1.4s, v1.4s
322
+ ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
323
+ ; CHECK-NEXT: fcvtn v2.4h, v2.4s
324
+ ; CHECK-NEXT: fcvtn v1.4h, v1.4s
325
+ ; CHECK-NEXT: mov v1.d[1], v2.d[0]
326
+ ; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
327
+ ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
328
+ ; CHECK-NEXT: scvtf v2.4s, v2.4s
329
+ ; CHECK-NEXT: scvtf v0.4s, v0.4s
330
+ ; CHECK-NEXT: fcvtn v2.4h, v2.4s
331
+ ; CHECK-NEXT: fcvtn v0.4h, v0.4s
332
+ ; CHECK-NEXT: mov v0.d[1], v2.d[0]
333
+ ; CHECK-NEXT: ret
334
+ ;
335
+ %1 = sitofp <16 x i8 > %a to <16 x half >
336
+ ret <16 x half > %1
337
+ }
295
338
296
339
define <8 x half > @sitofp_i16 (<8 x i16 > %a ) #0 {
297
340
; CHECK-LABEL: sitofp_i16:
@@ -307,7 +350,6 @@ define <8 x half> @sitofp_i16(<8 x i16> %a) #0 {
307
350
ret <8 x half > %1
308
351
}
309
352
310
-
311
353
define <8 x half > @sitofp_i32 (<8 x i32 > %a ) #0 {
312
354
; CHECK-LABEL: sitofp_i32:
313
355
; CHECK-DAG: scvtf [[OP1:v[0-9]+\.4s]], v0.4s
@@ -331,20 +373,62 @@ define <8 x half> @sitofp_i64(<8 x i64> %a) #0 {
331
373
ret <8 x half > %1
332
374
}
333
375
334
- define <8 x half > @uitofp_i8 (<8 x i8 > %a ) #0 {
335
- ; CHECK-LABEL: uitofp_i8:
336
- ; CHECK-NEXT: ushll v[[REG1:[0-9]+]].8h, v0.8b, #0
337
- ; CHECK-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v[[REG1]].8h, #0
338
- ; CHECK-NEXT: ushll [[HI:v[0-9]+\.4s]], v[[REG1]].4h, #0
339
- ; CHECK-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]]
340
- ; CHECK-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]]
341
- ; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
342
- ; CHECK-DAG: fcvtn v0.4h, [[HIF]]
343
- ; CHECK: mov v0.d[1], v[[LOREG]].d[0]
376
+ define <4 x half > @uitofp_v4i8 (<4 x i8 > %a ) #0 {
377
+ ; CHECK-CVT-LABEL: uitofp_v4i8:
378
+ ; CHECK-CVT-NEXT: bic v0.4h, #255, lsl #8
379
+ ; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0
380
+ ; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s
381
+ ; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
382
+ ; CHECK-CVT-NEXT: ret
383
+ ;
384
+ ; CHECK-FP16-LABEL: uitofp_v4i8:
385
+ ; CHECK-FP16-NEXT: bic v0.4h, #255, lsl #8
386
+ ; CHECK-FP16-NEXT: ucvtf v0.4h, v0.4h
387
+ ; CHECK-FP16-NEXT: ret
388
+ ;
389
+ %1 = uitofp <4 x i8 > %a to <4 x half >
390
+ ret <4 x half > %1
391
+ }
392
+
393
+ define <8 x half > @uitofp_v8i8 (<8 x i8 > %a ) #0 {
394
+ ; CHECK-LABEL: uitofp_v8i8:
395
+ ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
396
+ ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
397
+ ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
398
+ ; CHECK-NEXT: ucvtf v1.4s, v1.4s
399
+ ; CHECK-NEXT: ucvtf v0.4s, v0.4s
400
+ ; CHECK-NEXT: fcvtn v1.4h, v1.4s
401
+ ; CHECK-NEXT: fcvtn v0.4h, v0.4s
402
+ ; CHECK-NEXT: mov v0.d[1], v1.d[0]
403
+ ; CHECK-NEXT: ret
404
+ ;
344
405
%1 = uitofp <8 x i8 > %a to <8 x half >
345
406
ret <8 x half > %1
346
407
}
347
408
409
+ define <16 x half > @uitofp_v16i8 (<16 x i8 > %a ) #0 {
410
+ ; CHECK-LABEL: uitofp_v16i8:
411
+ ; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
412
+ ; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0
413
+ ; CHECK-NEXT: ushll v1.4s, v1.4h, #0
414
+ ; CHECK-NEXT: ucvtf v2.4s, v2.4s
415
+ ; CHECK-NEXT: ucvtf v1.4s, v1.4s
416
+ ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
417
+ ; CHECK-NEXT: fcvtn v2.4h, v2.4s
418
+ ; CHECK-NEXT: fcvtn v1.4h, v1.4s
419
+ ; CHECK-NEXT: mov v1.d[1], v2.d[0]
420
+ ; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
421
+ ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
422
+ ; CHECK-NEXT: ucvtf v2.4s, v2.4s
423
+ ; CHECK-NEXT: ucvtf v0.4s, v0.4s
424
+ ; CHECK-NEXT: fcvtn v2.4h, v2.4s
425
+ ; CHECK-NEXT: fcvtn v0.4h, v0.4s
426
+ ; CHECK-NEXT: mov v0.d[1], v2.d[0]
427
+ ; CHECK-NEXT: ret
428
+ ;
429
+ %1 = uitofp <16 x i8 > %a to <16 x half >
430
+ ret <16 x half > %1
431
+ }
348
432
349
433
define <8 x half > @uitofp_i16 (<8 x i16 > %a ) #0 {
350
434
; CHECK-LABEL: uitofp_i16:
0 commit comments