@@ -310,29 +310,43 @@ define <4 x bfloat> @sitofp_i32(<4 x i32> %a) #0 {
310
310
define <4 x bfloat> @sitofp_i64 (<4 x i64 > %a ) #0 {
311
311
; CHECK-CVT-LABEL: sitofp_i64:
312
312
; CHECK-CVT: // %bb.0:
313
- ; CHECK-CVT-NEXT: scvtf v0.2d, v0.2d
314
- ; CHECK-CVT-NEXT: scvtf v1.2d, v1.2d
315
- ; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
316
- ; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
317
- ; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
318
- ; CHECK-CVT-NEXT: movi v1.4s, #1
319
- ; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
320
- ; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
321
- ; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
322
- ; CHECK-CVT-NEXT: fcmeq v3.4s, v0.4s, v0.4s
323
- ; CHECK-CVT-NEXT: orr v0.4s, #64, lsl #16
324
- ; CHECK-CVT-NEXT: add v1.4s, v1.4s, v2.4s
325
- ; CHECK-CVT-NEXT: bit v0.16b, v1.16b, v3.16b
313
+ ; CHECK-CVT-NEXT: mov x8, v0.d[1]
314
+ ; CHECK-CVT-NEXT: fmov x9, d0
315
+ ; CHECK-CVT-NEXT: scvtf s2, x9
316
+ ; CHECK-CVT-NEXT: mov x9, v1.d[1]
317
+ ; CHECK-CVT-NEXT: scvtf s0, x8
318
+ ; CHECK-CVT-NEXT: fmov x8, d1
319
+ ; CHECK-CVT-NEXT: scvtf s1, x8
320
+ ; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
321
+ ; CHECK-CVT-NEXT: scvtf s0, x9
322
+ ; CHECK-CVT-NEXT: mov v2.s[2], v1.s[0]
323
+ ; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
324
+ ; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
325
+ ; CHECK-CVT-NEXT: movi v0.4s, #1
326
+ ; CHECK-CVT-NEXT: ushr v3.4s, v2.4s, #16
327
+ ; CHECK-CVT-NEXT: add v1.4s, v2.4s, v1.4s
328
+ ; CHECK-CVT-NEXT: and v0.16b, v3.16b, v0.16b
329
+ ; CHECK-CVT-NEXT: fcmeq v3.4s, v2.4s, v2.4s
330
+ ; CHECK-CVT-NEXT: orr v2.4s, #64, lsl #16
331
+ ; CHECK-CVT-NEXT: add v0.4s, v0.4s, v1.4s
332
+ ; CHECK-CVT-NEXT: bif v0.16b, v2.16b, v3.16b
326
333
; CHECK-CVT-NEXT: shrn v0.4h, v0.4s, #16
327
334
; CHECK-CVT-NEXT: ret
328
335
;
329
336
; CHECK-BF16-LABEL: sitofp_i64:
330
337
; CHECK-BF16: // %bb.0:
331
- ; CHECK-BF16-NEXT: scvtf v0.2d, v0.2d
332
- ; CHECK-BF16-NEXT: scvtf v1.2d, v1.2d
333
- ; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
334
- ; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
335
- ; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
338
+ ; CHECK-BF16-NEXT: mov x8, v0.d[1]
339
+ ; CHECK-BF16-NEXT: fmov x9, d0
340
+ ; CHECK-BF16-NEXT: scvtf s2, x9
341
+ ; CHECK-BF16-NEXT: mov x9, v1.d[1]
342
+ ; CHECK-BF16-NEXT: scvtf s0, x8
343
+ ; CHECK-BF16-NEXT: fmov x8, d1
344
+ ; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
345
+ ; CHECK-BF16-NEXT: scvtf s0, x8
346
+ ; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
347
+ ; CHECK-BF16-NEXT: scvtf s0, x9
348
+ ; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
349
+ ; CHECK-BF16-NEXT: bfcvtn v0.4h, v2.4s
336
350
; CHECK-BF16-NEXT: ret
337
351
%1 = sitofp <4 x i64 > %a to <4 x bfloat>
338
352
ret <4 x bfloat> %1
@@ -413,12 +427,39 @@ define <4 x bfloat> @uitofp_i32(<4 x i32> %a) #0 {
413
427
define <4 x bfloat> @uitofp_i64 (<4 x i64 > %a ) #0 {
414
428
; CHECK-CVT-LABEL: uitofp_i64:
415
429
; CHECK-CVT: // %bb.0:
416
- ; CHECK-CVT-NEXT: ucvtf v0.2d, v0.2d
417
- ; CHECK-CVT-NEXT: ucvtf v1.2d, v1.2d
418
- ; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
419
- ; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
420
- ; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
430
+ ; CHECK-CVT-NEXT: movi v2.2d, #0x000000ffffffff
431
+ ; CHECK-CVT-NEXT: ushr v3.2d, v0.2d, #32
432
+ ; CHECK-CVT-NEXT: ushr v4.2d, v1.2d, #32
433
+ ; CHECK-CVT-NEXT: mov x8, v3.d[1]
434
+ ; CHECK-CVT-NEXT: fmov x10, d3
435
+ ; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b
436
+ ; CHECK-CVT-NEXT: and v1.16b, v1.16b, v2.16b
437
+ ; CHECK-CVT-NEXT: scvtf s3, x10
438
+ ; CHECK-CVT-NEXT: scvtf s5, x8
439
+ ; CHECK-CVT-NEXT: fmov x8, d0
440
+ ; CHECK-CVT-NEXT: mov x9, v0.d[1]
441
+ ; CHECK-CVT-NEXT: scvtf s2, x8
442
+ ; CHECK-CVT-NEXT: fmov x8, d4
443
+ ; CHECK-CVT-NEXT: scvtf s0, x9
444
+ ; CHECK-CVT-NEXT: mov x9, v4.d[1]
445
+ ; CHECK-CVT-NEXT: mov v3.s[1], v5.s[0]
446
+ ; CHECK-CVT-NEXT: scvtf s4, x8
447
+ ; CHECK-CVT-NEXT: fmov x8, d1
448
+ ; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
449
+ ; CHECK-CVT-NEXT: scvtf s0, x8
450
+ ; CHECK-CVT-NEXT: mov x8, v1.d[1]
451
+ ; CHECK-CVT-NEXT: scvtf s1, x9
452
+ ; CHECK-CVT-NEXT: mov v3.s[2], v4.s[0]
453
+ ; CHECK-CVT-NEXT: mov v2.s[2], v0.s[0]
454
+ ; CHECK-CVT-NEXT: scvtf s0, x8
455
+ ; CHECK-CVT-NEXT: mov w8, #1333788672 // =0x4f800000
456
+ ; CHECK-CVT-NEXT: mov v3.s[3], v1.s[0]
457
+ ; CHECK-CVT-NEXT: dup v1.4s, w8
458
+ ; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
459
+ ; CHECK-CVT-NEXT: fmul v0.4s, v3.4s, v1.4s
421
460
; CHECK-CVT-NEXT: movi v1.4s, #1
461
+ ; CHECK-CVT-NEXT: fadd v0.4s, v0.4s, v2.4s
462
+ ; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
422
463
; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
423
464
; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
424
465
; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
@@ -431,10 +472,37 @@ define <4 x bfloat> @uitofp_i64(<4 x i64> %a) #0 {
431
472
;
432
473
; CHECK-BF16-LABEL: uitofp_i64:
433
474
; CHECK-BF16: // %bb.0:
434
- ; CHECK-BF16-NEXT: ucvtf v0.2d, v0.2d
435
- ; CHECK-BF16-NEXT: ucvtf v1.2d, v1.2d
436
- ; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
437
- ; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
475
+ ; CHECK-BF16-NEXT: movi v2.2d, #0x000000ffffffff
476
+ ; CHECK-BF16-NEXT: ushr v3.2d, v0.2d, #32
477
+ ; CHECK-BF16-NEXT: ushr v4.2d, v1.2d, #32
478
+ ; CHECK-BF16-NEXT: mov x8, v3.d[1]
479
+ ; CHECK-BF16-NEXT: fmov x10, d3
480
+ ; CHECK-BF16-NEXT: and v0.16b, v0.16b, v2.16b
481
+ ; CHECK-BF16-NEXT: and v1.16b, v1.16b, v2.16b
482
+ ; CHECK-BF16-NEXT: scvtf s3, x10
483
+ ; CHECK-BF16-NEXT: scvtf s5, x8
484
+ ; CHECK-BF16-NEXT: fmov x8, d0
485
+ ; CHECK-BF16-NEXT: mov x9, v0.d[1]
486
+ ; CHECK-BF16-NEXT: scvtf s2, x8
487
+ ; CHECK-BF16-NEXT: fmov x8, d4
488
+ ; CHECK-BF16-NEXT: scvtf s0, x9
489
+ ; CHECK-BF16-NEXT: mov x9, v4.d[1]
490
+ ; CHECK-BF16-NEXT: mov v3.s[1], v5.s[0]
491
+ ; CHECK-BF16-NEXT: scvtf s4, x8
492
+ ; CHECK-BF16-NEXT: fmov x8, d1
493
+ ; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
494
+ ; CHECK-BF16-NEXT: scvtf s0, x8
495
+ ; CHECK-BF16-NEXT: mov x8, v1.d[1]
496
+ ; CHECK-BF16-NEXT: scvtf s1, x9
497
+ ; CHECK-BF16-NEXT: mov v3.s[2], v4.s[0]
498
+ ; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
499
+ ; CHECK-BF16-NEXT: scvtf s0, x8
500
+ ; CHECK-BF16-NEXT: mov w8, #1333788672 // =0x4f800000
501
+ ; CHECK-BF16-NEXT: mov v3.s[3], v1.s[0]
502
+ ; CHECK-BF16-NEXT: dup v1.4s, w8
503
+ ; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
504
+ ; CHECK-BF16-NEXT: fmul v0.4s, v3.4s, v1.4s
505
+ ; CHECK-BF16-NEXT: fadd v0.4s, v0.4s, v2.4s
438
506
; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
439
507
; CHECK-BF16-NEXT: ret
440
508
%1 = uitofp <4 x i64 > %a to <4 x bfloat>
0 commit comments