@@ -386,28 +386,69 @@ exit:
386
386
ret void
387
387
}
388
388
389
+ ; CHECK-LABEL: lCPI8_0:
390
+ ; CHECK-NEXT: .byte 4 ; 0x4
391
+ ; CHECK-NEXT: .byte 16 ; 0x10
392
+ ; CHECK-NEXT: .byte 16 ; 0x10
393
+ ; CHECK-NEXT: .byte 16 ; 0x10
394
+ ; CHECK-NEXT: .byte 5 ; 0x5
395
+ ; CHECK-NEXT: .byte 16 ; 0x10
396
+ ; CHECK-NEXT: .byte 16 ; 0x10
397
+ ; CHECK-NEXT: .byte 16 ; 0x10
398
+ ; CHECK-NEXT: .byte 6 ; 0x6
399
+ ; CHECK-NEXT: .byte 16 ; 0x10
400
+ ; CHECK-NEXT: .byte 16 ; 0x10
401
+ ; CHECK-NEXT: .byte 16 ; 0x10
402
+ ; CHECK-NEXT: .byte 7 ; 0x7
403
+ ; CHECK-NEXT: .byte 16 ; 0x10
404
+ ; CHECK-NEXT: .byte 16 ; 0x10
405
+ ; CHECK-NEXT: .byte 16 ; 0x10
406
+ ; CHECK-NEXT: lCPI8_1:
407
+ ; CHECK-NEXT: .byte 0 ; 0x0
408
+ ; CHECK-NEXT: .byte 16 ; 0x10
409
+ ; CHECK-NEXT: .byte 16 ; 0x10
410
+ ; CHECK-NEXT: .byte 16 ; 0x10
411
+ ; CHECK-NEXT: .byte 1 ; 0x1
412
+ ; CHECK-NEXT: .byte 16 ; 0x10
413
+ ; CHECK-NEXT: .byte 16 ; 0x10
414
+ ; CHECK-NEXT: .byte 16 ; 0x10
415
+ ; CHECK-NEXT: .byte 2 ; 0x2
416
+ ; CHECK-NEXT: .byte 16 ; 0x10
417
+ ; CHECK-NEXT: .byte 16 ; 0x10
418
+ ; CHECK-NEXT: .byte 16 ; 0x10
419
+ ; CHECK-NEXT: .byte 3 ; 0x3
420
+ ; CHECK-NEXT: .byte 16 ; 0x10
421
+ ; CHECK-NEXT: .byte 16 ; 0x10
422
+ ; CHECK-NEXT: .byte 16 ; 0x10
423
+
389
424
define void @uitofp_v8i8_to_v8f32 (ptr %src , ptr %dst ) {
390
425
; CHECK-LABEL: uitofp_v8i8_to_v8f32:
391
426
; CHECK: ; %bb.0: ; %entry
427
+ ; CHECK-NEXT: Lloh2:
428
+ ; CHECK-NEXT: adrp x9, lCPI8_0@PAGE
429
+ ; CHECK-NEXT: Lloh3:
430
+ ; CHECK-NEXT: adrp x10, lCPI8_1@PAGE
392
431
; CHECK-NEXT: mov x8, xzr
432
+ ; CHECK-NEXT: Lloh4:
433
+ ; CHECK-NEXT: ldr q0, [x9, lCPI8_0@PAGEOFF]
434
+ ; CHECK-NEXT: Lloh5:
435
+ ; CHECK-NEXT: ldr q1, [x10, lCPI8_1@PAGEOFF]
393
436
; CHECK-NEXT: LBB8_1: ; %loop
394
437
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
395
- ; CHECK-NEXT: ldr d0 , [x0, x8, lsl #3]
438
+ ; CHECK-NEXT: ldr d2, [x0, x8, lsl #3]
396
439
; CHECK-NEXT: add x9, x1, x8, lsl #5
397
440
; CHECK-NEXT: add x8, x8, #1
398
441
; CHECK-NEXT: cmp x8, #1000
399
- ; CHECK-NEXT: zip1.8b v1, v0, v0
400
- ; CHECK-NEXT: zip2.8b v0, v0, v0
401
- ; CHECK-NEXT: bic.4h v1, #255, lsl #8
402
- ; CHECK-NEXT: bic.4h v0, #255, lsl #8
403
- ; CHECK-NEXT: ushll.4s v0, v0, #0
404
- ; CHECK-NEXT: ushll.4s v1, v1, #0
405
- ; CHECK-NEXT: ucvtf.4s v0, v0
406
- ; CHECK-NEXT: ucvtf.4s v1, v1
407
- ; CHECK-NEXT: stp q1, q0, [x9]
442
+ ; CHECK-NEXT: tbl.16b v3, { v2 }, v0
443
+ ; CHECK-NEXT: tbl.16b v2, { v2 }, v1
444
+ ; CHECK-NEXT: ucvtf.4s v3, v3
445
+ ; CHECK-NEXT: ucvtf.4s v2, v2
446
+ ; CHECK-NEXT: stp q2, q3, [x9]
408
447
; CHECK-NEXT: b.eq LBB8_1
409
448
; CHECK-NEXT: ; %bb.2: ; %exit
410
449
; CHECK-NEXT: ret
450
+ ; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh5
451
+ ; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh4
411
452
entry:
412
453
br label %loop
413
454
@@ -426,38 +467,118 @@ exit:
426
467
ret void
427
468
}
428
469
470
+ ; CHECK-LABEL: lCPI9_0:
471
+ ; CHECK-NEXT: .byte 12 ; 0xc
472
+ ; CHECK-NEXT: .byte 16 ; 0x10
473
+ ; CHECK-NEXT: .byte 16 ; 0x10
474
+ ; CHECK-NEXT: .byte 16 ; 0x10
475
+ ; CHECK-NEXT: .byte 13 ; 0xd
476
+ ; CHECK-NEXT: .byte 16 ; 0x10
477
+ ; CHECK-NEXT: .byte 16 ; 0x10
478
+ ; CHECK-NEXT: .byte 16 ; 0x10
479
+ ; CHECK-NEXT: .byte 14 ; 0xe
480
+ ; CHECK-NEXT: .byte 16 ; 0x10
481
+ ; CHECK-NEXT: .byte 16 ; 0x10
482
+ ; CHECK-NEXT: .byte 16 ; 0x10
483
+ ; CHECK-NEXT: .byte 15 ; 0xf
484
+ ; CHECK-NEXT: .byte 16 ; 0x10
485
+ ; CHECK-NEXT: .byte 16 ; 0x10
486
+ ; CHECK-NEXT: .byte 16 ; 0x10
487
+ ; CHECK-NEXT: lCPI9_1:
488
+ ; CHECK-NEXT: .byte 8 ; 0x8
489
+ ; CHECK-NEXT: .byte 16 ; 0x10
490
+ ; CHECK-NEXT: .byte 16 ; 0x10
491
+ ; CHECK-NEXT: .byte 16 ; 0x10
492
+ ; CHECK-NEXT: .byte 9 ; 0x9
493
+ ; CHECK-NEXT: .byte 16 ; 0x10
494
+ ; CHECK-NEXT: .byte 16 ; 0x10
495
+ ; CHECK-NEXT: .byte 16 ; 0x10
496
+ ; CHECK-NEXT: .byte 10 ; 0xa
497
+ ; CHECK-NEXT: .byte 16 ; 0x10
498
+ ; CHECK-NEXT: .byte 16 ; 0x10
499
+ ; CHECK-NEXT: .byte 16 ; 0x10
500
+ ; CHECK-NEXT: .byte 11 ; 0xb
501
+ ; CHECK-NEXT: .byte 16 ; 0x10
502
+ ; CHECK-NEXT: .byte 16 ; 0x10
503
+ ; CHECK-NEXT: .byte 16 ; 0x10
504
+ ; CHECK-NEXT: lCPI9_2:
505
+ ; CHECK-NEXT: .byte 4 ; 0x4
506
+ ; CHECK-NEXT: .byte 16 ; 0x10
507
+ ; CHECK-NEXT: .byte 16 ; 0x10
508
+ ; CHECK-NEXT: .byte 16 ; 0x10
509
+ ; CHECK-NEXT: .byte 5 ; 0x5
510
+ ; CHECK-NEXT: .byte 16 ; 0x10
511
+ ; CHECK-NEXT: .byte 16 ; 0x10
512
+ ; CHECK-NEXT: .byte 16 ; 0x10
513
+ ; CHECK-NEXT: .byte 6 ; 0x6
514
+ ; CHECK-NEXT: .byte 16 ; 0x10
515
+ ; CHECK-NEXT: .byte 16 ; 0x10
516
+ ; CHECK-NEXT: .byte 16 ; 0x10
517
+ ; CHECK-NEXT: .byte 7 ; 0x7
518
+ ; CHECK-NEXT: .byte 16 ; 0x10
519
+ ; CHECK-NEXT: .byte 16 ; 0x10
520
+ ; CHECK-NEXT: .byte 16 ; 0x10
521
+ ; CHECK-NEXT: lCPI9_3:
522
+ ; CHECK-NEXT: .byte 0 ; 0x0
523
+ ; CHECK-NEXT: .byte 16 ; 0x10
524
+ ; CHECK-NEXT: .byte 16 ; 0x10
525
+ ; CHECK-NEXT: .byte 16 ; 0x10
526
+ ; CHECK-NEXT: .byte 1 ; 0x1
527
+ ; CHECK-NEXT: .byte 16 ; 0x10
528
+ ; CHECK-NEXT: .byte 16 ; 0x10
529
+ ; CHECK-NEXT: .byte 16 ; 0x10
530
+ ; CHECK-NEXT: .byte 2 ; 0x2
531
+ ; CHECK-NEXT: .byte 16 ; 0x10
532
+ ; CHECK-NEXT: .byte 16 ; 0x10
533
+ ; CHECK-NEXT: .byte 16 ; 0x10
534
+ ; CHECK-NEXT: .byte 3 ; 0x3
535
+ ; CHECK-NEXT: .byte 16 ; 0x10
536
+ ; CHECK-NEXT: .byte 16 ; 0x10
537
+ ; CHECK-NEXT: .byte 16 ; 0x10
538
+
429
539
define void @uitofp_v16i8_to_v16f32 (ptr %src , ptr %dst ) {
430
540
; CHECK-LABEL: uitofp_v16i8_to_v16f32:
431
541
; CHECK: ; %bb.0: ; %entry
542
+ ; CHECK-NEXT: Lloh6:
543
+ ; CHECK-NEXT: adrp x9, lCPI9_0@PAGE
544
+ ; CHECK-NEXT: Lloh7:
545
+ ; CHECK-NEXT: adrp x10, lCPI9_1@PAGE
546
+ ; CHECK-NEXT: Lloh8:
547
+ ; CHECK-NEXT: adrp x11, lCPI9_2@PAGE
548
+ ; CHECK-NEXT: Lloh9:
549
+ ; CHECK-NEXT: adrp x12, lCPI9_3@PAGE
432
550
; CHECK-NEXT: mov x8, xzr
551
+ ; CHECK-NEXT: Lloh10:
552
+ ; CHECK-NEXT: ldr q0, [x9, lCPI9_0@PAGEOFF]
553
+ ; CHECK-NEXT: Lloh11:
554
+ ; CHECK-NEXT: ldr q1, [x10, lCPI9_1@PAGEOFF]
555
+ ; CHECK-NEXT: Lloh12:
556
+ ; CHECK-NEXT: ldr q2, [x11, lCPI9_2@PAGEOFF]
557
+ ; CHECK-NEXT: Lloh13:
558
+ ; CHECK-NEXT: ldr q3, [x12, lCPI9_3@PAGEOFF]
433
559
; CHECK-NEXT: LBB9_1: ; %loop
434
560
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
435
- ; CHECK-NEXT: ldr q0 , [x0, x8, lsl #4]
561
+ ; CHECK-NEXT: ldr q4, [x0, x8, lsl #4]
436
562
; CHECK-NEXT: add x9, x1, x8, lsl #6
437
563
; CHECK-NEXT: add x8, x8, #1
438
564
; CHECK-NEXT: cmp x8, #1000
439
- ; CHECK-NEXT: ext.16b v1, v0, v0, #8
440
- ; CHECK-NEXT: zip1.8b v2, v0, v0
441
- ; CHECK-NEXT: zip2.8b v0, v0, v0
442
- ; CHECK-NEXT: bic.4h v2, #255, lsl #8
443
- ; CHECK-NEXT: zip1.8b v3, v1, v0
444
- ; CHECK-NEXT: zip2.8b v1, v1, v0
445
- ; CHECK-NEXT: bic.4h v0, #255, lsl #8
446
- ; CHECK-NEXT: ushll.4s v2, v2, #0
447
- ; CHECK-NEXT: ushll.4s v0, v0, #0
448
- ; CHECK-NEXT: bic.4h v3, #255, lsl #8
449
- ; CHECK-NEXT: bic.4h v1, #255, lsl #8
450
- ; CHECK-NEXT: ucvtf.4s v2, v2
451
- ; CHECK-NEXT: ushll.4s v1, v1, #0
452
- ; CHECK-NEXT: ucvtf.4s v0, v0
453
- ; CHECK-NEXT: ushll.4s v3, v3, #0
454
- ; CHECK-NEXT: ucvtf.4s v1, v1
455
- ; CHECK-NEXT: ucvtf.4s v3, v3
456
- ; CHECK-NEXT: stp q2, q0, [x9]
457
- ; CHECK-NEXT: stp q3, q1, [x9, #32]
565
+ ; CHECK-NEXT: tbl.16b v5, { v4 }, v0
566
+ ; CHECK-NEXT: tbl.16b v6, { v4 }, v1
567
+ ; CHECK-NEXT: tbl.16b v7, { v4 }, v2
568
+ ; CHECK-NEXT: tbl.16b v4, { v4 }, v3
569
+ ; CHECK-NEXT: ucvtf.4s v5, v5
570
+ ; CHECK-NEXT: ucvtf.4s v6, v6
571
+ ; CHECK-NEXT: ucvtf.4s v7, v7
572
+ ; CHECK-NEXT: ucvtf.4s v4, v4
573
+ ; CHECK-NEXT: stp q6, q5, [x9, #32]
574
+ ; CHECK-NEXT: stp q4, q7, [x9]
458
575
; CHECK-NEXT: b.eq LBB9_1
459
576
; CHECK-NEXT: ; %bb.2: ; %exit
460
577
; CHECK-NEXT: ret
578
+ ; CHECK-NEXT: .loh AdrpLdr Lloh9, Lloh13
579
+ ; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh12
580
+ ; CHECK-NEXT: .loh AdrpLdr Lloh7, Lloh11
581
+ ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh10
461
582
entry:
462
583
br label %loop
463
584
0 commit comments