@@ -503,54 +503,38 @@ define half @fold_demote_h_s(half %a, float %b) nounwind {
503
503
;
504
504
; RV32IFZFHMIN-LABEL: fold_demote_h_s:
505
505
; RV32IFZFHMIN: # %bb.0:
506
- ; RV32IFZFHMIN-NEXT: fmv.x.h a0, fa0
507
- ; RV32IFZFHMIN-NEXT: slli a0, a0, 17
508
- ; RV32IFZFHMIN-NEXT: fmv.x.w a1, fa1
509
- ; RV32IFZFHMIN-NEXT: srli a0, a0, 17
510
- ; RV32IFZFHMIN-NEXT: bgez a1, .LBB4_2
511
- ; RV32IFZFHMIN-NEXT: # %bb.1:
512
- ; RV32IFZFHMIN-NEXT: lui a1, 1048568
513
- ; RV32IFZFHMIN-NEXT: or a0, a0, a1
514
- ; RV32IFZFHMIN-NEXT: .LBB4_2:
515
- ; RV32IFZFHMIN-NEXT: fmv.h.x fa5, a0
516
- ; RV32IFZFHMIN-NEXT: fcvt.s.h fa5, fa5
517
- ; RV32IFZFHMIN-NEXT: fcvt.h.s fa0, fa5
506
+ ; RV32IFZFHMIN-NEXT: fmv.x.w a0, fa1
507
+ ; RV32IFZFHMIN-NEXT: srli a0, a0, 31
508
+ ; RV32IFZFHMIN-NEXT: slli a0, a0, 15
509
+ ; RV32IFZFHMIN-NEXT: fmv.x.h a1, fa0
510
+ ; RV32IFZFHMIN-NEXT: slli a1, a1, 17
511
+ ; RV32IFZFHMIN-NEXT: srli a1, a1, 17
512
+ ; RV32IFZFHMIN-NEXT: or a0, a1, a0
513
+ ; RV32IFZFHMIN-NEXT: fmv.h.x fa0, a0
518
514
; RV32IFZFHMIN-NEXT: ret
519
515
;
520
516
; RV32IFDZFHMIN-LABEL: fold_demote_h_s:
521
517
; RV32IFDZFHMIN: # %bb.0:
522
- ; RV32IFDZFHMIN-NEXT: fmv.x.h a0, fa0
523
- ; RV32IFDZFHMIN-NEXT: slli a0, a0, 17
524
- ; RV32IFDZFHMIN-NEXT: fmv.x.w a1, fa1
525
- ; RV32IFDZFHMIN-NEXT: srli a0, a0, 17
526
- ; RV32IFDZFHMIN-NEXT: bgez a1, .LBB4_2
527
- ; RV32IFDZFHMIN-NEXT: # %bb.1:
528
- ; RV32IFDZFHMIN-NEXT: lui a1, 1048568
529
- ; RV32IFDZFHMIN-NEXT: or a0, a0, a1
530
- ; RV32IFDZFHMIN-NEXT: .LBB4_2:
531
- ; RV32IFDZFHMIN-NEXT: fmv.h.x fa5, a0
532
- ; RV32IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
533
- ; RV32IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
518
+ ; RV32IFDZFHMIN-NEXT: fmv.x.w a0, fa1
519
+ ; RV32IFDZFHMIN-NEXT: srli a0, a0, 31
520
+ ; RV32IFDZFHMIN-NEXT: slli a0, a0, 15
521
+ ; RV32IFDZFHMIN-NEXT: fmv.x.h a1, fa0
522
+ ; RV32IFDZFHMIN-NEXT: slli a1, a1, 17
523
+ ; RV32IFDZFHMIN-NEXT: srli a1, a1, 17
524
+ ; RV32IFDZFHMIN-NEXT: or a0, a1, a0
525
+ ; RV32IFDZFHMIN-NEXT: fmv.h.x fa0, a0
534
526
; RV32IFDZFHMIN-NEXT: ret
535
527
;
536
528
; RV64IFDZFHMIN-LABEL: fold_demote_h_s:
537
529
; RV64IFDZFHMIN: # %bb.0:
538
- ; RV64IFDZFHMIN-NEXT: addi sp, sp, -16
539
- ; RV64IFDZFHMIN-NEXT: fsw fa1, 8(sp)
540
- ; RV64IFDZFHMIN-NEXT: lbu a0, 11(sp)
530
+ ; RV64IFDZFHMIN-NEXT: fmv.x.w a0, fa1
531
+ ; RV64IFDZFHMIN-NEXT: srli a0, a0, 31
532
+ ; RV64IFDZFHMIN-NEXT: slli a0, a0, 15
541
533
; RV64IFDZFHMIN-NEXT: fmv.x.h a1, fa0
542
534
; RV64IFDZFHMIN-NEXT: slli a1, a1, 49
543
- ; RV64IFDZFHMIN-NEXT: andi a2, a0, 128
544
- ; RV64IFDZFHMIN-NEXT: srli a0, a1, 49
545
- ; RV64IFDZFHMIN-NEXT: beqz a2, .LBB4_2
546
- ; RV64IFDZFHMIN-NEXT: # %bb.1:
547
- ; RV64IFDZFHMIN-NEXT: lui a1, 1048568
548
- ; RV64IFDZFHMIN-NEXT: or a0, a0, a1
549
- ; RV64IFDZFHMIN-NEXT: .LBB4_2:
550
- ; RV64IFDZFHMIN-NEXT: fmv.h.x fa5, a0
551
- ; RV64IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
552
- ; RV64IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
553
- ; RV64IFDZFHMIN-NEXT: addi sp, sp, 16
535
+ ; RV64IFDZFHMIN-NEXT: srli a1, a1, 49
536
+ ; RV64IFDZFHMIN-NEXT: or a0, a1, a0
537
+ ; RV64IFDZFHMIN-NEXT: fmv.h.x fa0, a0
554
538
; RV64IFDZFHMIN-NEXT: ret
555
539
%c = fptrunc float %b to half
556
540
%t = call half @llvm.copysign.f16 (half %a , half %c )
@@ -646,60 +630,40 @@ define half @fold_demote_h_d(half %a, double %b) nounwind {
646
630
;
647
631
; RV32IFZFHMIN-LABEL: fold_demote_h_d:
648
632
; RV32IFZFHMIN: # %bb.0:
649
- ; RV32IFZFHMIN-NEXT: addi sp, sp, -16
650
- ; RV32IFZFHMIN-NEXT: srli a1, a1, 16
651
- ; RV32IFZFHMIN-NEXT: fmv.h.x fa5, a1
652
- ; RV32IFZFHMIN-NEXT: fsh fa5, 12(sp)
653
- ; RV32IFZFHMIN-NEXT: lbu a0, 13(sp)
654
- ; RV32IFZFHMIN-NEXT: fmv.x.h a1, fa0
655
- ; RV32IFZFHMIN-NEXT: slli a1, a1, 17
656
- ; RV32IFZFHMIN-NEXT: andi a2, a0, 128
657
- ; RV32IFZFHMIN-NEXT: srli a0, a1, 17
658
- ; RV32IFZFHMIN-NEXT: beqz a2, .LBB5_2
659
- ; RV32IFZFHMIN-NEXT: # %bb.1:
660
- ; RV32IFZFHMIN-NEXT: lui a1, 1048568
633
+ ; RV32IFZFHMIN-NEXT: srli a1, a1, 31
634
+ ; RV32IFZFHMIN-NEXT: slli a1, a1, 15
635
+ ; RV32IFZFHMIN-NEXT: fmv.x.h a0, fa0
636
+ ; RV32IFZFHMIN-NEXT: slli a0, a0, 17
637
+ ; RV32IFZFHMIN-NEXT: srli a0, a0, 17
661
638
; RV32IFZFHMIN-NEXT: or a0, a0, a1
662
- ; RV32IFZFHMIN-NEXT: .LBB5_2:
663
- ; RV32IFZFHMIN-NEXT: fmv.h.x fa5, a0
664
- ; RV32IFZFHMIN-NEXT: fcvt.s.h fa5, fa5
665
- ; RV32IFZFHMIN-NEXT: fcvt.h.s fa0, fa5
666
- ; RV32IFZFHMIN-NEXT: addi sp, sp, 16
639
+ ; RV32IFZFHMIN-NEXT: fmv.h.x fa0, a0
667
640
; RV32IFZFHMIN-NEXT: ret
668
641
;
669
642
; RV32IFDZFHMIN-LABEL: fold_demote_h_d:
670
643
; RV32IFDZFHMIN: # %bb.0:
671
644
; RV32IFDZFHMIN-NEXT: addi sp, sp, -16
672
645
; RV32IFDZFHMIN-NEXT: fsd fa1, 8(sp)
673
- ; RV32IFDZFHMIN-NEXT: lbu a0, 15(sp)
646
+ ; RV32IFDZFHMIN-NEXT: lw a0, 12(sp)
647
+ ; RV32IFDZFHMIN-NEXT: srli a0, a0, 31
648
+ ; RV32IFDZFHMIN-NEXT: slli a0, a0, 15
674
649
; RV32IFDZFHMIN-NEXT: fmv.x.h a1, fa0
675
650
; RV32IFDZFHMIN-NEXT: slli a1, a1, 17
676
- ; RV32IFDZFHMIN-NEXT: andi a2, a0, 128
677
- ; RV32IFDZFHMIN-NEXT: srli a0, a1, 17
678
- ; RV32IFDZFHMIN-NEXT: beqz a2, .LBB5_2
679
- ; RV32IFDZFHMIN-NEXT: # %bb.1:
680
- ; RV32IFDZFHMIN-NEXT: lui a1, 1048568
681
- ; RV32IFDZFHMIN-NEXT: or a0, a0, a1
682
- ; RV32IFDZFHMIN-NEXT: .LBB5_2:
683
- ; RV32IFDZFHMIN-NEXT: fmv.h.x fa5, a0
684
- ; RV32IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
685
- ; RV32IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
651
+ ; RV32IFDZFHMIN-NEXT: srli a1, a1, 17
652
+ ; RV32IFDZFHMIN-NEXT: or a0, a1, a0
653
+ ; RV32IFDZFHMIN-NEXT: fmv.h.x fa0, a0
686
654
; RV32IFDZFHMIN-NEXT: addi sp, sp, 16
687
655
; RV32IFDZFHMIN-NEXT: ret
688
656
;
689
657
; RV64IFDZFHMIN-LABEL: fold_demote_h_d:
690
658
; RV64IFDZFHMIN: # %bb.0:
691
- ; RV64IFDZFHMIN-NEXT: fmv.x.h a0, fa0
692
- ; RV64IFDZFHMIN-NEXT: slli a0, a0, 49
693
- ; RV64IFDZFHMIN-NEXT: fmv.x.d a1, fa1
694
- ; RV64IFDZFHMIN-NEXT: srli a0, a0, 49
695
- ; RV64IFDZFHMIN-NEXT: bgez a1, .LBB5_2
696
- ; RV64IFDZFHMIN-NEXT: # %bb.1:
697
- ; RV64IFDZFHMIN-NEXT: lui a1, 1048568
698
- ; RV64IFDZFHMIN-NEXT: or a0, a0, a1
699
- ; RV64IFDZFHMIN-NEXT: .LBB5_2:
700
- ; RV64IFDZFHMIN-NEXT: fmv.h.x fa5, a0
701
- ; RV64IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
702
- ; RV64IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
659
+ ; RV64IFDZFHMIN-NEXT: fmv.x.d a0, fa1
660
+ ; RV64IFDZFHMIN-NEXT: srli a0, a0, 63
661
+ ; RV64IFDZFHMIN-NEXT: slli a0, a0, 15
662
+ ; RV64IFDZFHMIN-NEXT: fmv.x.h a1, fa0
663
+ ; RV64IFDZFHMIN-NEXT: slli a1, a1, 49
664
+ ; RV64IFDZFHMIN-NEXT: srli a1, a1, 49
665
+ ; RV64IFDZFHMIN-NEXT: or a0, a1, a0
666
+ ; RV64IFDZFHMIN-NEXT: fmv.h.x fa0, a0
703
667
; RV64IFDZFHMIN-NEXT: ret
704
668
%c = fptrunc double %b to half
705
669
%t = call half @llvm.copysign.f16 (half %a , half %c )
0 commit comments