@@ -560,6 +560,180 @@ Instruction *InstCombinerImpl::foldFPSignBitOps(BinaryOperator &I) {
560
560
return nullptr ;
561
561
}
562
562
563
+ Instruction *InstCombinerImpl::foldFMulReassoc (BinaryOperator &I) {
564
+ Value *Op0 = I.getOperand (0 );
565
+ Value *Op1 = I.getOperand (1 );
566
+ Value *X, *Y;
567
+ Constant *C;
568
+
569
+ // Reassociate constant RHS with another constant to form constant
570
+ // expression.
571
+ if (match (Op1, m_Constant (C)) && C->isFiniteNonZeroFP ()) {
572
+ Constant *C1;
573
+ if (match (Op0, m_OneUse (m_FDiv (m_Constant (C1), m_Value (X))))) {
574
+ // (C1 / X) * C --> (C * C1) / X
575
+ Constant *CC1 =
576
+ ConstantFoldBinaryOpOperands (Instruction::FMul, C, C1, DL);
577
+ if (CC1 && CC1->isNormalFP ())
578
+ return BinaryOperator::CreateFDivFMF (CC1, X, &I);
579
+ }
580
+ if (match (Op0, m_FDiv (m_Value (X), m_Constant (C1)))) {
581
+ // (X / C1) * C --> X * (C / C1)
582
+ Constant *CDivC1 =
583
+ ConstantFoldBinaryOpOperands (Instruction::FDiv, C, C1, DL);
584
+ if (CDivC1 && CDivC1->isNormalFP ())
585
+ return BinaryOperator::CreateFMulFMF (X, CDivC1, &I);
586
+
587
+ // If the constant was a denormal, try reassociating differently.
588
+ // (X / C1) * C --> X / (C1 / C)
589
+ Constant *C1DivC =
590
+ ConstantFoldBinaryOpOperands (Instruction::FDiv, C1, C, DL);
591
+ if (C1DivC && Op0->hasOneUse () && C1DivC->isNormalFP ())
592
+ return BinaryOperator::CreateFDivFMF (X, C1DivC, &I);
593
+ }
594
+
595
+ // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
596
+ // canonicalized to 'fadd X, C'. Distributing the multiply may allow
597
+ // further folds and (X * C) + C2 is 'fma'.
598
+ if (match (Op0, m_OneUse (m_FAdd (m_Value (X), m_Constant (C1))))) {
599
+ // (X + C1) * C --> (X * C) + (C * C1)
600
+ if (Constant *CC1 =
601
+ ConstantFoldBinaryOpOperands (Instruction::FMul, C, C1, DL)) {
602
+ Value *XC = Builder.CreateFMulFMF (X, C, &I);
603
+ return BinaryOperator::CreateFAddFMF (XC, CC1, &I);
604
+ }
605
+ }
606
+ if (match (Op0, m_OneUse (m_FSub (m_Constant (C1), m_Value (X))))) {
607
+ // (C1 - X) * C --> (C * C1) - (X * C)
608
+ if (Constant *CC1 =
609
+ ConstantFoldBinaryOpOperands (Instruction::FMul, C, C1, DL)) {
610
+ Value *XC = Builder.CreateFMulFMF (X, C, &I);
611
+ return BinaryOperator::CreateFSubFMF (CC1, XC, &I);
612
+ }
613
+ }
614
+ }
615
+
616
+ Value *Z;
617
+ if (match (&I,
618
+ m_c_FMul (m_OneUse (m_FDiv (m_Value (X), m_Value (Y))), m_Value (Z)))) {
619
+ // Sink division: (X / Y) * Z --> (X * Z) / Y
620
+ Value *NewFMul = Builder.CreateFMulFMF (X, Z, &I);
621
+ return BinaryOperator::CreateFDivFMF (NewFMul, Y, &I);
622
+ }
623
+
624
+ // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
625
+ // nnan disallows the possibility of returning a number if both operands are
626
+ // negative (in that case, we should return NaN).
627
+ if (I.hasNoNaNs () && match (Op0, m_OneUse (m_Sqrt (m_Value (X)))) &&
628
+ match (Op1, m_OneUse (m_Sqrt (m_Value (Y))))) {
629
+ Value *XY = Builder.CreateFMulFMF (X, Y, &I);
630
+ Value *Sqrt = Builder.CreateUnaryIntrinsic (Intrinsic::sqrt, XY, &I);
631
+ return replaceInstUsesWith (I, Sqrt);
632
+ }
633
+
634
+ // The following transforms are done irrespective of the number of uses
635
+ // for the expression "1.0/sqrt(X)".
636
+ // 1) 1.0/sqrt(X) * X -> X/sqrt(X)
637
+ // 2) X * 1.0/sqrt(X) -> X/sqrt(X)
638
+ // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it
639
+ // has the necessary (reassoc) fast-math-flags.
640
+ if (I.hasNoSignedZeros () &&
641
+ match (Op0, (m_FDiv (m_SpecificFP (1.0 ), m_Value (Y)))) &&
642
+ match (Y, m_Sqrt (m_Value (X))) && Op1 == X)
643
+ return BinaryOperator::CreateFDivFMF (X, Y, &I);
644
+ if (I.hasNoSignedZeros () &&
645
+ match (Op1, (m_FDiv (m_SpecificFP (1.0 ), m_Value (Y)))) &&
646
+ match (Y, m_Sqrt (m_Value (X))) && Op0 == X)
647
+ return BinaryOperator::CreateFDivFMF (X, Y, &I);
648
+
649
+ // Like the similar transform in instsimplify, this requires 'nsz' because
650
+ // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0.
651
+ if (I.hasNoNaNs () && I.hasNoSignedZeros () && Op0 == Op1 && Op0->hasNUses (2 )) {
652
+ // Peek through fdiv to find squaring of square root:
653
+ // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y
654
+ if (match (Op0, m_FDiv (m_Value (X), m_Sqrt (m_Value (Y))))) {
655
+ Value *XX = Builder.CreateFMulFMF (X, X, &I);
656
+ return BinaryOperator::CreateFDivFMF (XX, Y, &I);
657
+ }
658
+ // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X)
659
+ if (match (Op0, m_FDiv (m_Sqrt (m_Value (Y)), m_Value (X)))) {
660
+ Value *XX = Builder.CreateFMulFMF (X, X, &I);
661
+ return BinaryOperator::CreateFDivFMF (Y, XX, &I);
662
+ }
663
+ }
664
+
665
+ // pow(X, Y) * X --> pow(X, Y+1)
666
+ // X * pow(X, Y) --> pow(X, Y+1)
667
+ if (match (&I, m_c_FMul (m_OneUse (m_Intrinsic<Intrinsic::pow>(m_Value (X),
668
+ m_Value (Y))),
669
+ m_Deferred (X)))) {
670
+ Value *Y1 = Builder.CreateFAddFMF (Y, ConstantFP::get (I.getType (), 1.0 ), &I);
671
+ Value *Pow = Builder.CreateBinaryIntrinsic (Intrinsic::pow, X, Y1, &I);
672
+ return replaceInstUsesWith (I, Pow);
673
+ }
674
+
675
+ if (I.isOnlyUserOfAnyOperand ()) {
676
+ // pow(X, Y) * pow(X, Z) -> pow(X, Y + Z)
677
+ if (match (Op0, m_Intrinsic<Intrinsic::pow>(m_Value (X), m_Value (Y))) &&
678
+ match (Op1, m_Intrinsic<Intrinsic::pow>(m_Specific (X), m_Value (Z)))) {
679
+ auto *YZ = Builder.CreateFAddFMF (Y, Z, &I);
680
+ auto *NewPow = Builder.CreateBinaryIntrinsic (Intrinsic::pow, X, YZ, &I);
681
+ return replaceInstUsesWith (I, NewPow);
682
+ }
683
+ // pow(X, Y) * pow(Z, Y) -> pow(X * Z, Y)
684
+ if (match (Op0, m_Intrinsic<Intrinsic::pow>(m_Value (X), m_Value (Y))) &&
685
+ match (Op1, m_Intrinsic<Intrinsic::pow>(m_Value (Z), m_Specific (Y)))) {
686
+ auto *XZ = Builder.CreateFMulFMF (X, Z, &I);
687
+ auto *NewPow = Builder.CreateBinaryIntrinsic (Intrinsic::pow, XZ, Y, &I);
688
+ return replaceInstUsesWith (I, NewPow);
689
+ }
690
+
691
+ // powi(x, y) * powi(x, z) -> powi(x, y + z)
692
+ if (match (Op0, m_Intrinsic<Intrinsic::powi>(m_Value (X), m_Value (Y))) &&
693
+ match (Op1, m_Intrinsic<Intrinsic::powi>(m_Specific (X), m_Value (Z))) &&
694
+ Y->getType () == Z->getType ()) {
695
+ auto *YZ = Builder.CreateAdd (Y, Z);
696
+ auto *NewPow = Builder.CreateIntrinsic (
697
+ Intrinsic::powi, {X->getType (), YZ->getType ()}, {X, YZ}, &I);
698
+ return replaceInstUsesWith (I, NewPow);
699
+ }
700
+
701
+ // exp(X) * exp(Y) -> exp(X + Y)
702
+ if (match (Op0, m_Intrinsic<Intrinsic::exp>(m_Value (X))) &&
703
+ match (Op1, m_Intrinsic<Intrinsic::exp>(m_Value (Y)))) {
704
+ Value *XY = Builder.CreateFAddFMF (X, Y, &I);
705
+ Value *Exp = Builder.CreateUnaryIntrinsic (Intrinsic::exp, XY, &I);
706
+ return replaceInstUsesWith (I, Exp);
707
+ }
708
+
709
+ // exp2(X) * exp2(Y) -> exp2(X + Y)
710
+ if (match (Op0, m_Intrinsic<Intrinsic::exp2>(m_Value (X))) &&
711
+ match (Op1, m_Intrinsic<Intrinsic::exp2>(m_Value (Y)))) {
712
+ Value *XY = Builder.CreateFAddFMF (X, Y, &I);
713
+ Value *Exp2 = Builder.CreateUnaryIntrinsic (Intrinsic::exp2, XY, &I);
714
+ return replaceInstUsesWith (I, Exp2);
715
+ }
716
+ }
717
+
718
+ // (X*Y) * X => (X*X) * Y where Y != X
719
+ // The purpose is two-fold:
720
+ // 1) to form a power expression (of X).
721
+ // 2) potentially shorten the critical path: After transformation, the
722
+ // latency of the instruction Y is amortized by the expression of X*X,
723
+ // and therefore Y is in a "less critical" position compared to what it
724
+ // was before the transformation.
725
+ if (match (Op0, m_OneUse (m_c_FMul (m_Specific (Op1), m_Value (Y)))) && Op1 != Y) {
726
+ Value *XX = Builder.CreateFMulFMF (Op1, Op1, &I);
727
+ return BinaryOperator::CreateFMulFMF (XX, Y, &I);
728
+ }
729
+ if (match (Op1, m_OneUse (m_c_FMul (m_Specific (Op0), m_Value (Y)))) && Op0 != Y) {
730
+ Value *XX = Builder.CreateFMulFMF (Op0, Op0, &I);
731
+ return BinaryOperator::CreateFMulFMF (XX, Y, &I);
732
+ }
733
+
734
+ return nullptr ;
735
+ }
736
+
563
737
Instruction *InstCombinerImpl::visitFMul (BinaryOperator &I) {
564
738
if (Value *V = simplifyFMulInst (I.getOperand (0 ), I.getOperand (1 ),
565
739
I.getFastMathFlags (),
@@ -607,176 +781,9 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
607
781
if (Value *V = SimplifySelectsFeedingBinaryOp (I, Op0, Op1))
608
782
return replaceInstUsesWith (I, V);
609
783
610
- if (I.hasAllowReassoc ()) {
611
- // Reassociate constant RHS with another constant to form constant
612
- // expression.
613
- if (match (Op1, m_Constant (C)) && C->isFiniteNonZeroFP ()) {
614
- Constant *C1;
615
- if (match (Op0, m_OneUse (m_FDiv (m_Constant (C1), m_Value (X))))) {
616
- // (C1 / X) * C --> (C * C1) / X
617
- Constant *CC1 =
618
- ConstantFoldBinaryOpOperands (Instruction::FMul, C, C1, DL);
619
- if (CC1 && CC1->isNormalFP ())
620
- return BinaryOperator::CreateFDivFMF (CC1, X, &I);
621
- }
622
- if (match (Op0, m_FDiv (m_Value (X), m_Constant (C1)))) {
623
- // (X / C1) * C --> X * (C / C1)
624
- Constant *CDivC1 =
625
- ConstantFoldBinaryOpOperands (Instruction::FDiv, C, C1, DL);
626
- if (CDivC1 && CDivC1->isNormalFP ())
627
- return BinaryOperator::CreateFMulFMF (X, CDivC1, &I);
628
-
629
- // If the constant was a denormal, try reassociating differently.
630
- // (X / C1) * C --> X / (C1 / C)
631
- Constant *C1DivC =
632
- ConstantFoldBinaryOpOperands (Instruction::FDiv, C1, C, DL);
633
- if (C1DivC && Op0->hasOneUse () && C1DivC->isNormalFP ())
634
- return BinaryOperator::CreateFDivFMF (X, C1DivC, &I);
635
- }
636
-
637
- // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
638
- // canonicalized to 'fadd X, C'. Distributing the multiply may allow
639
- // further folds and (X * C) + C2 is 'fma'.
640
- if (match (Op0, m_OneUse (m_FAdd (m_Value (X), m_Constant (C1))))) {
641
- // (X + C1) * C --> (X * C) + (C * C1)
642
- if (Constant *CC1 = ConstantFoldBinaryOpOperands (
643
- Instruction::FMul, C, C1, DL)) {
644
- Value *XC = Builder.CreateFMulFMF (X, C, &I);
645
- return BinaryOperator::CreateFAddFMF (XC, CC1, &I);
646
- }
647
- }
648
- if (match (Op0, m_OneUse (m_FSub (m_Constant (C1), m_Value (X))))) {
649
- // (C1 - X) * C --> (C * C1) - (X * C)
650
- if (Constant *CC1 = ConstantFoldBinaryOpOperands (
651
- Instruction::FMul, C, C1, DL)) {
652
- Value *XC = Builder.CreateFMulFMF (X, C, &I);
653
- return BinaryOperator::CreateFSubFMF (CC1, XC, &I);
654
- }
655
- }
656
- }
657
-
658
- Value *Z;
659
- if (match (&I, m_c_FMul (m_OneUse (m_FDiv (m_Value (X), m_Value (Y))),
660
- m_Value (Z)))) {
661
- // Sink division: (X / Y) * Z --> (X * Z) / Y
662
- Value *NewFMul = Builder.CreateFMulFMF (X, Z, &I);
663
- return BinaryOperator::CreateFDivFMF (NewFMul, Y, &I);
664
- }
665
-
666
- // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
667
- // nnan disallows the possibility of returning a number if both operands are
668
- // negative (in that case, we should return NaN).
669
- if (I.hasNoNaNs () && match (Op0, m_OneUse (m_Sqrt (m_Value (X)))) &&
670
- match (Op1, m_OneUse (m_Sqrt (m_Value (Y))))) {
671
- Value *XY = Builder.CreateFMulFMF (X, Y, &I);
672
- Value *Sqrt = Builder.CreateUnaryIntrinsic (Intrinsic::sqrt, XY, &I);
673
- return replaceInstUsesWith (I, Sqrt);
674
- }
675
-
676
- // The following transforms are done irrespective of the number of uses
677
- // for the expression "1.0/sqrt(X)".
678
- // 1) 1.0/sqrt(X) * X -> X/sqrt(X)
679
- // 2) X * 1.0/sqrt(X) -> X/sqrt(X)
680
- // We always expect the backend to reduce X/sqrt(X) to sqrt(X), if it
681
- // has the necessary (reassoc) fast-math-flags.
682
- if (I.hasNoSignedZeros () &&
683
- match (Op0, (m_FDiv (m_SpecificFP (1.0 ), m_Value (Y)))) &&
684
- match (Y, m_Sqrt (m_Value (X))) && Op1 == X)
685
- return BinaryOperator::CreateFDivFMF (X, Y, &I);
686
- if (I.hasNoSignedZeros () &&
687
- match (Op1, (m_FDiv (m_SpecificFP (1.0 ), m_Value (Y)))) &&
688
- match (Y, m_Sqrt (m_Value (X))) && Op0 == X)
689
- return BinaryOperator::CreateFDivFMF (X, Y, &I);
690
-
691
- // Like the similar transform in instsimplify, this requires 'nsz' because
692
- // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0.
693
- if (I.hasNoNaNs () && I.hasNoSignedZeros () && Op0 == Op1 &&
694
- Op0->hasNUses (2 )) {
695
- // Peek through fdiv to find squaring of square root:
696
- // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y
697
- if (match (Op0, m_FDiv (m_Value (X), m_Sqrt (m_Value (Y))))) {
698
- Value *XX = Builder.CreateFMulFMF (X, X, &I);
699
- return BinaryOperator::CreateFDivFMF (XX, Y, &I);
700
- }
701
- // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X)
702
- if (match (Op0, m_FDiv (m_Sqrt (m_Value (Y)), m_Value (X)))) {
703
- Value *XX = Builder.CreateFMulFMF (X, X, &I);
704
- return BinaryOperator::CreateFDivFMF (Y, XX, &I);
705
- }
706
- }
707
-
708
- // pow(X, Y) * X --> pow(X, Y+1)
709
- // X * pow(X, Y) --> pow(X, Y+1)
710
- if (match (&I, m_c_FMul (m_OneUse (m_Intrinsic<Intrinsic::pow>(m_Value (X),
711
- m_Value (Y))),
712
- m_Deferred (X)))) {
713
- Value *Y1 =
714
- Builder.CreateFAddFMF (Y, ConstantFP::get (I.getType (), 1.0 ), &I);
715
- Value *Pow = Builder.CreateBinaryIntrinsic (Intrinsic::pow, X, Y1, &I);
716
- return replaceInstUsesWith (I, Pow);
717
- }
718
-
719
- if (I.isOnlyUserOfAnyOperand ()) {
720
- // pow(X, Y) * pow(X, Z) -> pow(X, Y + Z)
721
- if (match (Op0, m_Intrinsic<Intrinsic::pow>(m_Value (X), m_Value (Y))) &&
722
- match (Op1, m_Intrinsic<Intrinsic::pow>(m_Specific (X), m_Value (Z)))) {
723
- auto *YZ = Builder.CreateFAddFMF (Y, Z, &I);
724
- auto *NewPow = Builder.CreateBinaryIntrinsic (Intrinsic::pow, X, YZ, &I);
725
- return replaceInstUsesWith (I, NewPow);
726
- }
727
- // pow(X, Y) * pow(Z, Y) -> pow(X * Z, Y)
728
- if (match (Op0, m_Intrinsic<Intrinsic::pow>(m_Value (X), m_Value (Y))) &&
729
- match (Op1, m_Intrinsic<Intrinsic::pow>(m_Value (Z), m_Specific (Y)))) {
730
- auto *XZ = Builder.CreateFMulFMF (X, Z, &I);
731
- auto *NewPow = Builder.CreateBinaryIntrinsic (Intrinsic::pow, XZ, Y, &I);
732
- return replaceInstUsesWith (I, NewPow);
733
- }
734
-
735
- // powi(x, y) * powi(x, z) -> powi(x, y + z)
736
- if (match (Op0, m_Intrinsic<Intrinsic::powi>(m_Value (X), m_Value (Y))) &&
737
- match (Op1, m_Intrinsic<Intrinsic::powi>(m_Specific (X), m_Value (Z))) &&
738
- Y->getType () == Z->getType ()) {
739
- auto *YZ = Builder.CreateAdd (Y, Z);
740
- auto *NewPow = Builder.CreateIntrinsic (
741
- Intrinsic::powi, {X->getType (), YZ->getType ()}, {X, YZ}, &I);
742
- return replaceInstUsesWith (I, NewPow);
743
- }
744
-
745
- // exp(X) * exp(Y) -> exp(X + Y)
746
- if (match (Op0, m_Intrinsic<Intrinsic::exp>(m_Value (X))) &&
747
- match (Op1, m_Intrinsic<Intrinsic::exp>(m_Value (Y)))) {
748
- Value *XY = Builder.CreateFAddFMF (X, Y, &I);
749
- Value *Exp = Builder.CreateUnaryIntrinsic (Intrinsic::exp, XY, &I);
750
- return replaceInstUsesWith (I, Exp);
751
- }
752
-
753
- // exp2(X) * exp2(Y) -> exp2(X + Y)
754
- if (match (Op0, m_Intrinsic<Intrinsic::exp2>(m_Value (X))) &&
755
- match (Op1, m_Intrinsic<Intrinsic::exp2>(m_Value (Y)))) {
756
- Value *XY = Builder.CreateFAddFMF (X, Y, &I);
757
- Value *Exp2 = Builder.CreateUnaryIntrinsic (Intrinsic::exp2, XY, &I);
758
- return replaceInstUsesWith (I, Exp2);
759
- }
760
- }
761
-
762
- // (X*Y) * X => (X*X) * Y where Y != X
763
- // The purpose is two-fold:
764
- // 1) to form a power expression (of X).
765
- // 2) potentially shorten the critical path: After transformation, the
766
- // latency of the instruction Y is amortized by the expression of X*X,
767
- // and therefore Y is in a "less critical" position compared to what it
768
- // was before the transformation.
769
- if (match (Op0, m_OneUse (m_c_FMul (m_Specific (Op1), m_Value (Y)))) &&
770
- Op1 != Y) {
771
- Value *XX = Builder.CreateFMulFMF (Op1, Op1, &I);
772
- return BinaryOperator::CreateFMulFMF (XX, Y, &I);
773
- }
774
- if (match (Op1, m_OneUse (m_c_FMul (m_Specific (Op0), m_Value (Y)))) &&
775
- Op0 != Y) {
776
- Value *XX = Builder.CreateFMulFMF (Op0, Op0, &I);
777
- return BinaryOperator::CreateFMulFMF (XX, Y, &I);
778
- }
779
- }
784
+ if (I.hasAllowReassoc ())
785
+ if (Instruction *FoldedMul = foldFMulReassoc (I))
786
+ return FoldedMul;
780
787
781
788
// log2(X * 0.5) * Y = log2(X) * Y - Y
782
789
if (I.isFast ()) {
0 commit comments