@@ -1402,71 +1402,173 @@ Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
}
// Try to fold:
-//    1) (add (sitofp x), (sitofp y))
-//        -> (sitofp (add x, y))
-//    2) (add (sitofp x), FpC)
-//        -> (sitofp (add x, (fptosi FpC)))
+//    1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
+//        -> ({s|u}itofp (int_binop x, y))
+//    2) (fp_binop ({s|u}itofp x), FpC)
+//        -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
-  // Check for (fadd double (sitofp x), y), see if we can merge this into an
-  // integer add followed by a promotion.
-  Value *LHS = BO.getOperand(0), *RHS = BO.getOperand(1);
-  if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
-    Value *LHSIntVal = LHSConv->getOperand(0);
-    Type *FPType = LHSConv->getType();
-
-    // TODO: This check is overly conservative. In many cases known bits
-    // analysis can tell us that the result of the addition has less significant
-    // bits than the integer type can hold.
-    auto IsValidPromotion = [](Type *FTy, Type *ITy) {
-      Type *FScalarTy = FTy->getScalarType();
-      Type *IScalarTy = ITy->getScalarType();
-
-      // Do we have enough bits in the significand to represent the result of
-      // the integer addition?
-      unsigned MaxRepresentableBits =
-          APFloat::semanticsPrecision(FScalarTy->getFltSemantics());
-      return IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits;
-    };
+  Value *IntOps[2] = {nullptr, nullptr};
+  Constant *Op1FpC = nullptr;
+
+  // Check for:
+  //    1) (binop ({s|u}itofp x), ({s|u}itofp y))
+  //    2) (binop ({s|u}itofp x), FpC)
+  if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
+      !match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
+    return nullptr;
-    // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
-    // ... if the constant fits in the integer value. This is useful for things
-    // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
-    // requires a constant pool load, and generally allows the add to be better
-    // instcombined.
-    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
-      if (IsValidPromotion(FPType, LHSIntVal->getType())) {
-        Constant *CI = ConstantFoldCastOperand(Instruction::FPToSI, CFP,
-                                               LHSIntVal->getType(), DL);
-        if (LHSConv->hasOneUse() &&
-            ConstantFoldCastOperand(Instruction::SIToFP, CI, BO.getType(),
-                                    DL) == CFP &&
-            willNotOverflowSignedAdd(LHSIntVal, CI, BO)) {
-          // Insert the new integer add.
-          Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI);
-          return new SIToFPInst(NewAdd, BO.getType());
-        }
-      }
+  if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
+      !match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
+      !match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
+    return nullptr;
-    // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
-    if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
-      Value *RHSIntVal = RHSConv->getOperand(0);
-      // It's enough to check LHS types only because we require int types to
-      // be the same for this transform.
-      if (IsValidPromotion(FPType, LHSIntVal->getType())) {
-        // Only do this if x/y have the same type, if at least one of them has a
-        // single use (so we don't increase the number of int->fp conversions),
-        // and if the integer add will not overflow.
-        if (LHSIntVal->getType() == RHSIntVal->getType() &&
-            (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
-            willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, BO)) {
-          // Insert the new integer add.
-          Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, RHSIntVal);
-          return new SIToFPInst(NewAdd, BO.getType());
-        }
-      }
+  Type *FPTy = BO.getType();
+  Type *IntTy = IntOps[0]->getType();
+
+  // Do we have signed casts?
+  bool OpsFromSigned = isa<SIToFPInst>(BO.getOperand(0));
+
+  unsigned IntSz = IntTy->getScalarSizeInBits();
+  // This is the maximum number of in-use bits of the integer for which the
+  // int -> fp casts are exact.
+  unsigned MaxRepresentableBits =
+      APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());
+
+  // Cache KnownBits a bit to potentially save some analysis.
+  WithCache<const Value *> OpsKnown[2] = {IntOps[0], IntOps[1]};
+
+  // Preserve the known number of leading bits. This can allow us to do trivial
+  // nsw/nuw checks later on.
+  unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
+
+  auto IsNonZero = [&](unsigned OpNo) -> bool {
+    if (OpsKnown[OpNo].hasKnownBits() &&
+        OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
+      return true;
+    return isKnownNonZero(IntOps[OpNo], SQ.DL);
+  };
+
+  auto IsNonNeg = [&](unsigned OpNo) -> bool {
+    if (OpsKnown[OpNo].hasKnownBits() &&
+        OpsKnown[OpNo].getKnownBits(SQ).isNonNegative())
+      return true;
+    return isKnownNonNegative(IntOps[OpNo], SQ);
+  };
+
+  // Check if we know for certain that ({s|u}itofp op) is exact.
+  auto IsValidPromotion = [&](unsigned OpNo) -> bool {
+    // If fp precision >= bitwidth(op), then it's exact.
+    if (MaxRepresentableBits >= IntSz)
+      ;
+    // Otherwise, if it's a signed cast, check that
+    // fp precision >= bitwidth(op) - numSignBits(op).
+    // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
+    // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
+    else if (OpsFromSigned)
+      NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
+    // Finally, for unsigned, check that
+    // fp precision >= bitwidth(op) - numLeadingZeros(op).
+    else {
+      NumUsedLeadingBits[OpNo] =
+          IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
+    }
+    // NB: We could also check if op is known to be a power of 2 or zero (which
+    // will always be representable). It's unlikely, however, that if we are
+    // unable to bound op in any way we will be able to pass the overflow
+    // checks later on.
+
+    if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
+      return false;
+    // Signed + Mul also requires that op is non-zero to avoid -0 cases.
+    return (OpsFromSigned && BO.getOpcode() == Instruction::FMul)
+               ? IsNonZero(OpNo)
+               : true;
+  };
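For the exactness and sign conditions encoded in IsValidPromotion above, here is a minimal standalone C++ sketch of the two facts being relied on, assuming IEEE-754 double with a 53-bit significand. It is an illustration only, not part of the patch, and uses no LLVM APIs; the variable names are ours.

#include <cassert>
#include <cfloat>
#include <cmath>
#include <cstdint>

int main() {
  // Exactness: double's significand holds DBL_MANT_DIG (53) bits, so any
  // integer using at most 53 bits converts exactly, while a 54-bit value
  // may not survive the int -> fp -> int round trip.
  static_assert(DBL_MANT_DIG == 53, "assumes IEEE-754 double");
  int64_t Small = (int64_t{1} << 53) - 1; // uses 53 bits: exact
  int64_t Big = (int64_t{1} << 53) + 1;   // uses 54 bits: rounds
  assert(static_cast<int64_t>(static_cast<double>(Small)) == Small);
  assert(static_cast<int64_t>(static_cast<double>(Big)) != Big);

  // Signed fmul and -0.0: sitofp(-3) * sitofp(0) yields -0.0, but the folded
  // form sitofp(-3 * 0) yields +0.0, hence the IsNonZero requirement above.
  assert(std::signbit(-3.0 * 0.0));
  assert(!std::signbit(static_cast<double>(-3 * 0)));
  return 0;
}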
+
+  // If we have a constant rhs, see if we can losslessly convert it to an int.
+  if (Op1FpC != nullptr) {
+    Constant *Op1IntC = ConstantFoldCastOperand(
+        OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
+        IntTy, DL);
+    if (Op1IntC == nullptr)
+      return nullptr;
+    if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
+                                              : Instruction::UIToFP,
+                                Op1IntC, FPTy, DL) != Op1FpC)
+      return nullptr;
+
+    // First try to keep sign of cast the same.
+    IntOps[1] = Op1IntC;
+  }
+
+  // Ensure lhs/rhs integer types match.
+  if (IntTy != IntOps[1]->getType())
+    return nullptr;
+
+  if (Op1FpC == nullptr) {
+    if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(1))) {
+      // If we have a signed + unsigned, see if we can treat both as signed
+      // (uitofp nneg x) == (sitofp nneg x).
+      if (OpsFromSigned ? !IsNonNeg(1) : !IsNonNeg(0))
+        return nullptr;
+      OpsFromSigned = true;
    }
+    if (!IsValidPromotion(1))
+      return nullptr;
  }
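The mixed signed/unsigned case above leans on the fact that, for a non-negative operand, the signed and unsigned int -> fp conversions produce the same value, which is what (uitofp nneg x) == (sitofp nneg x) expresses. A standalone C++ sketch of that equivalence, as an illustration only, assuming 64-bit two's-complement integers and IEEE-754 double:

#include <cassert>
#include <cstdint>

int main() {
  // With the sign bit clear, the same bit pattern denotes the same integer
  // whether read as signed or unsigned, so the conversions to double agree.
  int64_t NonNeg = 0x7fffffffffff;
  assert(static_cast<double>(NonNeg) ==
         static_cast<double>(static_cast<uint64_t>(NonNeg)));

  // With the sign bit set, the two interpretations (and conversions) diverge.
  int64_t Neg = -1;
  assert(static_cast<double>(Neg) !=
         static_cast<double>(static_cast<uint64_t>(Neg)));
  return 0;
}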
-  return nullptr;
+  if (!IsValidPromotion(0))
+    return nullptr;
+
+  // Finally, we check that the integer version of the binop will not overflow.
+  BinaryOperator::BinaryOps IntOpc;
+  // Because of the precision check, we can often rule out overflows.
+  bool NeedsOverflowCheck = true;
+  // Try to conservatively rule out overflow based on the already done precision
+  // checks.
+  unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
+  unsigned OverflowMaxCurBits =
+      std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
+  bool OutputSigned = OpsFromSigned;
+  switch (BO.getOpcode()) {
+  case Instruction::FAdd:
+    IntOpc = Instruction::Add;
+    OverflowMaxOutputBits += OverflowMaxCurBits;
+    break;
+  case Instruction::FSub:
+    IntOpc = Instruction::Sub;
+    OverflowMaxOutputBits += OverflowMaxCurBits;
+    break;
+  case Instruction::FMul:
+    IntOpc = Instruction::Mul;
+    OverflowMaxOutputBits += OverflowMaxCurBits * 2;
+    break;
+  default:
+    llvm_unreachable("Unsupported binop");
+  }
+  // The precision check may have already ruled out overflow.
+  if (OverflowMaxOutputBits < IntSz) {
+    NeedsOverflowCheck = false;
+    // We can bound unsigned overflow from sub to an in-range signed value
+    // (this is what allows us to avoid the overflow check for sub).
+    if (IntOpc == Instruction::Sub)
+      OutputSigned = true;
+  }
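The bound above is plain bit counting: if each operand uses at most k bits, an add or sub needs at most k + 1 bits and a mul at most 2k bits, plus one extra bit when the values are signed, so a result bound still below the integer width rules out wrapping, and a sub only needs to switch to a signed output. A short standalone C++ sketch of that reasoning, with illustrative values that are not taken from the patch:

#include <cassert>
#include <cstdint>

int main() {
  // Operands using at most 20 bits: the sum stays under 2^21 and the product
  // under 2^40, so a 64-bit integer add or mul of them can never wrap.
  uint64_t A = (1u << 20) - 3, B = (1u << 20) - 7;
  assert(A + B < (uint64_t{1} << 21));
  assert(A * B < (uint64_t{1} << 40));

  // Why sub switches to a signed output: uitofp(2) - uitofp(5) is -3.0, and
  // only the signed integer subtraction followed by sitofp reproduces it.
  assert(2.0 - 5.0 == static_cast<double>(int64_t{2} - int64_t{5}));
  return 0;
}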
+
+  // Precision check did not rule out overflow, so need to check.
+  // TODO: If we add support for `WithCache` in `willNotOverflow`, change
+  // `IntOps[...]` arguments to `KnownOps[...]`.
+  if (NeedsOverflowCheck &&
+      !willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
+    return nullptr;
+
+  Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
+  if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
+    IntBO->setHasNoSignedWrap(OutputSigned);
+    IntBO->setHasNoUnsignedWrap(!OutputSigned);
+  }
+  if (OutputSigned)
+    return new SIToFPInst(IntBinOp, FPTy);
+  return new UIToFPInst(IntBinOp, FPTy);
}
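Taken together, once the precision and overflow checks pass, the rewrite is value preserving. A standalone C++ spot check of that equivalence for small signed operands, offered as an illustration independent of the patch and of the LLVM API:

#include <cassert>
#include <cstdint>

// For i16 operands, every intermediate value fits easily in double's 53-bit
// significand, so fp_binop(sitofp x, sitofp y) == sitofp(int_binop(x, y)).
int main() {
  int16_t X = -1234, Y = 987;
  int32_t Xi = X, Yi = Y; // widen so the integer ops themselves cannot wrap

  assert(static_cast<double>(X) + static_cast<double>(Y) ==
         static_cast<double>(Xi + Yi));
  assert(static_cast<double>(X) - static_cast<double>(Y) ==
         static_cast<double>(Xi - Yi));
  assert(static_cast<double>(X) * static_cast<double>(Y) ==
         static_cast<double>(Xi * Yi));
  return 0;
}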
/// A binop with a constant operand and a sign-extended boolean operand may be