|
45 | 45 | #include "llvm/IR/IntrinsicsAArch64.h"
|
46 | 46 | #include "llvm/IR/IntrinsicsAMDGPU.h"
|
47 | 47 | #include "llvm/IR/IntrinsicsARM.h"
|
| 48 | +#include "llvm/IR/IntrinsicsNVPTX.h" |
48 | 49 | #include "llvm/IR/IntrinsicsWebAssembly.h"
|
49 | 50 | #include "llvm/IR/IntrinsicsX86.h"
|
| 51 | +#include "llvm/IR/NVVMIntrinsicUtils.h" |
50 | 52 | #include "llvm/IR/Operator.h"
|
51 | 53 | #include "llvm/IR/Type.h"
|
52 | 54 | #include "llvm/IR/Value.h"
|
@@ -1687,6 +1689,58 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
|
1687 | 1689 | case Intrinsic::x86_avx512_cvttsd2usi64:
|
1688 | 1690 | return !Call->isStrictFP();
|
1689 | 1691 |
|
| 1692 | + // NVVM float/double to int32/uint32 conversion intrinsics |
| 1693 | + case Intrinsic::nvvm_f2i_rm: |
| 1694 | + case Intrinsic::nvvm_f2i_rn: |
| 1695 | + case Intrinsic::nvvm_f2i_rp: |
| 1696 | + case Intrinsic::nvvm_f2i_rz: |
| 1697 | + case Intrinsic::nvvm_f2i_rm_ftz: |
| 1698 | + case Intrinsic::nvvm_f2i_rn_ftz: |
| 1699 | + case Intrinsic::nvvm_f2i_rp_ftz: |
| 1700 | + case Intrinsic::nvvm_f2i_rz_ftz: |
| 1701 | + case Intrinsic::nvvm_f2ui_rm: |
| 1702 | + case Intrinsic::nvvm_f2ui_rn: |
| 1703 | + case Intrinsic::nvvm_f2ui_rp: |
| 1704 | + case Intrinsic::nvvm_f2ui_rz: |
| 1705 | + case Intrinsic::nvvm_f2ui_rm_ftz: |
| 1706 | + case Intrinsic::nvvm_f2ui_rn_ftz: |
| 1707 | + case Intrinsic::nvvm_f2ui_rp_ftz: |
| 1708 | + case Intrinsic::nvvm_f2ui_rz_ftz: |
| 1709 | + case Intrinsic::nvvm_d2i_rm: |
| 1710 | + case Intrinsic::nvvm_d2i_rn: |
| 1711 | + case Intrinsic::nvvm_d2i_rp: |
| 1712 | + case Intrinsic::nvvm_d2i_rz: |
| 1713 | + case Intrinsic::nvvm_d2ui_rm: |
| 1714 | + case Intrinsic::nvvm_d2ui_rn: |
| 1715 | + case Intrinsic::nvvm_d2ui_rp: |
| 1716 | + case Intrinsic::nvvm_d2ui_rz: |
| 1717 | + |
| 1718 | + // NVVM float/double to int64/uint64 conversion intrinsics |
| 1719 | + case Intrinsic::nvvm_f2ll_rm: |
| 1720 | + case Intrinsic::nvvm_f2ll_rn: |
| 1721 | + case Intrinsic::nvvm_f2ll_rp: |
| 1722 | + case Intrinsic::nvvm_f2ll_rz: |
| 1723 | + case Intrinsic::nvvm_f2ll_rm_ftz: |
| 1724 | + case Intrinsic::nvvm_f2ll_rn_ftz: |
| 1725 | + case Intrinsic::nvvm_f2ll_rp_ftz: |
| 1726 | + case Intrinsic::nvvm_f2ll_rz_ftz: |
| 1727 | + case Intrinsic::nvvm_f2ull_rm: |
| 1728 | + case Intrinsic::nvvm_f2ull_rn: |
| 1729 | + case Intrinsic::nvvm_f2ull_rp: |
| 1730 | + case Intrinsic::nvvm_f2ull_rz: |
| 1731 | + case Intrinsic::nvvm_f2ull_rm_ftz: |
| 1732 | + case Intrinsic::nvvm_f2ull_rn_ftz: |
| 1733 | + case Intrinsic::nvvm_f2ull_rp_ftz: |
| 1734 | + case Intrinsic::nvvm_f2ull_rz_ftz: |
| 1735 | + case Intrinsic::nvvm_d2ll_rm: |
| 1736 | + case Intrinsic::nvvm_d2ll_rn: |
| 1737 | + case Intrinsic::nvvm_d2ll_rp: |
| 1738 | + case Intrinsic::nvvm_d2ll_rz: |
| 1739 | + case Intrinsic::nvvm_d2ull_rm: |
| 1740 | + case Intrinsic::nvvm_d2ull_rn: |
| 1741 | + case Intrinsic::nvvm_d2ull_rp: |
| 1742 | + case Intrinsic::nvvm_d2ull_rz: |
| 1743 | + |
1690 | 1744 | // Sign operations are actually bitwise operations, they do not raise
|
1691 | 1745 | // exceptions even for SNANs.
|
1692 | 1746 | case Intrinsic::fabs:
|
@@ -1849,6 +1903,12 @@ inline bool llvm_fenv_testexcept() {
|
1849 | 1903 | return false;
|
1850 | 1904 | }
|
1851 | 1905 |
|
| 1906 | +static const APFloat FTZPreserveSign(const APFloat &V) { |
| 1907 | + if (V.isDenormal()) |
| 1908 | + return APFloat::getZero(V.getSemantics(), V.isNegative()); |
| 1909 | + return V; |
| 1910 | +} |
| 1911 | + |
1852 | 1912 | Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
|
1853 | 1913 | Type *Ty) {
|
1854 | 1914 | llvm_fenv_clearexcept();
|
@@ -2309,6 +2369,85 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
|
2309 | 2369 | return ConstantFP::get(Ty->getContext(), U);
|
2310 | 2370 | }
|
2311 | 2371 |
|
| 2372 | + // NVVM float/double to signed/unsigned int32/int64 conversions: |
| 2373 | + switch (IntrinsicID) { |
| 2374 | + // f2i |
| 2375 | + case Intrinsic::nvvm_f2i_rm: |
| 2376 | + case Intrinsic::nvvm_f2i_rn: |
| 2377 | + case Intrinsic::nvvm_f2i_rp: |
| 2378 | + case Intrinsic::nvvm_f2i_rz: |
| 2379 | + case Intrinsic::nvvm_f2i_rm_ftz: |
| 2380 | + case Intrinsic::nvvm_f2i_rn_ftz: |
| 2381 | + case Intrinsic::nvvm_f2i_rp_ftz: |
| 2382 | + case Intrinsic::nvvm_f2i_rz_ftz: |
| 2383 | + // f2ui |
| 2384 | + case Intrinsic::nvvm_f2ui_rm: |
| 2385 | + case Intrinsic::nvvm_f2ui_rn: |
| 2386 | + case Intrinsic::nvvm_f2ui_rp: |
| 2387 | + case Intrinsic::nvvm_f2ui_rz: |
| 2388 | + case Intrinsic::nvvm_f2ui_rm_ftz: |
| 2389 | + case Intrinsic::nvvm_f2ui_rn_ftz: |
| 2390 | + case Intrinsic::nvvm_f2ui_rp_ftz: |
| 2391 | + case Intrinsic::nvvm_f2ui_rz_ftz: |
| 2392 | + // d2i |
| 2393 | + case Intrinsic::nvvm_d2i_rm: |
| 2394 | + case Intrinsic::nvvm_d2i_rn: |
| 2395 | + case Intrinsic::nvvm_d2i_rp: |
| 2396 | + case Intrinsic::nvvm_d2i_rz: |
| 2397 | + // d2ui |
| 2398 | + case Intrinsic::nvvm_d2ui_rm: |
| 2399 | + case Intrinsic::nvvm_d2ui_rn: |
| 2400 | + case Intrinsic::nvvm_d2ui_rp: |
| 2401 | + case Intrinsic::nvvm_d2ui_rz: |
| 2402 | + // f2ll |
| 2403 | + case Intrinsic::nvvm_f2ll_rm: |
| 2404 | + case Intrinsic::nvvm_f2ll_rn: |
| 2405 | + case Intrinsic::nvvm_f2ll_rp: |
| 2406 | + case Intrinsic::nvvm_f2ll_rz: |
| 2407 | + case Intrinsic::nvvm_f2ll_rm_ftz: |
| 2408 | + case Intrinsic::nvvm_f2ll_rn_ftz: |
| 2409 | + case Intrinsic::nvvm_f2ll_rp_ftz: |
| 2410 | + case Intrinsic::nvvm_f2ll_rz_ftz: |
| 2411 | + // f2ull |
| 2412 | + case Intrinsic::nvvm_f2ull_rm: |
| 2413 | + case Intrinsic::nvvm_f2ull_rn: |
| 2414 | + case Intrinsic::nvvm_f2ull_rp: |
| 2415 | + case Intrinsic::nvvm_f2ull_rz: |
| 2416 | + case Intrinsic::nvvm_f2ull_rm_ftz: |
| 2417 | + case Intrinsic::nvvm_f2ull_rn_ftz: |
| 2418 | + case Intrinsic::nvvm_f2ull_rp_ftz: |
| 2419 | + case Intrinsic::nvvm_f2ull_rz_ftz: |
| 2420 | + // d2ll |
| 2421 | + case Intrinsic::nvvm_d2ll_rm: |
| 2422 | + case Intrinsic::nvvm_d2ll_rn: |
| 2423 | + case Intrinsic::nvvm_d2ll_rp: |
| 2424 | + case Intrinsic::nvvm_d2ll_rz: |
| 2425 | + // d2ull |
| 2426 | + case Intrinsic::nvvm_d2ull_rm: |
| 2427 | + case Intrinsic::nvvm_d2ull_rn: |
| 2428 | + case Intrinsic::nvvm_d2ull_rp: |
| 2429 | + case Intrinsic::nvvm_d2ull_rz: { |
| 2430 | + // In float-to-integer conversion, NaN inputs are converted to 0. |
| 2431 | + if (U.isNaN()) |
| 2432 | + return ConstantInt::get(Ty, 0); |
| 2433 | + |
| 2434 | + APFloat::roundingMode RMode = nvvm::IntrinsicGetRoundingMode(IntrinsicID); |
| 2435 | + bool IsFTZ = nvvm::IntrinsicShouldFTZ(IntrinsicID); |
| 2436 | + bool IsSigned = nvvm::IntrinsicConvertsToSignedInteger(IntrinsicID); |
| 2437 | + |
| 2438 | + APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned); |
| 2439 | + auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U; |
| 2440 | + |
| 2441 | + bool IsExact = false; |
| 2442 | + APFloat::opStatus Status = |
| 2443 | + FloatToRound.convertToInteger(ResInt, RMode, &IsExact); |
| 2444 | + |
| 2445 | + if (Status != APFloat::opInvalidOp) |
| 2446 | + return ConstantInt::get(Ty, ResInt); |
| 2447 | + return nullptr; |
| 2448 | + } |
| 2449 | + } |
| 2450 | + |
2312 | 2451 | /// We only fold functions with finite arguments. Folding NaN and inf is
|
2313 | 2452 | /// likely to be aborted with an exception anyway, and some host libms
|
2314 | 2453 | /// have known errors raising exceptions.
|
|
0 commit comments