@@ -551,10 +551,13 @@ struct FPRep : public FPRepSem<fp_type, RetT> {
551
551
using UP::SIG_LEN;
552
552
553
553
public:
554
+ // Constants.
554
555
using UP::EXP_BIAS;
555
556
using UP::EXP_MASK;
556
557
using UP::FRACTION_MASK;
557
558
using UP::SIGN_MASK;
559
+ LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT =
560
+ (1 << UP::EXP_LEN) - 1 ;
558
561
559
562
// Default construction; initialization is left entirely to the base class UP.
LIBC_INLINE constexpr FPRep() = default;

// Builds a representation from a raw storage word by forwarding 'x' to the
// base class UP. Explicit, so a StorageType never converts silently.
LIBC_INLINE constexpr explicit FPRep(StorageType x) : UP(x) {}
@@ -652,6 +655,47 @@ struct FPRep : public FPRepSem<fp_type, RetT> {
652
655
bits = merge (bits, mantVal, FRACTION_MASK);
653
656
}
654
657
658
+ // Unsafe function to create a floating point representation.
659
+ // It simply packs the sign, biased exponent and mantissa values without
660
+ // checking bound nor normalization.
661
+ // FIXME: Use an uint32_t for 'biased_exp'.
662
+ LIBC_INLINE static constexpr RetT
663
+ create_value (Sign sign, StorageType biased_exp, StorageType mantissa) {
664
+ static_assert (fp_type != FPType::X86_Binary80,
665
+ " This function is not tested for X86 Extended Precision" );
666
+ return RetT (encode (sign, BiasedExp (static_cast <uint32_t >(biased_exp)),
667
+ Sig (mantissa)));
668
+ }
669
+
670
+ // The function converts integer number and unbiased exponent to proper float
671
+ // T type:
672
+ // Result = number * 2^(ep+1 - exponent_bias)
673
+ // Be careful!
674
+ // 1) "ep" is the raw exponent value.
675
+ // 2) The function adds +1 to ep for seamless normalized to denormalized
676
+ // transition.
677
+ // 3) The function does not check exponent high limit.
678
+ // 4) "number" zero value is not processed correctly.
679
+ // 5) Number is unsigned, so the result can be only positive.
680
+ LIBC_INLINE static constexpr RetT make_value (StorageType number, int ep) {
681
+ static_assert (fp_type != FPType::X86_Binary80,
682
+ " This function is not tested for X86 Extended Precision" );
683
+ FPRep result;
684
+ // offset: +1 for sign, but -1 for implicit first bit
685
+ int lz = cpp::countl_zero (number) - UP::EXP_LEN;
686
+ number <<= lz;
687
+ ep -= lz;
688
+
689
+ if (LIBC_LIKELY (ep >= 0 )) {
690
+ // Implicit number bit will be removed by mask
691
+ result.set_mantissa (number);
692
+ result.set_biased_exponent (ep + 1 );
693
+ } else {
694
+ result.set_mantissa (number >> -ep);
695
+ }
696
+ return RetT (result.uintval ());
697
+ }
698
+
655
699
private:
656
700
// Merge bits from 'a' and 'b' values according to 'mask'.
657
701
// Use 'a' bits when corresponding 'mask' bits are zeroes and 'b' bits when
@@ -696,79 +740,33 @@ template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() {
696
740
static_assert (cpp::always_false<UnqualT>, " Unsupported type" );
697
741
}
698
742
699
- // A generic class to manipulate floating point formats.
743
+ // A generic class to manipulate C++ floating point formats.
700
744
// It derives most of its functionality from FPRep above.
701
745
template <typename T>
702
746
struct FPBits final : public internal::FPRep<get_fp_type<T>(), FPBits<T>> {
703
747
static_assert (cpp::is_floating_point_v<T>,
704
748
" FPBits instantiated with invalid type." );
705
749
using UP = internal::FPRep<get_fp_type<T>(), FPBits<T>>;
706
- using Rep = UP;
707
750
using StorageType = typename UP::StorageType;
708
751
709
- using UP::bits;
710
-
711
- // Constants.
712
- LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT =
713
- (1 << UP::EXP_LEN) - 1 ;
714
-
715
752
// Constructors.
716
753
LIBC_INLINE constexpr FPBits () = default;
717
754
718
755
// Converting constructor. Accepts exactly two argument types (ignoring
// cv-qualifiers):
//   - StorageType: the value is stored as the raw bit pattern;
//   - T: the bit pattern of the floating point value is copied.
// Any other type is a compile-time error.
template <typename XType> LIBC_INLINE constexpr explicit FPBits(XType x) {
  using Arg = typename cpp::remove_cv_t<XType>;
  if constexpr (cpp::is_same_v<Arg, StorageType>) {
    UP::bits = x;
  } else if constexpr (cpp::is_same_v<Arg, T>) {
    UP::bits = cpp::bit_cast<StorageType>(x);
  } else {
    // We don't want accidental type promotions/conversions, so we require
    // exact type match.
    static_assert(cpp::always_false<XType>);
  }
}
730
- // Floating-point conversions.
731
- LIBC_INLINE constexpr T get_val () const { return cpp::bit_cast<T>(bits); }
732
-
733
- // TODO: Use an uint32_t for 'biased_exp'.
734
- LIBC_INLINE static constexpr FPBits<T>
735
- create_value (Sign sign, StorageType biased_exp, StorageType mantissa) {
736
- static_assert (get_fp_type<T>() != FPType::X86_Binary80,
737
- " This function is not tested for X86 Extended Precision" );
738
- return FPBits (UP::encode (
739
- sign, typename UP::BiasedExponent (static_cast <uint32_t >(biased_exp)),
740
- typename UP::Significand (mantissa)));
741
- }
742
-
743
- // The function convert integer number and unbiased exponent to proper float
744
- // T type:
745
- // Result = number * 2^(ep+1 - exponent_bias)
746
- // Be careful!
747
- // 1) "ep" is raw exponent value.
748
- // 2) The function add to +1 to ep for seamless normalized to denormalized
749
- // transition.
750
- // 3) The function did not check exponent high limit.
751
- // 4) "number" zero value is not processed correctly.
752
- // 5) Number is unsigned, so the result can be only positive.
753
- LIBC_INLINE static constexpr FPBits<T> make_value (StorageType number,
754
- int ep) {
755
- static_assert (get_fp_type<T>() != FPType::X86_Binary80,
756
- " This function is not tested for X86 Extended Precision" );
757
- FPBits<T> result;
758
- // offset: +1 for sign, but -1 for implicit first bit
759
- int lz = cpp::countl_zero (number) - UP::EXP_LEN;
760
- number <<= lz;
761
- ep -= lz;
762
767
763
- if (LIBC_LIKELY (ep >= 0 )) {
764
- // Implicit number bit will be removed by mask
765
- result.set_mantissa (number);
766
- result.set_biased_exponent (ep + 1 );
767
- } else {
768
- result.set_mantissa (number >> -ep);
769
- }
770
- return result;
771
- }
768
+ // Floating-point conversions.
769
+ LIBC_INLINE constexpr T get_val () const { return cpp::bit_cast<T>(UP::bits); }
772
770
};
773
771
774
772
} // namespace fputil
0 commit comments