[libc] Move printf long double to simple calc #75414

Merged
2 changes: 1 addition & 1 deletion libc/config/config.json
@@ -13,7 +13,7 @@
"doc": "Disable handling of %n in printf format string."
},
"LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE": {
"value": true,
"value": false,
"doc": "Use large table for better printf long double performance."
}
},
23 changes: 17 additions & 6 deletions libc/docs/dev/printf_behavior.rst
@@ -13,7 +13,7 @@ C standard and POSIX standard. If any behavior is not mentioned here, it should
be assumed to follow the behavior described in those standards.

The LLVM-libc codebase is under active development, and may change. This
document was last updated [August 18, 2023] by [michaelrj] and may
document was last updated [January 8, 2024] by [michaelrj] and may
not be accurate after this point.

The behavior of LLVM-libc's printf is heavily influenced by compile-time flags.
@@ -87,14 +87,26 @@ are not recommended to be adjusted except by persons familiar with the Printf
Ryu Algorithm. Additionally they have no effect when float conversions are
disabled.

LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD
---------------------------------------
This flag disables the separate long double conversion implementation. It is
not based on the Ryu algorithm, instead generating the digits by
multiplying/dividing the written-out number by 10^9 to get blocks. It's
significantly faster than INT_CALC, only about 10x slower than MEGA_TABLE,
Contributor:

Is this 10% or 10 times? 10x seems like a lot.

Contributor Author:

10 times. It is a lot, but only sort of. I separated out the decimal (%f) long double tests and ran them with both the new specialization and the mega table, compiled with -O3 in release mode. On my workstation the mega table version took ~400 microseconds and the new version took ~8000 microseconds.

The reason I think this is a worthwhile tradeoff is that the mega table takes up a lot of space. Just the converter (found in build/libc/src/stdio/printf_core/CMakeFiles/libc.src.stdio.printf_core.converter.dir/) takes up ~5 megabytes with the mega table, but only ~150 kilobytes with the specialization.

The other option I was considering for saving space was using integer calculation (INT_CALC), which takes up more space (~200 kilobytes) and more time (~7,500,000 microseconds), so in comparison the new version looks quite good.

Contributor:

Cache-wise, the 5 MiB table seems pretty bad for a function that can be called frequently on multiple cores.

The sheer size is an issue in itself, but cache invalidation is also a concern; the libc instruction and data footprint should be small to allow more application state to stay in the cache. How is the table accessed, by the way? Are the indices usually about the same, or is the access completely random? Is there a way to compress the table? Can we come up with a smaller table and interpolate between values without losing precision?

Contributor Author:

The table is accessed sequentially. It's a flattened two-dimensional table, where the POW10_OFFSET array gives the starting index of each value in the larger POW10_SPLIT array. The negative indices have a similar but slightly different layout. Each number maps to one of the POW10_OFFSET values, then gets an offset from that point into the larger array, which is incremented or decremented (depending on whether this is the positive or negative exponent table).

The 5MB table is already compressed. If you look at the top of ryu_long_double_constants.h you'll see TABLE_SHIFT_CONST, IDX_SIZE, and MID_INT_SIZE. These are the constants we can adjust to try to shrink the table. There's a more comprehensive explanation of what they mean in utils/mathtools/ryu_tablegen.py, but here's the short version:

MID_INT_SIZE is the size of each entry in the POW10_SPLIT array. This needs to be at least TABLE_SHIFT_CONST + IDX_SIZE + sizeof(BLOCK_INT) so that it can actually fit the values.

TABLE_SHIFT_CONST (called CONSTANT in ryu_tablegen.py) adjusts the precision of each entry in the array, with higher being more precise. It's 120 here because anything lower tends to introduce errors into the result.

IDX_SIZE is the compression factor. It's the number of exponents that can be mapped onto one entry in POW10_OFFSET. From my testing it only works when it's a power of 2. Pushing this higher would require increasing MID_INT_SIZE a lot, which would significantly reduce the actual size savings, and would also make the calculations slower.

I talked a bit with Ulf Adams (the original designer of the Ryu algorithm) and he suggested that it might be possible to also compress the table by approximating the next value by multiplying by 5**9. In my testing this worked, but required a higher TABLE_SHIFT_CONST since you lose some precision with each approximation. In the end this only shrank the table a bit, and so I decided it wasn't worthwhile.

In conclusion, I believe the table is already compressed within an order of magnitude of its minimum size, and that makes it too large to be practical. Long doubles are rarely used, so carrying a large table all the time to speed them up seems like a bad idea.
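
For readers less familiar with this layout, here is a minimal editorial sketch of a flattened two-dimensional table driven by an offsets array. The data and the TOY_* names are placeholders invented for illustration, not the contents of ryu_long_double_constants.h; only the indexing scheme mirrors the description above.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Toy data: the real POW10_OFFSET / POW10_SPLIT arrays are far larger and are
// generated by utils/mathtools/ryu_tablegen.py.
constexpr uint16_t TOY_POW10_OFFSET[] = {0, 2, 5, 9}; // row i starts at TOY_POW10_OFFSET[i]
constexpr uint64_t TOY_POW10_SPLIT[] = {10, 11, 20, 21, 22, 30, 31, 32, 33};

// Fetch block `block_index` of row `row` from the flattened table.
uint64_t get_block(size_t row, size_t block_index) {
  return TOY_POW10_SPLIT[TOY_POW10_OFFSET[row] + block_index];
}

int main() {
  // Row 2 starts at flat index 5, so block 1 of row 2 is TOY_POW10_SPLIT[6] == 31.
  std::printf("%llu\n", static_cast<unsigned long long>(get_block(2, 1)));
  return 0;
}

In the real table, a single row additionally covers IDX_SIZE consecutive exponents, which is the compression factor described above.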

Contributor:

Thanks for the long explanation. I really appreciate it.

> In conclusion, I believe the table is already compressed within an order of magnitude of its minimum size, and that makes it too large to be practical. Long doubles are rarely used, so carrying a large table all the time to speed them up seems like a bad idea.

I agree.

and is small in binary size. Its downside is that it always calculates all
of the digits above the decimal point, making it slightly inefficient for %e
calls with large exponents. This is the default. This specialization overrides
other flags, so this flag must be set for other flags to affect the long double
behavior.
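
As an editorial aside, here is a minimal sketch of the block idea described above. It is an illustration only, not the LLVM-libc converter, and it uses a plain uint64_t where the real code works on a much wider integer type; the point is just that repeatedly dividing the written-out value by 10^9 peels off nine decimal digits at a time.

#include <cstdint>
#include <cstdio>

// Split `value` into base-10^9 blocks, least significant block first, by
// repeated division. A uint64_t never needs more than three such blocks.
int to_blocks(uint64_t value, uint32_t blocks[3]) {
  constexpr uint64_t BLOCK_DIV = 1000000000; // 10^9
  int count = 0;
  do {
    blocks[count++] = static_cast<uint32_t>(value % BLOCK_DIV);
    value /= BLOCK_DIV;
  } while (value != 0);
  return count;
}

int main() {
  uint32_t blocks[3];
  int count = to_blocks(12345678901234567890ULL, blocks);
  // Print the most significant block unpadded, then each lower block as
  // exactly nine digits, giving: 12 345678901 234567890
  std::printf("%u", blocks[count - 1]);
  for (int i = count - 2; i >= 0; --i)
    std::printf(" %09u", blocks[i]);
  std::printf("\n");
  return 0;
}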

LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
-------------------------------------------------
When set, the float to string decimal conversion algorithm will use a larger
table to accelerate long double conversions. This larger table is around 5MB of
size when compiled. This flag is enabled by default in the CMake.
size when compiled.

LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT(_LD)
--------------------------------------------
LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT
---------------------------------------
When set, the float to string decimal conversion algorithm will use dyadic
floats instead of a table when performing floating point conversions. This
results in ~50 digits of accuracy in the result, then zeroes for the remaining
@@ -107,8 +119,7 @@ LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC
When set, the float to string decimal conversion algorithm will use wide
integers instead of a table when performing floating point conversions. This
gives the same results as the table, but is very slow at the extreme ends of
the long double range. If no flags are set this is the default behavior for
long double conversions.
the long double range.

LIBC_COPT_FLOAT_TO_STR_NO_TABLE
-------------------------------
11 changes: 10 additions & 1 deletion libc/src/__support/UInt.h
@@ -27,10 +27,17 @@ namespace LIBC_NAMESPACE::cpp {

template <size_t Bits, bool Signed> struct BigInt {

// This being hardcoded as 64 is okay because we're using uint64_t as our
Contributor:

Can we also add

using word_type = uint64_t;
LIBC_INLINE_VAR static constexpr size_t WORD_SIZE = sizeof(word_type) * CHAR_BIT;
...
cpp::array<word_type, WORDCOUNT> val{};

Contributor Author:

I've done that, though I'm leaving the large-scale refactoring for a follow-up patch.

// internal type which will always be 64 bits.
using word_type = uint64_t;
LIBC_INLINE_VAR static constexpr size_t WORD_SIZE =
sizeof(word_type) * CHAR_BIT;

// TODO: Replace references to 64 with WORD_SIZE, and uint64_t with word_type.
static_assert(Bits > 0 && Bits % 64 == 0,
"Number of bits in BigInt should be a multiple of 64.");
LIBC_INLINE_VAR static constexpr size_t WORDCOUNT = Bits / 64;
cpp::array<uint64_t, WORDCOUNT> val{};
cpp::array<word_type, WORDCOUNT> val{};

LIBC_INLINE_VAR static constexpr uint64_t MASK32 = 0xFFFFFFFFu;

@@ -448,6 +455,8 @@ template <size_t Bits, bool Signed> struct BigInt {
// pos is the index of the current 64-bit chunk that we are processing.
size_t pos = WORDCOUNT;

// TODO: look into if constexpr(Bits > 256) skip leading zeroes.

for (size_t q_pos = WORDCOUNT - lower_pos; q_pos > 0; --q_pos) {
// q_pos is 1 + the index of the current 64-bit chunk of the quotient
// being processed.
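
As a small editorial illustration of how word_type, WORD_SIZE, and WORDCOUNT fit together, here is a self-contained toy modeled on the snippet above; it is a sketch for this discussion, not LLVM-libc code.

#include <array>
#include <climits>
#include <cstddef>
#include <cstdint>

template <size_t Bits> struct ToyBigInt {
  using word_type = uint64_t;
  static constexpr size_t WORD_SIZE = sizeof(word_type) * CHAR_BIT; // 64 bits per word
  static_assert(Bits > 0 && Bits % WORD_SIZE == 0,
                "Number of bits should be a multiple of the word size.");
  static constexpr size_t WORDCOUNT = Bits / WORD_SIZE;
  std::array<word_type, WORDCOUNT> val{};
};

// With 64-bit words: 128 bits -> two words, 320 bits -> five words.
static_assert(ToyBigInt<128>::WORDCOUNT == 2, "two 64-bit words");
static_assert(ToyBigInt<320>::WORDCOUNT == 5, "five 64-bit words");

int main() { return 0; }

Deriving WORD_SIZE from sizeof(word_type) * CHAR_BIT keeps WORDCOUNT correct if the internal word type ever changes, which is the direction the TODOs in the diff point toward.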