Skip to content

Speed up dec2flt fast path with additional tables. #30639

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 13, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions src/etc/dec2flt_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"""
from __future__ import print_function
import sys
from math import ceil, log
from fractions import Fraction
from collections import namedtuple

Expand All @@ -33,7 +34,6 @@
MIN_SIG = 2 ** (N - 1)
MAX_SIG = (2 ** N) - 1


# Hand-rolled fp representation without arithmetic or any other operations.
# The significand is normalized and always N bit, but the exponent is
# unrestricted in range.
Expand Down Expand Up @@ -92,7 +92,7 @@ def error(f, e, z):
ulp_err = abs_err / Fraction(2) ** z.exp
return float(ulp_err)

LICENSE = """
HEADER = """
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
Expand All @@ -102,9 +102,23 @@ def error(f, e, z):
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Tables of approximations of powers of ten.
//! DO NOT MODIFY: Generated by `src/etc/dec2flt_table.py`
"""


def main():
    """Print the whole generated Rust source to stdout.

    Output order: the header, the full powers-of-ten table, then one
    short-powers table per float type, each part separated by a blank line.
    """
    print(HEADER.strip())
    print()
    print_proper_powers()
    # (bit width of the float type, exponent of two bounding its significand)
    for num_bits, significand_size in ((32, 24), (64, 53)):
        print()
        print_short_powers(num_bits, significand_size)


def print_proper_powers():
MIN_E = -305
MAX_E = 305
e_range = range(MIN_E, MAX_E+1)
Expand All @@ -114,13 +128,10 @@ def main():
err = error(1, e, z)
assert err < 0.5
powers.append(z)
typ = "([u64; {0}], [i16; {0}])".format(len(e_range))
print(LICENSE.strip())
print("// Table of approximations of powers of ten.")
print("// DO NOT MODIFY: Generated by a src/etc/dec2flt_table.py")
print("pub const MIN_E: i16 = {};".format(MIN_E))
print("pub const MAX_E: i16 = {};".format(MAX_E))
print()
typ = "([u64; {0}], [i16; {0}])".format(len(powers))
print("pub const POWERS: ", typ, " = ([", sep='')
for z in powers:
print(" 0x{:x},".format(z.sig))
Expand All @@ -130,5 +141,17 @@ def main():
print("]);")


def print_short_powers(num_bits, significand_size):
    """Print the `F<num_bits>_SHORT_POWERS` constant as Rust source on stdout.

    The array lists the literals `1e0`, `1e1`, ... for every exponent `e`
    below ceil(log5(2**significand_size - 1)).
    """
    largest_sig = 2 ** significand_size - 1
    # The fast path bails out for exponents >= ceil(log5(max_sig)),
    # so the table only needs entries below that bound.
    entry_count = int(ceil(log(largest_sig, 5)))
    exponents = range(entry_count)
    array_type = "[f{}; {}]".format(num_bits, len(exponents))
    print("pub const F{}_SHORT_POWERS: {} = [".format(num_bits, array_type))
    for exponent in exponents:
        print(" 1e{},".format(exponent))
    print("];")


if __name__ == '__main__':
main()
10 changes: 3 additions & 7 deletions src/libcore/num/dec2flt/algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,13 @@ pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Opt
if f > T::max_sig() {
return None;
}
let e = e as i16; // Can't overflow because e.abs() <= LOG5_OF_EXP_N
// The case e < 0 cannot be folded into the other branch. Negative powers result in
// a repeating fractional part in binary, which are rounded, which causes real
// (and occasionally quite significant!) errors in the final result.
// The case `e == 0`, however, is unnecessary for correctness. It's just measurably faster.
if e == 0 {
Some(T::from_int(f))
} else if e > 0 {
Some(T::from_int(f) * fp_to_float(power_of_ten(e)))
if e >= 0 {
Some(T::from_int(f) * T::short_fast_pow10(e as usize))
} else {
Some(T::from_int(f) / fp_to_float(power_of_ten(-e)))
Some(T::from_int(f) / T::short_fast_pow10(e.abs() as usize))
}
}

Expand Down
12 changes: 12 additions & 0 deletions src/libcore/num/dec2flt/rawfp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ use num::diy_float::Fp;
use num::FpCategory::{Infinite, Zero, Subnormal, Normal, Nan};
use num::Float;
use num::dec2flt::num::{self, Big};
use num::dec2flt::table;

#[derive(Copy, Clone, Debug)]
pub struct Unpacked {
Expand Down Expand Up @@ -73,6 +74,9 @@ pub trait RawFloat : Float + Copy + Debug + LowerExp
/// represented, the other code in this module makes sure to never let that happen.
fn from_int(x: u64) -> Self;

/// Returns the value 10^e from a pre-computed table.
///
/// # Panics
///
/// Panics for e >= ceil_log5_of_max_sig().
fn short_fast_pow10(e: usize) -> Self;

// FIXME Everything that follows should be associated constants, but taking the value of an
// associated constant from a type parameter does not work (yet?)
// A possible workaround is having a `FloatInfo` struct for all the constants, but so far
Expand Down Expand Up @@ -175,6 +179,10 @@ impl RawFloat for f32 {
x as f32
}

// Plain lookup of 10^e in the f32 table; an `e` past the end of the table
// panics on the array bounds check.
fn short_fast_pow10(e: usize) -> Self {
table::F32_SHORT_POWERS[e]
}

fn max_normal_digits() -> usize {
35
}
Expand Down Expand Up @@ -222,6 +230,10 @@ impl RawFloat for f64 {
x as f64
}

// Plain lookup of 10^e in the f64 table; an `e` past the end of the table
// panics on the array bounds check.
fn short_fast_pow10(e: usize) -> Self {
table::F64_SHORT_POWERS[e]
}

fn max_normal_digits() -> usize {
305
}
Expand Down
46 changes: 44 additions & 2 deletions src/libcore/num/dec2flt/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Table of approximations of powers of ten.
// DO NOT MODIFY: Generated by a src/etc/dec2flt_table.py

//! Tables of approximations of powers of ten.
//! DO NOT MODIFY: Generated by `src/etc/dec2flt_table.py`

pub const MIN_E: i16 = -305;
pub const MAX_E: i16 = 305;

Expand Down Expand Up @@ -1237,3 +1239,43 @@ pub const POWERS: ([u64; 611], [i16; 611]) = ([
946,
950,
]);

/// The powers of ten 10^0 through 10^10 as `f32`, indexed by exponent.
pub const F32_SHORT_POWERS: [f32; 11] = [
1e0,
1e1,
1e2,
1e3,
1e4,
1e5,
1e6,
1e7,
1e8,
1e9,
1e10,
];

/// The powers of ten 10^0 through 10^22 as `f64`, indexed by exponent.
pub const F64_SHORT_POWERS: [f64; 23] = [
1e0,
1e1,
1e2,
1e3,
1e4,
1e5,
1e6,
1e7,
1e8,
1e9,
1e10,
1e11,
1e12,
1e13,
1e14,
1e15,
1e16,
1e17,
1e18,
1e19,
1e20,
1e21,
1e22,
];