Merge pull request #48 from JeffBezanson/teh/convert

timholy · web-flow · commit b8506b53eff1 · 2016-09-14T09:46:21.000-05:00
Use widening to reliably throw InexactErrors
diff --git a/README.md b/README.md
@@ -1,44 +1,62 @@
 # FixedPointNumbers
 
-This library exports fixed-point number types.
-A [fixed-point number][wikipedia] represents a fractional, or non-integral, number.
-In contrast with the more widely known floating-point numbers, fixed-point
-numbers have a fixed number of digits (bits) after the decimal (radix) point.
-They are effectively integers scaled by a constant factor.
+This library exports fixed-point number types.  A
+[fixed-point number][wikipedia] represents a fractional, or
+non-integral, number.  In contrast with the more widely known
+floating-point numbers, with fixed-point numbers the decimal point
+doesn't "float": fixed-point numbers are effectively integers that are
+interpreted as being scaled by a constant factor.  Consequently, they
+have a fixed number of digits (bits) after the decimal (radix) point.
 
 Fixed-point numbers can be used to perform arithmetic. Another practical
 application is to implicitly rescale integers without modifying the
 underlying representation.
 
 This library exports two categories of fixed-point types. Fixed-point types are
 used like any other number: they can be added, multiplied, raised to a power,
-etc. In many cases these operations result in conversion to floating-point types.
+etc. In some cases these operations result in conversion to floating-point types.
+
+# Type hierarchy and interpretation
 
-# Type hierarchy
 This library defines an abstract type `FixedPoint{T <: Integer, f}` as a
-subtype of `Real`. The parameter `T` is the underlying representation and `f`
+subtype of `Real`. The parameter `T` is the underlying machine representation and `f`
 is the number of fraction bits.
 
-For signed integers, there is a fixed-point type `Fixed{T, f}` and for unsigned
-integers, there is the `UFixed{T, f}` type.
-
-These types, built with `f` fraction bits, map the closed interval [0.0,1.0] to
-the span of numbers with `f` bits.  For example, the `UFixed8` type (aliased to
-UFixed{UInt8,8}) is represented internally by a `UInt8`, and makes `0x00`
-equivalent to `0.0` and `0xff` to `1.0`.  The type aliases `UFixed10`, `UFixed12`,
-`UFixed14`, and `UFixed16` are all based on `UInt16` and reach the value `1.0`
-at 10, 12, 14, and 16 bits, respectively (`0x03ff`, `0x0fff`, `0x3fff`, and
-`0xffff`).
-
-To construct such a number, use `convert(UFixed12, 1.3)`, `ufixed12(1.3)` (a
-convenience function), `UFixed{UInt16,12}(1.3)`, or the literal syntax
+For `T<:Signed` (a signed integer), there is a fixed-point type
+`Fixed{T, f}`; for `T<:Unsigned` (an unsigned integer), there is the
+`UFixed{T, f}` type. However, there are slight differences in behavior
+that go beyond signed/unsigned distinctions.
+
+The `Fixed{T,f}` types use 1 bit for sign, and `f` bits to represent
+the fraction. For example, `Fixed{Int8,7}` uses 7 bits (all bits
+except the sign bit) for the fractional part. The value of the number
+is interpreted as if the integer representation has been divided by
+`2^f`. Consequently, `Fixed{Int8,7}` numbers `x` satisfy
+
+```
+-1.0 = -128/128 ≤ x ≤ 127/128 ≈ 0.992
+```
+
+because the range of `Int8` is from -128 to 127.
+
+In contrast, the `UFixed{T,f}` types, with `f` fraction bits, map the closed
+interval [0.0,1.0] to the span of numbers with `f` bits.  For example,
+the `UFixed8` type (aliased to `UFixed{UInt8,8}`) is represented
+internally by a `UInt8`, and makes `0x00` equivalent to `0.0` and
+`0xff` to `1.0`. Consequently, `UFixed` numbers are scaled by `2^f-1`
+rather than `2^f`.  The type aliases `UFixed10`, `UFixed12`,
+`UFixed14`, and `UFixed16` are all based on `UInt16` and reach the
+value `1.0` at 10, 12, 14, and 16 bits, respectively (`0x03ff`,
+`0x0fff`, `0x3fff`, and `0xffff`).
+
+To construct such a number, use `convert(UFixed12, 1.3)`, `UFixed12(1.3)`, `UFixed{UInt16,12}(1.3)`, or the literal syntax
 `0x14ccuf12`.  The latter syntax means to construct a `UFixed12` (it ends in
 `uf12`) from the `UInt16` value `0x14cc`.
 
 More generally, an arbitrary number of bits from any of the standard unsigned
 integer widths can be used for the fractional part.  For example:
 `UFixed{UInt32,16}`, `UFixed{UInt64,3}`, `UFixed{UInt128,7}`.
 
-There currently is no literal syntax for signed `Fixed` numbers. 
+There currently is no literal syntax for signed `Fixed` numbers.
 
 [wikipedia]: http://en.wikipedia.org/wiki/Fixed-point_arithmetic
diff --git a/appveyor.yml b/appveyor.yml
@@ -0,0 +1,36 @@
+environment:
+  matrix:
+  - JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe"
+  - JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe"
+  - JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe"
+  - JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"
+  - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
+  - JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe"
+
+branches:
+  only:
+    - master
+    - /release-.*/
+
+notifications:
+  - provider: Email
+    on_build_success: false
+    on_build_failure: false
+    on_build_status_changed: false
+
+install:
+# Download most recent Julia Windows binary
+  - ps: (new-object net.webclient).DownloadFile(
+        $("http://s3.amazonaws.com/"+$env:JULIAVERSION),
+        "C:\projects\julia-binary.exe")
+# Run installer silently, output to C:\projects\julia
+  - C:\projects\julia-binary.exe /S /D=C:\projects\julia
+
+build_script:
+# Need to convert from shallow to complete for Pkg.clone to work
+  - IF EXIST .git\shallow (git fetch --unshallow)
+  - C:\projects\julia\bin\julia -e "versioninfo();
+      Pkg.clone(pwd(), \"FixedPointNumbers\"); Pkg.build(\"FixedPointNumbers\")"
+
+test_script:
+  - C:\projects\julia\bin\julia -e "Pkg.test(\"FixedPointNumbers\")"
diff --git a/src/FixedPointNumbers.jl b/src/FixedPointNumbers.jl
@@ -28,12 +28,6 @@ export
     UFixed12,
     UFixed14,
     UFixed16,
-    # constructors
-    ufixed8,
-    ufixed10,
-    ufixed12,
-    ufixed14,
-    ufixed16,
     # literal constructor constants
     uf8,
     uf10,
@@ -58,6 +52,26 @@ typemin{T<: FixedPoint}(::Type{T}) = T(typemin(rawtype(T)), 0)
 realmin{T<: FixedPoint}(::Type{T}) = typemin(T)
 realmax{T<: FixedPoint}(::Type{T}) = typemax(T)
 
+# widen1(T) gives the integer type one size up from T, with the same signedness.
+# The conversion code calls it to compute in a wider type and then narrow with
+# round(T, ...), which is what throws the InexactError for out-of-range values.
+# Int128/UInt128 have no wider primitive type, so they map to themselves: this
+# keeps e.g. convert(Fixed{Int128,f}, x) from failing with a MethodError, at the
+# price that overflow is not detected for the 128-bit types.
+widen1(::Type{Int8})    = Int16
+widen1(::Type{UInt8})   = UInt16
+widen1(::Type{Int16})   = Int32
+widen1(::Type{UInt16})  = UInt32
+widen1(::Type{Int32})   = Int64
+widen1(::Type{UInt32})  = UInt64
+widen1(::Type{Int64})   = Int128
+widen1(::Type{UInt64})  = UInt128
+widen1(::Type{Int128})  = Int128
+widen1(::Type{UInt128}) = UInt128
+# Value form: re-express x in the widened type. Every value of typeof(x) fits
+# in widen1(typeof(x)), so the `%` conversion never changes the value.
+widen1(x::Integer) = x % widen1(typeof(x))
+
 include("fixed.jl")
 include("ufixed.jl")
 include("deprecations.jl")
diff --git a/src/deprecations.jl b/src/deprecations.jl
@@ -11,3 +11,15 @@ import Base.@deprecate_binding
 
 @deprecate_binding Fixed32 Fixed16
 @deprecate Fixed(x::Real) convert(Fixed{Int32, 16}, x)
+
+@deprecate ufixed8(x)  UFixed8(x)
+@deprecate ufixed10(x) UFixed10(x)
+@deprecate ufixed12(x) UFixed12(x)
+@deprecate ufixed14(x) UFixed14(x)
+@deprecate ufixed16(x) UFixed16(x)
+
+Compat.@dep_vectorize_1arg Real ufixed8
+Compat.@dep_vectorize_1arg Real ufixed10
+Compat.@dep_vectorize_1arg Real ufixed12
+Compat.@dep_vectorize_1arg Real ufixed14
+Compat.@dep_vectorize_1arg Real ufixed16
diff --git a/src/fixed.jl b/src/fixed.jl
@@ -30,8 +30,8 @@ abs{T,f}(x::Fixed{T,f}) = Fixed{T,f}(abs(x.i),0)
 
 
 # # conversions and promotions
-convert{T,f}(::Type{Fixed{T,f}}, x::Integer) = Fixed{T,f}(convert(T,x)<<f,0)
-convert{T,f}(::Type{Fixed{T,f}}, x::AbstractFloat) = Fixed{T,f}(trunc(T,x)<<f + round(T, rem(x,1)*(1<<f)),0)
+convert{T,f}(::Type{Fixed{T,f}}, x::Integer) = Fixed{T,f}(round(T, convert(widen1(T),x)<<f),0)
+convert{T,f}(::Type{Fixed{T,f}}, x::AbstractFloat) = Fixed{T,f}(round(T, trunc(widen1(T),x)<<f + rem(x,1)*(1<<f)),0)
 convert{T,f}(::Type{Fixed{T,f}}, x::Rational) = Fixed{T,f}(x.num)/Fixed{T,f}(x.den)
 
 convert{T,f}(::Type{BigFloat}, x::Fixed{T,f}) =
diff --git a/src/ufixed.jl b/src/ufixed.jl
@@ -43,19 +43,9 @@ rawone(v) = reinterpret(one(v))
 convert{T<:UFixed}(::Type{T}, x::T) = x
 convert{T1<:UFixed}(::Type{T1}, x::UFixed) = reinterpret(T1, round(rawtype(T1), (rawone(T1)/rawone(x))*reinterpret(x)))
 convert(::Type{UFixed16}, x::UFixed8) = reinterpret(UFixed16, convert(UInt16, 0x0101*reinterpret(x)))
-convert{T<:UFixed}(::Type{T}, x::Real) = T(round(rawtype(T), rawone(T)*x),0)
-
-ufixed8(x)  = convert(UFixed8, x)
-ufixed10(x) = convert(UFixed10, x)
-ufixed12(x) = convert(UFixed12, x)
-ufixed14(x) = convert(UFixed14, x)
-ufixed16(x) = convert(UFixed16, x)
-
-Compat.@dep_vectorize_1arg Real ufixed8
-Compat.@dep_vectorize_1arg Real ufixed10
-Compat.@dep_vectorize_1arg Real ufixed12
-Compat.@dep_vectorize_1arg Real ufixed14
-Compat.@dep_vectorize_1arg Real ufixed16
+convert{U<:UFixed}(::Type{U}, x::Real) = _convert(U, rawtype(U), x)
+_convert{U<:UFixed,T}(::Type{U}, ::Type{T}, x)       = U(round(T, widen1(rawone(U))*x), 0)
+_convert{U<:UFixed  }(::Type{U}, ::Type{UInt128}, x) = U(round(UInt128, rawone(U)*x), 0)
 
 
 convert(::Type{BigFloat}, x::UFixed) = reinterpret(x)*(1/BigFloat(rawone(x)))
diff --git a/test/fixed.jl b/test/fixed.jl
@@ -50,6 +50,14 @@ function test_fixed{T}(::Type{T}, f)
     end
 end
 
+@test_approx_eq_eps convert(Fixed{Int8,7}, 0.8) 0.797 0.001
+@test_approx_eq_eps convert(Fixed{Int8,7}, 0.9) 0.898 0.001
+@test_throws InexactError convert(Fixed{Int8, 7}, 0.999)
+@test_throws InexactError convert(Fixed{Int8, 7}, 1.0)
+@test_throws InexactError convert(Fixed{Int8, 7}, 1)
+@test_throws InexactError convert(Fixed{Int8, 7}, 2)
+@test_throws InexactError convert(Fixed{Int8, 7}, 128)
+
 for (TI, f) in [(Int8, 8), (Int16, 8), (Int16, 10), (Int32, 16)]
     T = Fixed{TI,f}
     println("  Testing $T")
diff --git a/test/ufixed.jl b/test/ufixed.jl
@@ -14,10 +14,10 @@ using Compat
 @test reinterpret(UFixed14, 0x1fa2) == 0x1fa2uf14
 @test reinterpret(UFixed16, 0x1fa2) == 0x1fa2uf16
 
-@test ufixed8(1.0) == 0xffuf8
-@test ufixed8(0.5) == 0x80uf8
-@test ufixed14(1.0) == 0x3fffuf14
-v = @compat ufixed12.([2])
+@test UFixed8(1.0) == 0xffuf8
+@test UFixed8(0.5) == 0x80uf8
+@test UFixed14(1.0) == 0x3fffuf14
+v = @compat UFixed12.([2])
 @test v == UFixed12[0x1ffeuf12]
 @test isa(v, Vector{UFixed12})
 
@@ -44,6 +44,15 @@ end
 @test typemax(UFixed{UInt64,3}) == typemax(UInt64) // (2^3-1)
 @test typemax(UFixed{UInt128,7}) == typemax(UInt128) // (2^7-1)
 
+@test_throws InexactError UFixed8(2)
+@test_throws InexactError UFixed8(255)
+@test_throws InexactError UFixed8(0xff)
+@test_throws InexactError UFixed16(2)
+@test_throws InexactError UFixed16(0xff)
+@test_throws InexactError UFixed16(0xffff)
+@test_throws InexactError convert(UFixed8,  typemax(UFixed10))
+@test_throws InexactError convert(UFixed16, typemax(UFixed10))
+
 x = UFixed8(0.5)
 @test isfinite(x) == true
 @test isnan(x) == false