support BigInt

samuelcolvin · samuelcolvin · commit 0dab63d42441 · 2023-05-26T11:53:28.000+01:00
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -40,6 +40,7 @@ url = "2.3.1"
 # idna is already required by url, added here to be explicit
 idna = "0.3.0"
 base64 = "0.13.1"
+num-bigint = "0.4.3"
 
 [lib]
 name = "_pydantic_core"
@@ -50,7 +51,7 @@ crate-type = ["cdylib", "rlib"]
 extension-module = ["pyo3/extension-module"]
 # required for cargo bench
 auto-initialize = ["pyo3/auto-initialize"]
-default = ["mimalloc", "mimalloc/local_dynamic_tls", "pyo3/generate-import-lib"]
+default = ["mimalloc", "mimalloc/local_dynamic_tls", "pyo3/generate-import-lib", "pyo3/num-bigint"]
 
 [profile.release]
 lto = "fat"
diff --git a/pydantic_core/core_schema.py b/pydantic_core/core_schema.py
@@ -3772,8 +3772,8 @@ def definition_reference_schema(
     'bool_parsing',
     'int_type',
     'int_parsing',
+    'int_parsing_size',
     'int_from_float',
-    'int_overflow',
     'float_type',
     'float_parsing',
     'bytes_type',
diff --git a/src/errors/types.rs b/src/errors/types.rs
@@ -174,8 +174,8 @@ pub enum ErrorType {
     // int errors
     IntType,
     IntParsing,
+    IntParsingSize,
     IntFromFloat,
-    IntOverflow,
     // ---------------------
     // float errors
     FloatType,
@@ -489,7 +489,7 @@ impl ErrorType {
             Self::IntType => "Input should be a valid integer",
             Self::IntParsing => "Input should be a valid integer, unable to parse string as an integer",
             Self::IntFromFloat => "Input should be a valid integer, got a number with a fractional part",
-            Self::IntOverflow => "Input integer too large to convert to 64-bit integer",
+            Self::IntParsingSize => "Unable to parse input string as an integer, exceed maximum size",
             Self::FloatType => "Input should be a valid number",
             Self::FloatParsing => "Input should be a valid number, unable to parse string as an number",
             Self::BytesType => "Input should be a valid bytes",
diff --git a/src/input/input_json.rs b/src/input/input_json.rs
@@ -107,7 +107,7 @@ impl<'a> Input<'a> for JsonInput {
             JsonInput::String(s) => str_as_bool(self, s),
             JsonInput::Int(int) => int_as_bool(self, *int),
             JsonInput::Float(float) => match float_as_int(self, *float) {
-                Ok(int) => int_as_bool(self, int),
+                Ok(int) => int.as_bool().ok_or_else(|| ValError::new(ErrorType::BoolParsing, self)),
                 _ => Err(ValError::new(ErrorType::BoolType, self)),
             },
             _ => Err(ValError::new(ErrorType::BoolType, self)),
@@ -122,18 +122,17 @@ impl<'a> Input<'a> for JsonInput {
         }
     }
     fn lax_int(&'a self) -> ValResult<EitherInt<'a>> {
-        let int_result = match self {
+        match self {
             JsonInput::Bool(b) => match *b {
-                true => Ok(1),
-                false => Ok(0),
+                true => Ok(EitherInt::I64(1)),
+                false => Ok(EitherInt::I64(0)),
             },
-            JsonInput::Int(i) => Ok(*i),
-            JsonInput::Uint(u) => return Ok(EitherInt::U64(*u)),
+            JsonInput::Int(i) => Ok(EitherInt::I64(*i)),
+            JsonInput::Uint(u) => Ok(EitherInt::U64(*u)),
             JsonInput::Float(f) => float_as_int(self, *f),
             JsonInput::String(str) => str_as_int(self, str),
             _ => Err(ValError::new(ErrorType::IntType, self)),
-        };
-        int_result.map(EitherInt::I64)
+        }
     }
 
     fn ultra_strict_float(&self) -> ValResult<f64> {
diff --git a/src/input/input_python.rs b/src/input/input_python.rs
@@ -260,7 +260,7 @@ impl<'a> Input<'a> for PyAny {
             int_as_bool(self, int)
         } else if let Ok(float) = self.extract::<f64>() {
             match float_as_int(self, float) {
-                Ok(int) => int_as_bool(self, int),
+                Ok(int) => int.as_bool().ok_or_else(|| ValError::new(ErrorType::BoolParsing, self)),
                 _ => Err(ValError::new(ErrorType::BoolType, self)),
             }
         } else {
@@ -287,10 +287,9 @@ impl<'a> Input<'a> for PyAny {
         if PyInt::is_exact_type_of(self) {
             Ok(EitherInt::Py(self))
         } else if let Some(cow_str) = maybe_as_string(self, ErrorType::IntParsing)? {
-            let int = str_as_int(self, &cow_str)?;
-            Ok(EitherInt::I64(int))
+            str_as_int(self, &cow_str)
         } else if let Ok(float) = self.extract::<f64>() {
-            Ok(EitherInt::I64(float_as_int(self, float)?))
+            float_as_int(self, float)
         } else {
             Err(ValError::new(ErrorType::IntType, self))
         }
diff --git a/src/input/return_enums.rs b/src/input/return_enums.rs
@@ -1,6 +1,8 @@
 use std::borrow::Cow;
 use std::slice::Iter as SliceIter;
 
+use num_bigint::BigInt;
+
 use pyo3::prelude::*;
 use pyo3::types::iter::PyDictIterator;
 use pyo3::types::{
@@ -822,6 +824,7 @@ impl<'a> IntoPy<PyObject> for EitherBytes<'a> {
 pub enum EitherInt<'a> {
     I64(i64),
     U64(u64),
+    BigInt(BigInt),
     Py(&'a PyAny),
 }
 
@@ -831,9 +834,41 @@ impl<'a> EitherInt<'a> {
             EitherInt::I64(i) => Ok(i),
             EitherInt::U64(u) => match i64::try_from(u) {
                 Ok(u) => Ok(u),
-                Err(_) => Err(ValError::new(ErrorType::IntOverflow, u.into_py(py).into_ref(py))),
+                Err(_) => Err(ValError::new(ErrorType::IntParsingSize, u.into_py(py).into_ref(py))),
+            },
+            EitherInt::BigInt(u) => match i64::try_from(u) {
+                Ok(u) => Ok(u),
+                Err(e) => Err(ValError::new(
+                    ErrorType::IntParsingSize,
+                    e.into_original().into_py(py).into_ref(py),
+                )),
+            },
+            EitherInt::Py(i) => i.extract().map_err(|_| ValError::new(ErrorType::IntParsingSize, i)),
+        }
+    }
+
+    pub fn as_bool(&self) -> Option<bool> {
+        match self {
+            EitherInt::I64(i) => match i {
+                0 => Some(false),
+                1 => Some(true),
+                _ => None,
+            },
+            EitherInt::U64(u) => match u {
+                0 => Some(false),
+                1 => Some(true),
+                _ => None,
+            },
+            EitherInt::BigInt(i) => match u8::try_from(i) {
+                Ok(0) => Some(false),
+                Ok(1) => Some(true),
+                _ => None,
+            },
+            EitherInt::Py(i) => match i.extract::<u8>() {
+                Ok(0) => Some(false),
+                Ok(1) => Some(true),
+                _ => None,
             },
-            EitherInt::Py(i) => i.extract().map_err(|_| ValError::new(ErrorType::IntOverflow, i)),
         }
     }
 }
@@ -843,6 +878,7 @@ impl<'a> IntoPy<PyObject> for EitherInt<'a> {
         match self {
             Self::I64(int) => int.into_py(py),
             Self::U64(int) => int.into_py(py),
+            Self::BigInt(int) => int.into_py(py),
             Self::Py(int) => int.into_py(py),
         }
     }
diff --git a/src/input/shared.rs b/src/input/shared.rs
@@ -1,4 +1,7 @@
+use num_bigint::BigInt;
+
 use crate::errors::{ErrorType, ValError, ValResult};
+use crate::input::EitherInt;
 
 use super::Input;
 
@@ -43,12 +46,20 @@ pub fn int_as_bool<'a>(input: &'a impl Input<'a>, int: i64) -> ValResult<'a, boo
     }
 }
 
-pub fn str_as_int<'s, 'l>(input: &'s impl Input<'s>, str: &'l str) -> ValResult<'s, i64> {
-    if let Ok(i) = str.parse::<i64>() {
-        Ok(i)
-    } else if let Some(s) = strip_decimal_zeros(str) {
-        if let Ok(i) = s.parse::<i64>() {
-            Ok(i)
+/// parse a string as an int
+///
+/// max length of the input is 4300, see
+/// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
+/// https://github.com/python/cpython/issues/95778 for more info on that length bound
+pub fn str_as_int<'s, 'l>(input: &'s impl Input<'s>, str: &'l str) -> ValResult<'s, EitherInt<'s>> {
+    let len = str.len();
+    if len > 4300 {
+        Err(ValError::new(ErrorType::IntParsing, input))
+    } else if let Some(int) = _parse_str(input, str, len) {
+        Ok(int)
+    } else if let Some(str_stripped) = strip_decimal_zeros(str) {
+        if let Some(int) = _parse_str(input, str_stripped, len) {
+            Ok(int)
         } else {
             Err(ValError::new(ErrorType::IntParsing, input))
         }
@@ -57,6 +68,19 @@ pub fn str_as_int<'s, 'l>(input: &'s impl Input<'s>, str: &'l str) -> ValResult<
     }
 }
 
+/// parse a string as an int, `input` is required here to get lifetimes to match up
+///
+fn _parse_str<'s, 'l>(_input: &'s impl Input<'s>, str: &'l str, len: usize) -> Option<EitherInt<'s>> {
+    if len < 19 {
+        if let Ok(i) = str.parse::<i64>() {
+            return Some(EitherInt::I64(i));
+        }
+    } else if let Ok(i) = str.parse::<BigInt>() {
+        return Some(EitherInt::BigInt(i));
+    }
+    None
+}
+
 /// we don't want to parse as f64 then call `float_as_int` as it can loose precision for large ints, therefore
 /// we strip `.0+` manually instead, then parse as i64
 fn strip_decimal_zeros(s: &str) -> Option<&str> {
@@ -68,14 +92,14 @@ fn strip_decimal_zeros(s: &str) -> Option<&str> {
     None
 }
 
-pub fn float_as_int<'a>(input: &'a impl Input<'a>, float: f64) -> ValResult<'a, i64> {
+pub fn float_as_int<'a>(input: &'a impl Input<'a>, float: f64) -> ValResult<'a, EitherInt<'a>> {
     if float == f64::INFINITY || float == f64::NEG_INFINITY || float.is_nan() {
         Err(ValError::new(ErrorType::FiniteNumber, input))
     } else if float % 1.0 != 0.0 {
         Err(ValError::new(ErrorType::IntFromFloat, input))
-    } else if float > i64::MAX as f64 || float < i64::MIN as f64 {
-        Err(ValError::new(ErrorType::IntOverflow, input))
+    } else if (i64::MIN as f64) < float && float < (i64::MAX as f64) {
+        Ok(EitherInt::I64(float as i64))
     } else {
-        Ok(float as i64)
+        Err(ValError::new(ErrorType::IntParsingSize, input))
     }
 }
diff --git a/tests/test_errors.py b/tests/test_errors.py
@@ -224,8 +224,8 @@ def f(input_value, info):
     ('bool_parsing', 'Input should be a valid boolean, unable to interpret input', None),
     ('int_type', 'Input should be a valid integer', None),
     ('int_parsing', 'Input should be a valid integer, unable to parse string as an integer', None),
+    ('int_parsing_size', 'Unable to parse input string as an integer, exceed maximum size', None),
     ('int_from_float', 'Input should be a valid integer, got a number with a fractional part', None),
-    ('int_overflow', 'Input integer too large to convert to 64-bit integer', None),
     ('multiple_of', 'Input should be a multiple of 42.1', {'multiple_of': 42.1}),
     ('greater_than', 'Input should be greater than 42.1', {'gt': 42.1}),
     ('greater_than_equal', 'Input should be greater than or equal to 42.1', {'ge': 42.1}),
diff --git a/tests/validators/test_int.py b/tests/validators/test_int.py