Skip to content

Check iterable max_length as we validate #602

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 57 additions & 1 deletion benches/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ extern crate test;
use test::{black_box, Bencher};

use pyo3::prelude::*;
use pyo3::types::{PyDict, PyString};
use pyo3::types::{PyDict, PySet, PyString};

use _pydantic_core::SchemaValidator;

Expand Down Expand Up @@ -265,6 +265,7 @@ fn dict_python(bench: &mut Bencher) {
.collect::<Vec<String>>()
.join(", ")
);
dbg!(code.clone());
let input = py.eval(&code, None, None).unwrap();
let input = black_box(input);
bench.iter(|| {
Expand Down Expand Up @@ -696,3 +697,58 @@ class Foo(Enum):
}
})
}

/// Number of elements in the input collection built by each `constructing_pyset_*` benchmark.
const COLLECTION_SIZE: usize = 100_000;

/// Benchmarks building a `PySet` from a staging `Vec` that starts empty and
/// grows on demand while the elements are pushed into it.
#[bench]
fn constructing_pyset_from_vec_without_capacity(bench: &mut Bencher) {
    Python::with_gil(|py| {
        let input: Vec<PyObject> = (0..COLLECTION_SIZE).map(|n| n.to_object(py)).collect();

        bench.iter(|| {
            // Deliberately an explicit push loop on an unsized `Vec`: the cost of
            // growing the buffer is part of what this benchmark measures.
            let mut staged = Vec::new();
            for item in &input {
                staged.push(item);
            }
            black_box(PySet::new(py, staged.iter()).unwrap())
        })
    })
}

/// Benchmarks building a `PySet` from a staging `Vec` that is pre-allocated
/// with room for `COLLECTION_SIZE` elements before they are pushed into it.
#[bench]
fn constructing_pyset_from_vec_with_capacity(bench: &mut Bencher) {
    Python::with_gil(|py| {
        let input: Vec<PyObject> = (0..COLLECTION_SIZE).map(|n| n.to_object(py)).collect();

        bench.iter(|| {
            // Same push loop as the "without capacity" benchmark; only the
            // up-front allocation differs, so the two results are comparable.
            let mut staged = Vec::with_capacity(COLLECTION_SIZE);
            for item in &input {
                staged.push(item);
            }
            black_box(PySet::new(py, staged.iter()).unwrap())
        })
    })
}

/// Benchmarks building a `PySet` by adding the elements one at a time to an
/// initially empty set, without going through an intermediate `Vec`.
#[bench]
fn constructing_pyset_from_vec_directly(bench: &mut Bencher) {
    Python::with_gil(|py| {
        let input: Vec<PyObject> = (0..COLLECTION_SIZE).map(|v| v.to_object(py)).collect();

        bench.iter(|| {
            black_box({
                // `PySet::empty` creates the empty set directly instead of
                // passing a throwaway, arbitrarily typed empty `Vec` to `PySet::new`.
                let output = PySet::empty(py).unwrap();
                for x in &input {
                    output.add(x).unwrap();
                }
                output
            })
        })
    })
}
4 changes: 0 additions & 4 deletions pydantic_core/core_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1188,7 +1188,6 @@ class ListSchema(TypedDict, total=False):
min_length: int
max_length: int
strict: bool
allow_any_iter: bool
ref: str
metadata: Any
serialization: IncExSeqOrElseSerSchema
Expand All @@ -1200,7 +1199,6 @@ def list_schema(
min_length: int | None = None,
max_length: int | None = None,
strict: bool | None = None,
allow_any_iter: bool | None = None,
ref: str | None = None,
metadata: Any = None,
serialization: IncExSeqOrElseSerSchema | None = None,
Expand All @@ -1221,7 +1219,6 @@ def list_schema(
min_length: The value must be a list with at least this many items
max_length: The value must be a list with at most this many items
strict: Whether the value must already be a list, rather than being coerced from another type
allow_any_iter: Whether the value can be any iterable
ref: optional unique identifier of the schema, used to reference the schema in other places
metadata: Any other information you want to include with the schema, not used by pydantic-core
serialization: Custom serialization schema
Expand All @@ -1232,7 +1229,6 @@ def list_schema(
min_length=min_length,
max_length=max_length,
strict=strict,
allow_any_iter=allow_any_iter,
ref=ref,
metadata=metadata,
serialization=serialization,
Expand Down
2 changes: 1 addition & 1 deletion src/errors/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ impl ErrorType {
Self::MultipleOf {..} => "Input should be a multiple of {multiple_of}",
Self::FiniteNumber => "Input should be a finite number",
Self::TooShort {..} => "{field_type} should have at least {min_length} item{expected_plural} after validation, not {actual_length}",
Self::TooLong {..} => "{field_type} should have at most {max_length} item{expected_plural} after validation, not {actual_length}",
Self::TooLong {..} => "{field_type} should have at most {max_length} item{expected_plural} after validation, not >= {actual_length}",
Self::IterableType => "Input should be iterable",
Self::IterationError {..} => "Error iterating over object, error: {error}",
Self::StringType => "Input should be a valid string",
Expand Down
210 changes: 210 additions & 0 deletions src/input/generic_iterable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
use crate::errors::{py_err_string, ErrorType, ValError, ValResult};

use super::parse_json::{JsonInput, JsonObject};
use pyo3::{
exceptions::PyTypeError,
types::{
PyByteArray, PyBytes, PyDict, PyFrozenSet, PyIterator, PyList, PyMapping, PySequence, PySet, PyString, PyTuple,
},
PyAny, PyErr, PyResult, Python, ToPyObject,
};

/// A borrowed input that can be iterated over, tagged with the concrete kind
/// of container it was recognised as.
///
/// Each variant holds a reference to the underlying Python object, or to
/// parsed JSON data for the `Json*` variants.
#[derive(Debug)]
pub enum GenericIterable<'a> {
List(&'a PyList),
Tuple(&'a PyTuple),
Set(&'a PySet),
FrozenSet(&'a PyFrozenSet),
Dict(&'a PyDict),
// Treat dict values / keys / items as generic iterators
// since PyPy doesn't export the concrete types
DictKeys(&'a PyIterator),
DictValues(&'a PyIterator),
DictItems(&'a PyIterator),
Mapping(&'a PyMapping),
String(&'a PyString),
Bytes(&'a PyBytes),
PyByteArray(&'a PyByteArray),
Sequence(&'a PySequence),
Iterator(&'a PyIterator),
// Inputs that come from parsed JSON rather than from Python objects
JsonArray(&'a [JsonInput]),
JsonObject(&'a JsonObject),
}

/// A `(key, value)` pair of borrowed Python objects, the item type yielded by
/// `GenericIterable::into_mapping_items_iterator`.
type PyMappingItems<'a> = (&'a PyAny, &'a PyAny);

/// Turns one iteration result into a `PyMappingItems` pair.
///
/// An `Err` item is passed through untouched; an `Ok` item is extracted as
/// `PyMappingItems`, and the extraction error is returned if that fails.
#[inline(always)]
fn extract_items(item: PyResult<&PyAny>) -> PyResult<PyMappingItems<'_>> {
    item.and_then(|obj| obj.extract::<PyMappingItems>())
}

#[inline(always)]
fn map_err<'data>(py: Python<'data>, err: PyErr, input: &'data PyAny) -> ValError<'data> {
ValError::new(
ErrorType::IterationError {
error: py_err_string(py, err),
},
input,
)
}

impl<'a, 'py: 'a> GenericIterable<'a> {
    /// Returns the number of items in the iterable, if it can be determined.
    ///
    /// Variants whose length query is fallible yield `None` when that query
    /// returns an error.
    pub fn len(&self) -> Option<usize> {
        match self {
            Self::List(list) => Some(list.len()),
            Self::Tuple(tuple) => Some(tuple.len()),
            Self::Set(set) => Some(set.len()),
            Self::FrozenSet(frozenset) => Some(frozenset.len()),
            Self::Dict(dict) => Some(dict.len()),
            Self::DictKeys(keys) => keys.len().ok(),
            Self::DictValues(values) => values.len().ok(),
            Self::DictItems(items) => items.len().ok(),
            Self::Mapping(mapping) => mapping.len().ok(),
            Self::String(string) => string.len().ok(),
            Self::Bytes(bytes) => bytes.len().ok(),
            Self::PyByteArray(bytearray) => Some(bytearray.len()),
            Self::Sequence(sequence) => sequence.len().ok(),
            Self::Iterator(iterator) => iterator.len().ok(),
            Self::JsonArray(array) => Some(array.len()),
            Self::JsonObject(object) => Some(object.len()),
        }
    }

    /// Consumes the iterable and returns a boxed iterator over its elements.
    ///
    /// Dicts, mappings and JSON objects yield their keys only. A Python error
    /// raised while stepping a Python iterator is yielded as an
    /// `IterationError` item; an error while *obtaining* the iterator (or the
    /// mapping's keys) is returned as the outer `PyErr`.
    pub fn into_sequence_iterator(
        self,
        py: Python<'py>,
    ) -> PyResult<Box<dyn Iterator<Item = ValResult<'a, &'a PyAny>> + 'a>> {
        match self {
            Self::List(list) => Ok(Box::new(list.iter().map(Ok))),
            Self::Tuple(tuple) => Ok(Box::new(tuple.iter().map(Ok))),
            Self::Set(set) => Ok(Box::new(set.iter().map(Ok))),
            Self::FrozenSet(frozenset) => Ok(Box::new(frozenset.iter().map(Ok))),
            // Keys only, mirroring what `iter({})` does in Python
            Self::Dict(dict) => Ok(Box::new(dict.iter().map(|(key, _)| Ok(key)))),
            Self::DictKeys(keys) => Ok(Box::new(
                keys.map(move |item| item.map_err(|err| map_err(py, err, keys.as_ref()))),
            )),
            Self::DictValues(values) => Ok(Box::new(
                values.map(move |item| item.map_err(|err| map_err(py, err, values.as_ref()))),
            )),
            Self::DictItems(items) => Ok(Box::new(
                items.map(move |item| item.map_err(|err| map_err(py, err, items.as_ref()))),
            )),
            // Keys only, mirroring what `iter({})` does in Python
            Self::Mapping(mapping) => Ok(Box::new(
                mapping
                    .keys()?
                    .iter()?
                    .map(move |item| item.map_err(|err| map_err(py, err, mapping.as_ref()))),
            )),
            Self::String(string) => Ok(Box::new(
                string
                    .iter()?
                    .map(move |item| item.map_err(|err| map_err(py, err, string.as_ref()))),
            )),
            Self::Bytes(bytes) => Ok(Box::new(
                bytes
                    .iter()?
                    .map(move |item| item.map_err(|err| map_err(py, err, bytes.as_ref()))),
            )),
            Self::PyByteArray(bytearray) => Ok(Box::new(
                bytearray
                    .iter()?
                    .map(move |item| item.map_err(|err| map_err(py, err, bytearray.as_ref()))),
            )),
            Self::Sequence(sequence) => Ok(Box::new(
                sequence
                    .iter()?
                    .map(move |item| item.map_err(|err| map_err(py, err, sequence.as_ref()))),
            )),
            Self::Iterator(iterator) => Ok(Box::new(
                iterator.map(move |item| item.map_err(|err| map_err(py, err, iterator.as_ref()))),
            )),
            Self::JsonArray(array) => Ok(Box::new(array.iter().map(move |element| {
                let object = element.to_object(py);
                Ok(object.into_ref(py))
            }))),
            // Keys only here as well, purely for consistency with the Python dict behaviour
            Self::JsonObject(object) => Ok(Box::new(
                object.iter().map(move |(key, _)| Ok(key.to_object(py).into_ref(py))),
            )),
        }
    }

    /// Consumes the iterable and returns a boxed iterator over `(key, value)` pairs.
    ///
    /// Dicts, mappings and JSON objects yield their items. Every other
    /// iterable has each element extracted as a pair via `extract_items`; a
    /// failed extraction or iteration step is yielded as an `IterationError`
    /// item. Strings, bytes and bytearrays are rejected up front with a
    /// `TypeError`.
    pub fn into_mapping_items_iterator(
        self,
        py: Python<'a>,
    ) -> PyResult<Box<dyn Iterator<Item = ValResult<'a, PyMappingItems<'a>>> + 'a>> {
        match self {
            Self::List(list) => Ok(Box::new(list.iter().map(move |item| {
                extract_items(Ok(item)).map_err(|err| map_err(py, err, list.as_ref()))
            }))),
            Self::Tuple(tuple) => Ok(Box::new(tuple.iter().map(move |item| {
                extract_items(Ok(item)).map_err(|err| map_err(py, err, tuple.as_ref()))
            }))),
            Self::Set(set) => Ok(Box::new(set.iter().map(move |item| {
                extract_items(Ok(item)).map_err(|err| map_err(py, err, set.as_ref()))
            }))),
            Self::FrozenSet(frozenset) => Ok(Box::new(frozenset.iter().map(move |item| {
                extract_items(Ok(item)).map_err(|err| map_err(py, err, frozenset.as_ref()))
            }))),
            // (key, value) pairs here, unlike `iter({})` in Python which gives keys only
            Self::Dict(dict) => Ok(Box::new(dict.iter().map(Ok))),
            // Individual keys or values may themselves be tuples, so try to extract pairs
            Self::DictKeys(keys) => Ok(Box::new(
                keys.map(extract_items)
                    .map(move |pair| pair.map_err(|err| map_err(py, err, keys.as_ref()))),
            )),
            Self::DictValues(values) => Ok(Box::new(
                values
                    .map(extract_items)
                    .map(move |pair| pair.map_err(|err| map_err(py, err, values.as_ref()))),
            )),
            Self::DictItems(items) => Ok(Box::new(
                items
                    .map(extract_items)
                    .map(move |pair| pair.map_err(|err| map_err(py, err, items.as_ref()))),
            )),
            // (key, value) pairs here, unlike `iter({})` in Python which gives keys only
            Self::Mapping(mapping) => Ok(Box::new(
                mapping
                    .items()?
                    .iter()?
                    .map(extract_items)
                    .map(move |pair| pair.map_err(|err| map_err(py, err, mapping.as_ref()))),
            )),
            // Python's `dict("foobar")` fails with "dictionary update sequence element #0
            // has length 1; 2 is required"; these messages aim to be clearer than that.
            Self::String(_) => Err(PyTypeError::new_err(
                "Expected an iterable of (key, value) pairs, got a string",
            )),
            Self::Bytes(_) => Err(PyTypeError::new_err(
                "Expected an iterable of (key, value) pairs, got a bytes",
            )),
            Self::PyByteArray(_) => Err(PyTypeError::new_err(
                "Expected an iterable of (key, value) pairs, got a bytearray",
            )),
            // Elements may well not be convertible to a (Hashable, Any) tuple. Python fails
            // with a message like the one quoted above; here PyO3's extraction error is
            // reported instead, which differs slightly but is similar enough.
            Self::Sequence(sequence) => Ok(Box::new(
                sequence
                    .iter()?
                    .map(extract_items)
                    .map(move |pair| pair.map_err(|err| map_err(py, err, sequence.as_ref()))),
            )),
            Self::Iterator(iterator) => Ok(Box::new(
                iterator
                    .iter()?
                    .map(extract_items)
                    .map(move |pair| pair.map_err(|err| map_err(py, err, iterator.as_ref()))),
            )),
            Self::JsonArray(array) => Ok(Box::new(
                array
                    .iter()
                    .map(move |element| extract_items(Ok(element.to_object(py).into_ref(py))))
                    .map(move |pair| {
                        pair.map_err(|err| map_err(py, err, array.to_object(py).into_ref(py)))
                    }),
            )),
            // (key, value) pairs here, unlike `iter({})` in Python which gives keys only
            Self::JsonObject(object) => Ok(Box::new(object.iter().map(move |(name, value)| {
                let key = PyString::new(py, name).as_ref();
                let val = value.to_object(py).into_ref(py);
                Ok((key, val))
            }))),
        }
    }
}
55 changes: 3 additions & 52 deletions src/input/input_abstract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ use crate::errors::{InputValue, LocItem, ValResult};
use crate::{PyMultiHostUrl, PyUrl};

use super::datetime::{EitherDate, EitherDateTime, EitherTime, EitherTimedelta};
use super::generic_iterable::GenericIterable;
use super::return_enums::{EitherBytes, EitherString};
use super::{GenericArguments, GenericCollection, GenericIterator, GenericMapping, JsonInput};
use super::{GenericArguments, GenericIterator, GenericMapping, JsonInput};

#[derive(Debug, Clone, Copy)]
pub enum InputType {
Expand Down Expand Up @@ -166,57 +167,7 @@ pub trait Input<'a>: fmt::Debug + ToPyObject {
self.validate_dict(strict)
}

fn validate_list(&'a self, strict: bool, allow_any_iter: bool) -> ValResult<GenericCollection<'a>> {
if strict && !allow_any_iter {
self.strict_list()
} else {
self.lax_list(allow_any_iter)
}
}
fn strict_list(&'a self) -> ValResult<GenericCollection<'a>>;
#[cfg_attr(has_no_coverage, no_coverage)]
fn lax_list(&'a self, _allow_any_iter: bool) -> ValResult<GenericCollection<'a>> {
self.strict_list()
}

fn validate_tuple(&'a self, strict: bool) -> ValResult<GenericCollection<'a>> {
if strict {
self.strict_tuple()
} else {
self.lax_tuple()
}
}
fn strict_tuple(&'a self) -> ValResult<GenericCollection<'a>>;
#[cfg_attr(has_no_coverage, no_coverage)]
fn lax_tuple(&'a self) -> ValResult<GenericCollection<'a>> {
self.strict_tuple()
}

fn validate_set(&'a self, strict: bool) -> ValResult<GenericCollection<'a>> {
if strict {
self.strict_set()
} else {
self.lax_set()
}
}
fn strict_set(&'a self) -> ValResult<GenericCollection<'a>>;
#[cfg_attr(has_no_coverage, no_coverage)]
fn lax_set(&'a self) -> ValResult<GenericCollection<'a>> {
self.strict_set()
}

fn validate_frozenset(&'a self, strict: bool) -> ValResult<GenericCollection<'a>> {
if strict {
self.strict_frozenset()
} else {
self.lax_frozenset()
}
}
fn strict_frozenset(&'a self) -> ValResult<GenericCollection<'a>>;
#[cfg_attr(has_no_coverage, no_coverage)]
fn lax_frozenset(&'a self) -> ValResult<GenericCollection<'a>> {
self.strict_frozenset()
}
fn extract_iterable(&'a self) -> ValResult<GenericIterable<'a>>;

fn validate_iter(&self) -> ValResult<GenericIterator>;

Expand Down
Loading