Skip to content

Commit f4a0675

Browse files
Use more explicit warning regarding serialization warning for missing fields (#1415)
1 parent 4113638 commit f4a0675

File tree

8 files changed

+143
-85
lines changed

8 files changed

+143
-85
lines changed

src/errors/mod.rs

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
use core::fmt;
2-
use std::borrow::Cow;
3-
41
use pyo3::prelude::*;
52

63
mod line_error;
@@ -33,46 +30,3 @@ pub fn py_err_string(py: Python, err: PyErr) -> String {
3330
Err(_) => "Unknown Error".to_string(),
3431
}
3532
}
36-
37-
// TODO: replace is_utf8_char_boundary, floor_char_boundary and ceil_char_boundary
38-
// with builtin methods once https://github.com/rust-lang/rust/issues/93743 is resolved
39-
// These are just copy pasted from the current implementation
40-
const fn is_utf8_char_boundary(value: u8) -> bool {
41-
// This is bit magic equivalent to: b < 128 || b >= 192
42-
(value as i8) >= -0x40
43-
}
44-
45-
pub fn floor_char_boundary(value: &str, index: usize) -> usize {
46-
if index >= value.len() {
47-
value.len()
48-
} else {
49-
let lower_bound = index.saturating_sub(3);
50-
let new_index = value.as_bytes()[lower_bound..=index]
51-
.iter()
52-
.rposition(|b| is_utf8_char_boundary(*b));
53-
54-
// SAFETY: we know that the character boundary will be within four bytes
55-
unsafe { lower_bound + new_index.unwrap_unchecked() }
56-
}
57-
}
58-
59-
pub fn ceil_char_boundary(value: &str, index: usize) -> usize {
60-
let upper_bound = Ord::min(index + 4, value.len());
61-
value.as_bytes()[index..upper_bound]
62-
.iter()
63-
.position(|b| is_utf8_char_boundary(*b))
64-
.map_or(upper_bound, |pos| pos + index)
65-
}
66-
67-
pub fn write_truncated_to_50_bytes<F: fmt::Write>(f: &mut F, val: Cow<'_, str>) -> std::fmt::Result {
68-
if val.len() > 50 {
69-
write!(
70-
f,
71-
"{}...{}",
72-
&val[0..floor_char_boundary(&val, 25)],
73-
&val[ceil_char_boundary(&val, val.len() - 24)..]
74-
)
75-
} else {
76-
write!(f, "{val}")
77-
}
78-
}

src/errors/validation_exception.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use crate::errors::LocItem;
1818
use crate::get_pydantic_version;
1919
use crate::input::InputType;
2020
use crate::serializers::{DuckTypingSerMode, Extra, SerMode, SerializationState};
21-
use crate::tools::{safe_repr, SchemaDict};
21+
use crate::tools::{safe_repr, write_truncated_to_limited_bytes, SchemaDict};
2222

2323
use super::line_error::ValLineError;
2424
use super::location::Location;
@@ -526,7 +526,7 @@ impl PyLineError {
526526
let input_value = self.input_value.bind(py);
527527
let input_str = safe_repr(input_value);
528528
write!(output, ", input_value=")?;
529-
super::write_truncated_to_50_bytes(&mut output, input_str.to_cow())?;
529+
write_truncated_to_limited_bytes(&mut output, &input_str.to_string(), 50)?;
530530

531531
if let Ok(type_) = input_value.get_type().qualname() {
532532
write!(output, ", input_type={type_}")?;

src/serializers/extra.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use crate::recursion_guard::ContainsRecursionState;
1515
use crate::recursion_guard::RecursionError;
1616
use crate::recursion_guard::RecursionGuard;
1717
use crate::recursion_guard::RecursionState;
18-
use crate::tools::safe_repr;
18+
use crate::tools::truncate_safe_repr;
1919
use crate::PydanticSerializationError;
2020

2121
/// this is ugly, would be much better if extra could be stored in `SerializationState`
@@ -426,15 +426,10 @@ impl CollectWarnings {
426426
.qualname()
427427
.unwrap_or_else(|_| PyString::new_bound(value.py(), "<unknown python object>"));
428428

429-
let input_str = safe_repr(value);
430-
let mut value_str = String::with_capacity(100);
431-
value_str.push_str("with value `");
432-
crate::errors::write_truncated_to_50_bytes(&mut value_str, input_str.to_cow())
433-
.expect("Writing to a `String` failed");
434-
value_str.push('`');
429+
let value_str = truncate_safe_repr(value, None);
435430

436431
self.add_warning(format!(
437-
"Expected `{field_type}` but got `{type_name}` {value_str} - serialized value may not be as expected"
432+
"Expected `{field_type}` but got `{type_name}` with value `{value_str}` - serialized value may not be as expected"
438433
));
439434
}
440435
}

src/serializers/fields.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use smallvec::SmallVec;
99

1010
use crate::serializers::extra::SerCheck;
1111
use crate::serializers::DuckTypingSerMode;
12+
use crate::tools::truncate_safe_repr;
1213
use crate::PydanticSerializationUnexpectedValue;
1314

1415
use super::computed_fields::ComputedFields;
@@ -210,7 +211,24 @@ impl GeneralFieldsSerializer {
210211
// Check for missing fields, we can't have extra fields here
211212
&& self.required_fields > used_req_fields
212213
{
213-
Err(PydanticSerializationUnexpectedValue::new_err(None))
214+
let required_fields = self.required_fields;
215+
let type_name = match extra.model {
216+
Some(model) => model
217+
.get_type()
218+
.qualname()
219+
.ok()
220+
.unwrap_or_else(|| PyString::new_bound(py, "<unknown python object>"))
221+
.to_string(),
222+
None => "<unknown python object>".to_string(),
223+
};
224+
let field_value = match extra.model {
225+
Some(model) => truncate_safe_repr(model, Some(100)),
226+
None => "<unknown python object>".to_string(),
227+
};
228+
229+
Err(PydanticSerializationUnexpectedValue::new_err(Some(format!(
230+
"Expected {required_fields} fields but got {used_req_fields} for type `{type_name}` with value `{field_value}` - serialized value may not be as expected."
231+
))))
214232
} else {
215233
Ok(output_dict)
216234
}

src/serializers/type_serializers/model.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,9 @@ impl TypeSerializer for ModelSerializer {
167167
) -> PyResult<PyObject> {
168168
let model = Some(value);
169169
let duck_typing_ser_mode = extra.duck_typing_ser_mode.next_mode();
170+
170171
let model_extra = Extra {
171172
model,
172-
field_name: None,
173173
duck_typing_ser_mode,
174174
..*extra
175175
};
@@ -221,7 +221,6 @@ impl TypeSerializer for ModelSerializer {
221221
let duck_typing_ser_mode = extra.duck_typing_ser_mode.next_mode();
222222
let model_extra = Extra {
223223
model,
224-
field_name: None,
225224
duck_typing_ser_mode,
226225
..*extra
227226
};

src/serializers/type_serializers/union.rs

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@ use std::borrow::Cow;
88
use crate::build_tools::py_schema_err;
99
use crate::common::union::{Discriminator, SMALL_UNION_THRESHOLD};
1010
use crate::definitions::DefinitionsBuilder;
11-
use crate::errors::write_truncated_to_50_bytes;
1211
use crate::lookup_key::LookupKey;
1312
use crate::serializers::type_serializers::py_err_se_err;
14-
use crate::tools::{safe_repr, SchemaDict};
13+
use crate::tools::{truncate_safe_repr, SchemaDict};
1514
use crate::PydanticSerializationUnexpectedValue;
1615

1716
use super::{
@@ -446,15 +445,10 @@ impl TaggedUnionSerializer {
446445
Discriminator::Function(func) => func.call1(py, (value,)).ok(),
447446
};
448447
if discriminator_value.is_none() {
449-
let input_str = safe_repr(value);
450-
let mut value_str = String::with_capacity(100);
451-
value_str.push_str("with value `");
452-
write_truncated_to_50_bytes(&mut value_str, input_str.to_cow()).expect("Writing to a `String` failed");
453-
value_str.push('`');
454-
448+
let value_str = truncate_safe_repr(value, None);
455449
extra.warnings.custom_warning(
456450
format!(
457-
"Failed to get discriminator value for tagged union serialization {value_str} - defaulting to left to right union serialization."
451+
"Failed to get discriminator value for tagged union serialization with value `{value_str}` - defaulting to left to right union serialization."
458452
)
459453
);
460454
}

src/tools.rs

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::borrow::Cow;
1+
use core::fmt;
22

33
use pyo3::exceptions::PyKeyError;
44
use pyo3::prelude::*;
@@ -96,15 +96,6 @@ pub enum ReprOutput<'py> {
9696
Fallback(String),
9797
}
9898

99-
impl ReprOutput<'_> {
100-
pub fn to_cow(&self) -> Cow<'_, str> {
101-
match self {
102-
ReprOutput::Python(s) => s.to_string_lossy(),
103-
ReprOutput::Fallback(s) => s.into(),
104-
}
105-
}
106-
}
107-
10899
impl std::fmt::Display for ReprOutput<'_> {
109100
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
110101
match self {
@@ -124,6 +115,15 @@ pub fn safe_repr<'py>(v: &Bound<'py, PyAny>) -> ReprOutput<'py> {
124115
}
125116
}
126117

118+
pub fn truncate_safe_repr(v: &Bound<'_, PyAny>, max_len: Option<usize>) -> String {
119+
let max_len = max_len.unwrap_or(50); // default to 50 bytes
120+
let input_str = safe_repr(v);
121+
let mut limited_str = String::with_capacity(max_len);
122+
write_truncated_to_limited_bytes(&mut limited_str, &input_str.to_string(), max_len)
123+
.expect("Writing to a `String` failed");
124+
limited_str
125+
}
126+
127127
pub fn extract_i64(v: &Bound<'_, PyAny>) -> Option<i64> {
128128
#[cfg(PyPy)]
129129
if !v.is_instance_of::<pyo3::types::PyInt>() {
@@ -146,3 +146,47 @@ pub(crate) fn new_py_string<'py>(py: Python<'py>, s: &str, cache_str: StringCach
146146
pystring_fast_new(py, s, ascii_only)
147147
}
148148
}
149+
150+
// TODO: replace is_utf8_char_boundary, floor_char_boundary and ceil_char_boundary
151+
// with builtin methods once https://github.com/rust-lang/rust/issues/93743 is resolved
152+
// These are just copy pasted from the current implementation
153+
const fn is_utf8_char_boundary(value: u8) -> bool {
154+
// This is bit magic equivalent to: b < 128 || b >= 192
155+
(value as i8) >= -0x40
156+
}
157+
158+
pub fn floor_char_boundary(value: &str, index: usize) -> usize {
159+
if index >= value.len() {
160+
value.len()
161+
} else {
162+
let lower_bound = index.saturating_sub(3);
163+
let new_index = value.as_bytes()[lower_bound..=index]
164+
.iter()
165+
.rposition(|b| is_utf8_char_boundary(*b));
166+
167+
// SAFETY: we know that the character boundary will be within four bytes
168+
unsafe { lower_bound + new_index.unwrap_unchecked() }
169+
}
170+
}
171+
172+
pub fn ceil_char_boundary(value: &str, index: usize) -> usize {
173+
let upper_bound = Ord::min(index + 4, value.len());
174+
value.as_bytes()[index..upper_bound]
175+
.iter()
176+
.position(|b| is_utf8_char_boundary(*b))
177+
.map_or(upper_bound, |pos| pos + index)
178+
}
179+
180+
pub fn write_truncated_to_limited_bytes<F: fmt::Write>(f: &mut F, val: &str, max_len: usize) -> std::fmt::Result {
181+
if val.len() > max_len {
182+
let mid_point = max_len.div_ceil(2);
183+
write!(
184+
f,
185+
"{}...{}",
186+
&val[0..floor_char_boundary(val, mid_point)],
187+
&val[ceil_char_boundary(val, val.len() - (mid_point - 1))..]
188+
)
189+
} else {
190+
write!(f, "{val}")
191+
}
192+
}

tests/serializers/test_model.py

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,13 @@
1313
import pytest
1414
from dirty_equals import IsJson
1515

16-
from pydantic_core import PydanticSerializationError, SchemaSerializer, SchemaValidator, core_schema
16+
from pydantic_core import (
17+
PydanticSerializationError,
18+
PydanticSerializationUnexpectedValue,
19+
SchemaSerializer,
20+
SchemaValidator,
21+
core_schema,
22+
)
1723

1824
from ..conftest import plain_repr
1925

@@ -1084,20 +1090,68 @@ class Model:
10841090

10851091

10861092
def test_no_warn_on_exclude() -> None:
1087-
warnings.simplefilter('error')
1093+
with warnings.catch_warnings():
1094+
warnings.simplefilter('error')
1095+
1096+
s = SchemaSerializer(
1097+
core_schema.model_schema(
1098+
BasicModel,
1099+
core_schema.model_fields_schema(
1100+
{
1101+
'a': core_schema.model_field(core_schema.int_schema()),
1102+
'b': core_schema.model_field(core_schema.int_schema()),
1103+
}
1104+
),
1105+
)
1106+
)
1107+
1108+
value = BasicModel(a=0, b=1)
1109+
assert s.to_python(value, exclude={'b'}) == {'a': 0}
1110+
assert s.to_python(value, mode='json', exclude={'b'}) == {'a': 0}
1111+
1112+
1113+
def test_warn_on_missing_field() -> None:
1114+
class AModel(BasicModel): ...
1115+
1116+
class BModel(BasicModel): ...
10881117

10891118
s = SchemaSerializer(
10901119
core_schema.model_schema(
10911120
BasicModel,
10921121
core_schema.model_fields_schema(
10931122
{
1094-
'a': core_schema.model_field(core_schema.int_schema()),
1095-
'b': core_schema.model_field(core_schema.int_schema()),
1123+
'root': core_schema.model_field(
1124+
core_schema.tagged_union_schema(
1125+
choices={
1126+
'a': core_schema.model_schema(
1127+
AModel,
1128+
core_schema.model_fields_schema(
1129+
{
1130+
'type': core_schema.model_field(core_schema.literal_schema(['a'])),
1131+
'a': core_schema.model_field(core_schema.int_schema()),
1132+
}
1133+
),
1134+
),
1135+
'b': core_schema.model_schema(
1136+
BModel,
1137+
core_schema.model_fields_schema(
1138+
{
1139+
'type': core_schema.model_field(core_schema.literal_schema(['b'])),
1140+
'b': core_schema.model_field(core_schema.int_schema()),
1141+
}
1142+
),
1143+
),
1144+
},
1145+
discriminator='type',
1146+
)
1147+
),
10961148
}
10971149
),
10981150
)
10991151
)
11001152

1101-
value = BasicModel(a=0, b=1)
1102-
assert s.to_python(value, exclude={'b'}) == {'a': 0}
1103-
assert s.to_python(value, mode='json', exclude={'b'}) == {'a': 0}
1153+
with pytest.raises(
1154+
PydanticSerializationUnexpectedValue, match='Expected 2 fields but got 1 for type `.*AModel` with value `.*`.+'
1155+
):
1156+
value = BasicModel(root=AModel(type='a'))
1157+
s.to_python(value)

0 commit comments

Comments
 (0)