Skip to content

Commit 3a9c162

Browse files
authored
Properly truncate characters when splitting up strings in error repr (#746)
1 parent 6666073 commit 3a9c162

File tree

2 files changed

+50
-4
lines changed

2 files changed

+50
-4
lines changed

src/errors/validation_exception.rs

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,14 +230,44 @@ impl ValidationError {
230230
}
231231
}
232232

233+
// TODO: replace is_utf8_char_boundary, floor_char_boundary and ceil_char_boundary
234+
// with builtin methods once https://github.com/rust-lang/rust/issues/93743 is resolved
235+
// These are just copy pasted from the current implementation
236+
/// Returns `true` when `value` can be the first byte of a UTF-8 encoded character.
///
/// UTF-8 continuation bytes have the bit pattern `10xxxxxx` (128..=191); every other
/// byte value is treated as a boundary.
const fn is_utf8_char_boundary(value: u8) -> bool {
    // Keep only the two high bits and reject the continuation-byte pattern.
    // Equivalent to: value < 128 || value >= 192
    (value & 0b1100_0000) != 0b1000_0000
}
240+
241+
/// Returns the largest byte index `<= index` that lies on a UTF-8 character boundary
/// of `value`.
///
/// An `index` at or past the end of `value` yields `value.len()`.
fn floor_char_boundary(value: &str, index: usize) -> usize {
    if index >= value.len() {
        return value.len();
    }

    // A UTF-8 character is at most four bytes long, so walking back at most three
    // bytes from `index` always reaches the start of the character containing it.
    let lower_bound = index.saturating_sub(3);
    (lower_bound..=index)
        .rev()
        .find(|&i| value.is_char_boundary(i))
        // Not reachable for a valid `&str`; index 0 is always a boundary, so this
        // fallback stays safe to slice with and no `unsafe` is needed.
        .unwrap_or(0)
}
254+
255+
/// Returns the smallest byte index `>= index` that lies on a UTF-8 character boundary
/// of `value`.
///
/// An `index` at or past the end of `value` yields `value.len()` instead of panicking,
/// which matches how `floor_char_boundary` clamps out-of-range indices.
pub fn ceil_char_boundary(value: &str, index: usize) -> usize {
    let len = value.len();
    if index >= len {
        // Clamping here also avoids the `index + 4` overflow for huge indices.
        return len;
    }

    // For valid UTF-8 the next boundary is at most three bytes ahead; if the scan
    // runs off the end, the end of the string is itself a boundary.
    (index..len)
        .find(|&i| value.is_char_boundary(i))
        .unwrap_or(len)
}
262+
233263
macro_rules! truncate_input_value {
234264
($out:expr, $value:expr) => {
235265
if $value.len() > 50 {
236266
write!(
237267
$out,
238268
", input_value={}...{}",
239-
&$value[0..25],
240-
&$value[$value.len() - 24..]
269+
&$value[0..floor_char_boundary($value, 25)],
270+
&$value[ceil_char_boundary($value, $value.len() - 24)..]
241271
)?;
242272
} else {
243273
write!($out, ", input_value={}", $value)?;
@@ -381,7 +411,7 @@ impl PyLineError {
381411
if !hide_input {
382412
let input_value = self.input_value.as_ref(py);
383413
let input_str = safe_repr(input_value);
384-
truncate_input_value!(output, input_str);
414+
truncate_input_value!(output, &input_str);
385415

386416
if let Ok(type_) = input_value.get_type().name() {
387417
write!(output, ", input_type={type_}")?;

tests/test_misc.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pytest
66
from typing_extensions import get_args
77

8-
from pydantic_core import CoreSchema, CoreSchemaType, PydanticUndefined
8+
from pydantic_core import CoreSchema, CoreSchemaType, PydanticUndefined, core_schema
99
from pydantic_core._pydantic_core import SchemaError, SchemaValidator, ValidationError, __version__, build_profile
1010

1111

@@ -177,3 +177,19 @@ def test_undefined():
177177
assert undefined_deepcopy is PydanticUndefined
178178

179179
assert pickle.loads(pickle.dumps(PydanticUndefined)) is PydanticUndefined
180+
181+
182+
def test_unicode_error_input_repr() -> None:
    """Check the truncated ``input_value`` in a validation error repr for a long non-ASCII string.

    Regression test for https://github.com/pydantic/pydantic/issues/6448
    """
    int_validator = SchemaValidator(core_schema.int_schema())

    # Long enough that the error repr shortens it to a head and a tail joined by '...'.
    multibyte_input = 'ÿ' * 1000
    expected = "1 validation error for int\n Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='ÿÿÿÿÿÿÿÿÿÿÿÿ...ÿÿÿÿÿÿÿÿÿÿÿ', input_type=str]"  # noqa: E501

    with pytest.raises(ValidationError) as exc_info:
        int_validator.validate_python(multibyte_input)

    # Keep only the text before the trailing documentation link.
    message, _, _ = repr(exc_info.value).partition('For further information visit ')
    actual = message.strip()

    assert expected == actual

0 commit comments

Comments
 (0)