Skip to content

Properly truncate characters when splitting up strings in error repr #746

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions src/errors/validation_exception.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,14 +230,44 @@ impl ValidationError {
}
}

// TODO: replace is_utf8_char_boundary, floor_char_boundary and ceil_char_boundary
// with the builtin methods once https://github.com/rust-lang/rust/issues/93743 is resolved.
// These are just copy-pasted from the current implementation.
/// Reports whether `value` can be the first byte of a UTF-8 encoded character,
/// i.e. whether it is anything other than a continuation byte.
const fn is_utf8_char_boundary(value: u8) -> bool {
    // Continuation bytes are exactly those with the bit pattern `10xx_xxxx`
    // (0x80..=0xBF); every other byte value starts a character.
    (value & 0b1100_0000) != 0b1000_0000
}

/// Returns the largest byte offset `<= index` that lies on a UTF-8 character
/// boundary of `value`.
///
/// An `index` at or past the end of `value` yields `value.len()`.
fn floor_char_boundary(value: &str, index: usize) -> usize {
    if index >= value.len() {
        return value.len();
    }

    // A UTF-8 encoded character is at most four bytes long, so the start of the
    // character covering `index` is one of the bytes in `index - 3..=index`.
    let lower_bound = index.saturating_sub(3);
    value.as_bytes()[lower_bound..=index]
        .iter()
        .rposition(|&byte| is_utf8_char_boundary(byte))
        // A `&str` always has a boundary inside this window, so the fallback is
        // not expected to be hit; offset 0 is used because it is a boundary of
        // every string, which keeps this function free of `unsafe`.
        .map_or(0, |offset| lower_bound + offset)
}

/// Returns the smallest byte offset `>= index` that lies on a UTF-8 character
/// boundary of `value`.
///
/// An `index` at or past the end of `value` yields `value.len()` rather than
/// panicking, so the result is always usable as a slice start (`&value[result..]`).
pub fn ceil_char_boundary(value: &str, index: usize) -> usize {
    let len = value.len();
    if index >= len {
        // Clamp out-of-range indices; this also guarantees `index + 4` below
        // cannot overflow, because `index` is smaller than an existing length.
        return len;
    }

    // A UTF-8 encoded character is at most four bytes long, so the next boundary
    // is within `index..index + 4`, or at the end of the string.
    let upper_bound = Ord::min(index + 4, len);
    (index..upper_bound)
        .find(|&offset| value.is_char_boundary(offset))
        .unwrap_or(upper_bound)
}

macro_rules! truncate_input_value {
($out:expr, $value:expr) => {
if $value.len() > 50 {
write!(
$out,
", input_value={}...{}",
&$value[0..25],
&$value[$value.len() - 24..]
&$value[0..floor_char_boundary($value, 25)],
&$value[ceil_char_boundary($value, $value.len() - 24)..]
)?;
} else {
write!($out, ", input_value={}", $value)?;
Expand Down Expand Up @@ -381,7 +411,7 @@ impl PyLineError {
if !hide_input {
let input_value = self.input_value.as_ref(py);
let input_str = safe_repr(input_value);
truncate_input_value!(output, input_str);
truncate_input_value!(output, &input_str);

if let Ok(type_) = input_value.get_type().name() {
write!(output, ", input_type={type_}")?;
Expand Down
18 changes: 17 additions & 1 deletion tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest
from typing_extensions import get_args

from pydantic_core import CoreSchema, CoreSchemaType, PydanticUndefined
from pydantic_core import CoreSchema, CoreSchemaType, PydanticUndefined, core_schema
from pydantic_core._pydantic_core import SchemaError, SchemaValidator, ValidationError, __version__, build_profile


Expand Down Expand Up @@ -177,3 +177,19 @@ def test_undefined():
assert undefined_deepcopy is PydanticUndefined

assert pickle.loads(pickle.dumps(PydanticUndefined)) is PydanticUndefined


def test_unicode_error_input_repr() -> None:
    """Regression test for https://github.com/pydantic/pydantic/issues/6448.

    Validates a long string of multi-byte characters against an int schema and
    checks the truncated ``input_value`` shown in the error repr.
    """
    int_validator = SchemaValidator(core_schema.int_schema())

    # 1000 two-byte characters: long enough that the repr truncates the input.
    multibyte_input = 'ÿ' * 1000
    expected = "1 validation error for int\n Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='ÿÿÿÿÿÿÿÿÿÿÿÿ...ÿÿÿÿÿÿÿÿÿÿÿ', input_type=str]"  # noqa: E501

    with pytest.raises(ValidationError) as exc_info:
        int_validator.validate_python(multibyte_input)

    # Compare only the text that precedes the 'For further information visit ' suffix.
    message_head, _, _ = repr(exc_info.value).partition('For further information visit ')
    actual = message_head.strip()

    assert expected == actual