pydantic · adriangb · Jul 6, 2023 · Jul 6, 2023 · Jul 6, 2023 · Jul 6, 2023
diff --git a/src/errors/validation_exception.rs b/src/errors/validation_exception.rs
@@ -230,14 +230,44 @@ impl ValidationError {
     }
 }
 
+// TODO: replace is_utf8_char_boundary, floor_char_boundary and ceil_char_boundary
+// with the builtin methods once https://github.com/rust-lang/rust/issues/93743 is resolved.
+// These are just copy-pasted from the current implementation.
+const fn is_utf8_char_boundary(value: u8) -> bool {
+    // This is bit magic equivalent to: value < 128 || value >= 192
+    (value as i8) >= -0x40
+}
+
+fn floor_char_boundary(value: &str, index: usize) -> usize {
+    if index >= value.len() {
+        value.len()
+    } else {
+        let lower_bound = index.saturating_sub(3);
+        let new_index = value.as_bytes()[lower_bound..=index]
+            .iter()
+            .rposition(|b| is_utf8_char_boundary(*b));
+
+        // SAFETY: `value` is valid UTF-8, so the window of up to four bytes ending at `index` always contains a char boundary
+        unsafe { lower_bound + new_index.unwrap_unchecked() }
+    }
+}
+
+pub fn ceil_char_boundary(value: &str, index: usize) -> usize {
+    let upper_bound = Ord::min(index + 4, value.len());
+    value.as_bytes()[index..upper_bound]
+        .iter()
+        .position(|b| is_utf8_char_boundary(*b))
+        .map_or(upper_bound, |pos| pos + index)
+}
+
 macro_rules! truncate_input_value {
     ($out:expr, $value:expr) => {
         if $value.len() > 50 {
             write!(
                 $out,
                 ", input_value={}...{}",
-                &$value[0..25],
-                &$value[$value.len() - 24..]
+                &$value[0..floor_char_boundary($value, 25)],
+                &$value[ceil_char_boundary($value, $value.len() - 24)..]
             )?;
         } else {
             write!($out, ", input_value={}", $value)?;
@@ -381,7 +411,7 @@ impl PyLineError {
         if !hide_input {
             let input_value = self.input_value.as_ref(py);
             let input_str = safe_repr(input_value);
-            truncate_input_value!(output, input_str);
+            truncate_input_value!(output, &input_str);
 
             if let Ok(type_) = input_value.get_type().name() {
                 write!(output, ", input_type={type_}")?;

diff --git a/tests/test_misc.py b/tests/test_misc.py
@@ -5,7 +5,7 @@
 import pytest
 from typing_extensions import get_args
 
-from pydantic_core import CoreSchema, CoreSchemaType, PydanticUndefined
+from pydantic_core import CoreSchema, CoreSchemaType, PydanticUndefined, core_schema
 from pydantic_core._pydantic_core import SchemaError, SchemaValidator, ValidationError, __version__, build_profile
 
 
@@ -177,3 +177,19 @@ def test_undefined():
     assert undefined_deepcopy is PydanticUndefined
 
     assert pickle.loads(pickle.dumps(PydanticUndefined)) is PydanticUndefined
+
+
+def test_unicode_error_input_repr() -> None:
+    """https://github.com/pydantic/pydantic/issues/6448"""
+
+    schema = core_schema.int_schema()
+
+    validator = SchemaValidator(schema)
+
+    danger_str = 'ÿ' * 1000
+    expected = "1 validation error for int\n  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='ÿÿÿÿÿÿÿÿÿÿÿÿ...ÿÿÿÿÿÿÿÿÿÿÿ', input_type=str]"  # noqa: E501
+    with pytest.raises(ValidationError) as exc_info:
+        validator.validate_python(danger_str)
+    actual = repr(exc_info.value).split('For further information visit ')[0].strip()
+
+    assert expected == actual