Merge pull request #7264 from bluetech/wcwidth

nicoddemus · web-flow · commit 70b5bdf4ba6f · 2020-05-31T12:37:58.000-03:00
Improve our own wcwidth implementation and remove dependency on wcwidth package
diff --git a/changelog/7264.improvement.rst b/changelog/7264.improvement.rst
@@ -0,0 +1 @@
+The dependency on the ``wcwidth`` package has been removed.
diff --git a/setup.py b/setup.py
@@ -12,7 +12,6 @@
     'colorama;sys_platform=="win32"',
     "pluggy>=0.12,<1.0",
     'importlib-metadata>=0.12;python_version<"3.8"',
-    "wcwidth",
 ]
 
 
diff --git a/src/_pytest/_io/terminalwriter.py b/src/_pytest/_io/terminalwriter.py
@@ -2,12 +2,12 @@
 import os
 import shutil
 import sys
-import unicodedata
-from functools import lru_cache
 from typing import Optional
 from typing import Sequence
 from typing import TextIO
 
+from .wcwidth import wcswidth
+
 
 # This code was initially copied from py 1.8.1, file _io/terminalwriter.py.
 
@@ -22,17 +22,6 @@ def get_terminal_width() -> int:
     return width
 
 
-@lru_cache(100)
-def char_width(c: str) -> int:
-    # Fullwidth and Wide -> 2, all else (including Ambiguous) -> 1.
-    return 2 if unicodedata.east_asian_width(c) in ("F", "W") else 1
-
-
-def get_line_width(text: str) -> int:
-    text = unicodedata.normalize("NFC", text)
-    return sum(char_width(c) for c in text)
-
-
 def should_do_markup(file: TextIO) -> bool:
     if os.environ.get("PY_COLORS") == "1":
         return True
@@ -99,7 +88,7 @@ def fullwidth(self, value: int) -> None:
     @property
     def width_of_current_line(self) -> int:
         """Return an estimate of the width so far in the current line."""
-        return get_line_width(self._current_line)
+        return wcswidth(self._current_line)
 
     def markup(self, text: str, **markup: bool) -> str:
         for name in markup:
diff --git a/src/_pytest/_io/wcwidth.py b/src/_pytest/_io/wcwidth.py
@@ -0,0 +1,55 @@
+import unicodedata
+from functools import lru_cache
+
+
+@lru_cache(100)
+def wcwidth(c: str) -> int:
+    """Determine how many columns are needed to display a character in a terminal.
+
+    Returns -1 if the character is not printable.
+    Returns 0, 1 or 2 for other characters.
+    """
+    o = ord(c)
+
+    # ASCII fast path.
+    if 0x20 <= o < 0x07F:
+        return 1
+
+    # NUL and some Cf/Zp/Zl characters which should be zero-width.
+    if (
+        o == 0x0000
+        or 0x200B <= o <= 0x200F
+        or 0x2028 <= o <= 0x202E
+        or 0x2060 <= o <= 0x2063
+    ):
+        return 0
+
+    category = unicodedata.category(c)
+
+    # Control characters.
+    if category == "Cc":
+        return -1
+
+    # Combining characters with zero width.
+    if category in ("Me", "Mn"):
+        return 0
+
+    # Full/Wide east asian characters.
+    if unicodedata.east_asian_width(c) in ("F", "W"):
+        return 2
+
+    return 1
+
+
+def wcswidth(s: str) -> int:
+    """Determine how many columns are needed to display a string in a terminal.
+
+    Returns -1 if the string contains non-printable characters.
+    """
+    width = 0
+    for c in unicodedata.normalize("NFC", s):
+        wc = wcwidth(c)
+        if wc < 0:
+            return -1
+        width += wc
+    return width
diff --git a/src/_pytest/terminal.py b/src/_pytest/terminal.py
@@ -27,6 +27,7 @@
 import pytest
 from _pytest import nodes
 from _pytest._io import TerminalWriter
+from _pytest._io.wcwidth import wcswidth
 from _pytest.compat import order_preserving_dict
 from _pytest.config import Config
 from _pytest.config import ExitCode
@@ -1120,8 +1121,6 @@ def _get_pos(config, rep):
 
 def _get_line_with_reprcrash_message(config, rep, termwidth):
     """Get summary line for a report, trying to add reprcrash message."""
-    from wcwidth import wcswidth
-
     verbose_word = rep._get_verbose_word(config)
     pos = _get_pos(config, rep)
 
diff --git a/testing/io/test_wcwidth.py b/testing/io/test_wcwidth.py
@@ -0,0 +1,38 @@
+import pytest
+from _pytest._io.wcwidth import wcswidth
+from _pytest._io.wcwidth import wcwidth
+
+
+@pytest.mark.parametrize(
+    ("c", "expected"),
+    [
+        ("\0", 0),
+        ("\n", -1),
+        ("a", 1),
+        ("1", 1),
+        ("א", 1),
+        ("\u200B", 0),
+        ("\u1ABE", 0),
+        ("\u0591", 0),
+        ("🉐", 2),
+        ("＄", 2),
+    ],
+)
+def test_wcwidth(c: str, expected: int) -> None:
+    assert wcwidth(c) == expected
+
+
+@pytest.mark.parametrize(
+    ("s", "expected"),
+    [
+        ("", 0),
+        ("hello, world!", 13),
+        ("hello, world!\n", -1),
+        ("0123456789", 10),
+        ("שלום, עולם!", 11),
+        ("שְבֻעָיים", 6),
+        ("🉐🉐🉐", 6),
+    ],
+)
+def test_wcswidth(s: str, expected: int) -> None:
+    assert wcswidth(s) == expected
diff --git a/testing/test_terminal.py b/testing/test_terminal.py
@@ -14,7 +14,9 @@
 import py
 
 import _pytest.config
+import _pytest.terminal
 import pytest
+from _pytest._io.wcwidth import wcswidth
 from _pytest.config import ExitCode
 from _pytest.pytester import Testdir
 from _pytest.reports import BaseReport
@@ -2027,9 +2029,6 @@ class X:
 
 
 def test_line_with_reprcrash(monkeypatch):
-    import _pytest.terminal
-    from wcwidth import wcswidth
-
     mocked_verbose_word = "FAILED"
 
     mocked_pos = "some::nodeid"
@@ -2079,19 +2078,19 @@ def check(msg, width, expected):
     check("some\nmessage", 80, "FAILED some::nodeid - some")
 
     # Test unicode safety.
-    check("😄😄😄😄😄\n2nd line", 25, "FAILED some::nodeid - ...")
-    check("😄😄😄😄😄\n2nd line", 26, "FAILED some::nodeid - ...")
-    check("😄😄😄😄😄\n2nd line", 27, "FAILED some::nodeid - 😄...")
-    check("😄😄😄😄😄\n2nd line", 28, "FAILED some::nodeid - 😄...")
-    check("😄😄😄😄😄\n2nd line", 29, "FAILED some::nodeid - 😄😄...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 25, "FAILED some::nodeid - ...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 26, "FAILED some::nodeid - ...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 27, "FAILED some::nodeid - 🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 28, "FAILED some::nodeid - 🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 29, "FAILED some::nodeid - 🉐🉐...")
 
     # NOTE: constructed, not sure if this is supported.
-    mocked_pos = "nodeid::😄::withunicode"
-    check("😄😄😄😄😄\n2nd line", 29, "FAILED nodeid::😄::withunicode")
-    check("😄😄😄😄😄\n2nd line", 40, "FAILED nodeid::😄::withunicode - 😄😄...")
-    check("😄😄😄😄😄\n2nd line", 41, "FAILED nodeid::😄::withunicode - 😄😄...")
-    check("😄😄😄😄😄\n2nd line", 42, "FAILED nodeid::😄::withunicode - 😄😄😄...")
-    check("😄😄😄😄😄\n2nd line", 80, "FAILED nodeid::😄::withunicode - 😄😄😄😄😄")
+    mocked_pos = "nodeid::🉐::withunicode"
+    check("🉐🉐🉐🉐🉐\n2nd line", 29, "FAILED nodeid::🉐::withunicode")
+    check("🉐🉐🉐🉐🉐\n2nd line", 40, "FAILED nodeid::🉐::withunicode - 🉐🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 41, "FAILED nodeid::🉐::withunicode - 🉐🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 42, "FAILED nodeid::🉐::withunicode - 🉐🉐🉐...")
+    check("🉐🉐🉐🉐🉐\n2nd line", 80, "FAILED nodeid::🉐::withunicode - 🉐🉐🉐🉐🉐")
 
 
 @pytest.mark.parametrize(

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+The dependency on the ``wcwidth`` package has been removed.
Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,6 @@`
`12`	`12`	`'colorama;sys_platform=="win32"',`
`13`	`13`	`"pluggy>=0.12,<1.0",`
`14`	`14`	`'importlib-metadata>=0.12;python_version<"3.8"',`
`15`		`- "wcwidth",`
`16`	`15`	`]`
`17`	`16`
`18`	`17`