Skip to content

Commit 9ad460f

Browse files
feat!: Add use_wcwidth for Asian character support (#63)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 5a2bc2c commit 9ad460f

File tree

6 files changed

+103
-12
lines changed

6 files changed

+103
-12
lines changed

README.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -165,15 +165,18 @@ All parameters are optional.
165165

166166
| Option | Type | Default | Description |
167167
| :-----------------: | :-------------------: | :-------------------: | :-------------------------------------------------------------------------------: |
168-
| `header` | `List[Any]` | `None` | First table row separated by header row separator. Values should support `str()`. |
169-
| `body` | `List[List[Any]]` | `None` | List of rows for the main section of the table. Values should support `str()`. |
170-
| `footer` | `List[Any]` | `None` | Last table row separated by header row separator. Values should support `str()`. |
168+
| `header` | `List[Any]` | `None` | First table row separated by header row separator. Values should support `str()` |
169+
| `body` | `List[List[Any]]` | `None` | List of rows for the main section of the table. Values should support `str()` |
170+
| `footer` | `List[Any]` | `None` | Last table row separated by header row separator. Values should support `str()` |
171171
| `column_widths` | `List[Optional[int]]` | `None` (automatic) | List of column widths in characters for each column |
172172
| `alignments` | `List[Alignment]` | `None` (all centered) | Column alignments<br/>(ex. `[Alignment.LEFT, Alignment.CENTER, Alignment.RIGHT]`) |
173173
| `style` | `TableStyle` | `double_thin_compact` | Table style to use for the table\* |
174174
| `first_col_heading` | `bool` | `False` | Whether to add a heading column separator after the first column |
175175
| `last_col_heading` | `bool` | `False` | Whether to add a heading column separator before the last column |
176-
| `cell_padding` | `int` | `1` | The minimum number of spaces to add between the cell content and the cell border. |
176+
| `cell_padding` | `int` | `1` | The minimum number of spaces to add between the cell content and the cell border |
177+
| `use_wcwidth` | `bool` | `True` | Whether to use [wcwidth][wcwidth] instead of `len()` to calculate cell width |
178+
179+
[wcwidth]: https://pypi.org/project/wcwidth/
177180

178181
\*See a list of all preset styles [here](https://table2ascii.readthedocs.io/en/latest/styles.html).
179182

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,5 +77,6 @@ namespace_packages = true
7777
[[tool.mypy.overrides]]
7878
module = [
7979
"setuptools.*",
80+
"wcwidth"
8081
]
8182
ignore_missing_imports = true

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
typing-extensions>=3.7.4; python_version<'3.8'
1+
typing-extensions>=3.7.4; python_version<'3.8'
2+
wcwidth<1

table2ascii/options.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,17 @@
88

99
@dataclass
1010
class Options:
11-
"""Class for storing options that the user sets"""
11+
"""Class for storing options that the user sets
12+
13+
.. versionchanged:: 1.0.0
14+
15+
Added ``use_wcwidth`` option
16+
"""
1217

1318
first_col_heading: bool
1419
last_col_heading: bool
1520
column_widths: list[int | None] | None
1621
alignments: list[Alignment] | None
1722
cell_padding: int
1823
style: TableStyle
24+
use_wcwidth: bool

table2ascii/table_to_ascii.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from math import ceil, floor
44

5+
from wcwidth import wcswidth
6+
57
from .alignment import Alignment
68
from .annotations import SupportsStr
79
from .exceptions import (
@@ -44,6 +46,7 @@ def __init__(
4446
self.__first_col_heading = options.first_col_heading
4547
self.__last_col_heading = options.last_col_heading
4648
self.__cell_padding = options.cell_padding
49+
self.__use_wcwidth = options.use_wcwidth
4750

4851
# calculate number of columns
4952
self.__columns = self.__count_columns()
@@ -93,7 +96,7 @@ def __auto_column_widths(self) -> list[int]:
9396
def widest_line(value: SupportsStr) -> int:
9497
"""Returns the width of the longest line in a multi-line string"""
9598
text = str(value)
96-
return max(len(line) for line in text.splitlines()) if len(text) else 0
99+
return max(self.__str_width(line) for line in text.splitlines()) if len(text) else 0
97100

98101
column_widths = []
99102
# get the width necessary for each column
@@ -145,17 +148,18 @@ def __pad(self, cell_value: SupportsStr, width: int, alignment: Alignment) -> st
145148
text = str(cell_value)
146149
padding = " " * self.__cell_padding
147150
padded_text = f"{padding}{text}{padding}"
151+
text_width = self.__str_width(padded_text)
148152
if alignment == Alignment.LEFT:
149153
# pad with spaces on the end
150-
return padded_text + (" " * (width - len(padded_text)))
154+
return padded_text + (" " * (width - text_width))
151155
if alignment == Alignment.CENTER:
152156
# pad with spaces, half on each side
153-
before = " " * floor((width - len(padded_text)) / 2)
154-
after = " " * ceil((width - len(padded_text)) / 2)
157+
before = " " * floor((width - text_width) / 2)
158+
after = " " * ceil((width - text_width) / 2)
155159
return before + padded_text + after
156160
if alignment == Alignment.RIGHT:
157161
# pad with spaces at the beginning
158-
return (" " * (width - len(padded_text))) + padded_text
162+
return (" " * (width - text_width)) + padded_text
159163
raise InvalidAlignmentError(alignment)
160164

161165
def __row_to_ascii(
@@ -344,6 +348,23 @@ def __body_to_ascii(self, body: list[list[SupportsStr]]) -> str:
344348
for row in body
345349
)
346350

351+
def __str_width(self, text: str) -> int:
352+
"""
353+
Returns the width of the string in characters for the purposes of monospace
354+
formatting. This is usually the same as the length of the string, but can be
355+
different for double-width characters (East Asian Wide and East Asian Fullwidth)
356+
or zero-width characters (combining characters, zero-width space, etc.)
357+
358+
Args:
359+
text: The text to measure
360+
361+
Returns:
362+
The width of the string in characters
363+
"""
364+
width = wcswidth(text) if self.__use_wcwidth else -1
365+
# if use_wcwidth is False or wcswidth fails, fall back to len
366+
return width if width >= 0 else len(text)
367+
347368
def to_ascii(self) -> str:
348369
"""Generates a formatted ASCII table
349370
@@ -380,9 +401,13 @@ def table2ascii(
380401
alignments: list[Alignment] | None = None,
381402
cell_padding: int = 1,
382403
style: TableStyle = PresetStyle.double_thin_compact,
404+
use_wcwidth: bool = True,
383405
) -> str:
384406
"""Convert a 2D Python table to ASCII text
385407
408+
.. versionchanged:: 1.0.0
409+
Added the ``use_wcwidth`` parameter defaulting to :py:obj:`True`.
410+
386411
Args:
387412
header: List of column values in the table's header row. All values should be :class:`str`
388413
or support :class:`str` conversion. If not specified, the table will not have a header row.
@@ -396,7 +421,7 @@ def table2ascii(
396421
Defaults to :py:obj:`False`.
397422
column_widths: List of widths in characters for each column. Any value of :py:obj:`None`
398423
indicates that the column width should be determined automatically. If :py:obj:`None`
399-
is passed instead of a :py:obj:`~typing.List`, all columns will be automatically sized.
424+
is passed instead of a :class:`list`, all columns will be automatically sized.
400425
Defaults to :py:obj:`None`.
401426
alignments: List of alignments for each column
402427
(ex. ``[Alignment.LEFT, Alignment.CENTER, Alignment.RIGHT]``). If not specified or set to
@@ -406,6 +431,11 @@ def table2ascii(
406431
Defaults to ``1``.
407432
style: Table style to use for styling (preset styles can be imported).
408433
Defaults to :ref:`PresetStyle.double_thin_compact <PresetStyle.double_thin_compact>`.
434+
use_wcwidth: Whether to use :func:`wcwidth.wcswidth` to determine the width of each cell instead of
435+
:func:`len`. The :func:`~wcwidth.wcswidth` function takes into account double-width characters
436+
(East Asian Wide and East Asian Fullwidth) and zero-width characters (combining characters,
437+
zero-width space, etc.), whereas :func:`len` determines the width solely based on the number of
438+
characters in the string. Defaults to :py:obj:`True`.
409439
410440
Returns:
411441
The generated ASCII table
@@ -421,5 +451,6 @@ def table2ascii(
421451
alignments=alignments,
422452
cell_padding=cell_padding,
423453
style=style,
454+
use_wcwidth=use_wcwidth,
424455
),
425456
).to_ascii()

tests/test_convert.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,3 +247,52 @@ def test_multiline_cells():
247247
"╚═══════════════════════════════════════════╝"
248248
)
249249
assert text == expected
250+
251+
252+
def test_east_asian_wide_characters_and_zero_width_wcwidth():
253+
# using wcwidth.wcswidth() to count the number of characters
254+
text = t2a(
255+
header=["#\u200b", "🦁", "🦡", "🦅", "🐍"],
256+
body=[["💻", "✅", "✅", "❌", "❌"]],
257+
footer=["🥞", "日", "月", "火", "水"],
258+
first_col_heading=True,
259+
)
260+
text2 = t2a(
261+
header=["#\u200b", "🦁", "🦡", "🦅", "🐍"],
262+
body=[["💻", "✅", "✅", "❌", "❌"]],
263+
footer=["🥞", "日", "月", "火", "水"],
264+
first_col_heading=True,
265+
use_wcwidth=True,
266+
)
267+
expected = (
268+
"╔════╦═══════════════════╗\n"
269+
"║ #​ ║ 🦁 🦡 🦅 🐍 ║\n"
270+
"╟────╫───────────────────╢\n"
271+
"║ 💻 ║ ✅ ✅ ❌ ❌ ║\n"
272+
"╟────╫───────────────────╢\n"
273+
"║ 🥞 ║ 日 月 火 水 ║\n"
274+
"╚════╩═══════════════════╝"
275+
)
276+
assert text == expected
277+
assert text2 == expected
278+
279+
280+
def test_east_asian_wide_characters_and_zero_width_no_wcwidth():
281+
# using len() to count the number of characters
282+
text = t2a(
283+
header=["#\u200b", "🦁", "🦡", "🦅", "🐍"],
284+
body=[["💻", "✅", "✅", "❌", "❌"]],
285+
footer=["🥞", "日", "月", "火", "水"],
286+
first_col_heading=True,
287+
use_wcwidth=False,
288+
)
289+
expected = (
290+
"╔════╦═══════════════╗\n"
291+
"║ #​ ║ 🦁 🦡 🦅 🐍 ║\n"
292+
"╟────╫───────────────╢\n"
293+
"║ 💻 ║ ✅ ✅ ❌ ❌ ║\n"
294+
"╟────╫───────────────╢\n"
295+
"║ 🥞 ║ 日 月 火 水 ║\n"
296+
"╚════╩═══════════════╝"
297+
)
298+
assert text == expected

0 commit comments

Comments
 (0)