Skip to content

Commit b81c833

Browse files
authored
bpo-28660: Make TextWrapper break long words on hyphens (GH-22721)
1 parent 67f0487 commit b81c833

File tree

3 files changed

+83
-2
lines changed

3 files changed

+83
-2
lines changed

Lib/test/test_textwrap.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,78 @@ def test_max_lines_long(self):
640640
max_lines=4)
641641

642642

643+
class LongWordWithHyphensTestCase(BaseTestCase):
    # Checks how over-long words are wrapped for every combination of the
    # break_long_words and break_on_hyphens options.  Each test wraps
    # text1 at widths 50 and 10 and text2 at width 10, passing the option
    # values named by the test to every check_wrap() call.

    def setUp(self):
        self.wrapper = TextWrapper()
        # A sentence containing one long hyphenated chemical name.
        self.text1 = '''\
We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate synthase.
'''
        # A single whitespace-free token mixing single hyphens, a double
        # hyphen and underscores.
        self.text2 = '''\
1234567890-1234567890--this_is_a_very_long_option_indeed-good-bye"
'''

    def test_break_long_words_on_hyphen(self):
        # Defaults: break_long_words=True, break_on_hyphens=True.  A long
        # word is cut after the last hyphen that fits on the line, and by
        # width alone when no usable hyphen is available.
        expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-',
                    'cyclohexadiene-1-carboxylate synthase.']
        self.check_wrap(self.text1, 50, expected)

        expected = ['We used', 'enyzme 2-', 'succinyl-', '6-hydroxy-', '2,4-',
                    'cyclohexad', 'iene-1-', 'carboxylat', 'e', 'synthase.']
        self.check_wrap(self.text1, 10, expected)

        expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo',
                    'ng_option_', 'indeed-', 'good-bye"']
        self.check_wrap(self.text2, 10, expected)

    def test_break_long_words_not_on_hyphen(self):
        # break_on_hyphens=False: long words are cut purely by width.
        expected = ['We used enyzme 2-succinyl-6-hydroxy-2,4-cyclohexad',
                    'iene-1-carboxylate synthase.']
        self.check_wrap(self.text1, 50, expected, break_on_hyphens=False)

        expected = ['We used', 'enyzme 2-s', 'uccinyl-6-', 'hydroxy-2,',
                    '4-cyclohex', 'adiene-1-c', 'arboxylate', 'synthase.']
        self.check_wrap(self.text1, 10, expected, break_on_hyphens=False)

        # The option has to be passed here as well; without it this check
        # would merely repeat the default-settings assertion.  The whole
        # token is then sliced into 10-character pieces.
        expected = ['1234567890', '-123456789', '0--this_is', '_a_very_lo',
                    'ng_option_', 'indeed-goo', 'd-bye"']
        self.check_wrap(self.text2, 10, expected, break_on_hyphens=False)

    def test_break_on_hyphen_but_not_long_words(self):
        # break_long_words=False: a chunk wider than the line is kept
        # intact on a line of its own.
        expected = ['We used enyzme',
                    '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
                    'synthase.']
        self.check_wrap(self.text1, 50, expected, break_long_words=False)

        expected = ['We used', 'enyzme',
                    '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
                    'synthase.']
        self.check_wrap(self.text1, 10, expected, break_long_words=False)

        # With hyphen splitting still enabled, text2 is divided at the
        # double hyphen and after the letter-flanked hyphens; over-long
        # pieces are left unbroken.
        expected = ['1234567890-1234567890', '--',
                    'this_is_a_very_long_option_indeed-', 'good-bye"']
        self.check_wrap(self.text2, 10, expected, break_long_words=False)

    def test_do_not_break_long_words_or_on_hyphens(self):
        # Both options disabled: only whitespace separates chunks and
        # over-long chunks are never cut.
        expected = ['We used enyzme',
                    '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
                    'synthase.']
        self.check_wrap(self.text1, 50, expected,
                        break_long_words=False,
                        break_on_hyphens=False)

        expected = ['We used', 'enyzme',
                    '2-succinyl-6-hydroxy-2,4-cyclohexadiene-1-carboxylate',
                    'synthase.']
        self.check_wrap(self.text1, 10, expected,
                        break_long_words=False,
                        break_on_hyphens=False)

        # text2 contains no whitespace, so it comes back as a single line.
        expected = ['1234567890-1234567890--this_is_a_very_long_option_'
                    'indeed-good-bye"']
        self.check_wrap(self.text2, 10, expected,
                        break_long_words=False,
                        break_on_hyphens=False)
714+
643715
class IndentTestCases(BaseTestCase):
644716

645717
# called before each test method

Lib/textwrap.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,16 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
215215
# If we're allowed to break long words, then do so: put as much
216216
# of the next chunk onto the current line as will fit.
217217
if self.break_long_words:
218-
cur_line.append(reversed_chunks[-1][:space_left])
219-
reversed_chunks[-1] = reversed_chunks[-1][space_left:]
218+
end = space_left
219+
chunk = reversed_chunks[-1]
220+
if self.break_on_hyphens and len(chunk) > space_left:
221+
# break after last hyphen, but only if there are
222+
# non-hyphens before it
223+
hyphen = chunk.rfind('-', 0, space_left)
224+
if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
225+
end = hyphen + 1
226+
cur_line.append(chunk[:end])
227+
reversed_chunks[-1] = chunk[end:]
220228

221229
# Otherwise, we have to preserve the long word intact. Only add
222230
# it to the current line if there's nothing already there --
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
:func:`textwrap.wrap` now attempts to break long words after hyphens when ``break_long_words=True`` and ``break_on_hyphens=True``.

0 commit comments

Comments
 (0)