Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions Doc/library/textwrap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ functions should be good enough; otherwise, you should use an instance of
replace_whitespace=True, fix_sentence_endings=False, \
break_long_words=True, drop_whitespace=True, \
break_on_hyphens=True, tabsize=8, max_lines=None, \
placeholder=' [...]')
placeholder=' [...]', text_len=len)

Wraps the single paragraph in *text* (a string) so every line is at most
*width* characters long. Returns a list of output lines, without final
Expand All @@ -37,7 +37,7 @@ functions should be good enough; otherwise, you should use an instance of
replace_whitespace=True, fix_sentence_endings=False, \
break_long_words=True, drop_whitespace=True, \
break_on_hyphens=True, tabsize=8, \
max_lines=None, placeholder=' [...]')
max_lines=None, placeholder=' [...]', text_len=len)

Wraps the single paragraph in *text*, and returns a single string containing the
wrapped paragraph. :func:`fill` is shorthand for ::
Expand All @@ -50,7 +50,7 @@ functions should be good enough; otherwise, you should use an instance of

.. function:: shorten(text, width, *, fix_sentence_endings=False, \
break_long_words=True, break_on_hyphens=True, \
placeholder=' [...]')
placeholder=' [...]', text_len=len)

Collapse and truncate the given *text* to fit in the given *width*.

Expand Down Expand Up @@ -293,6 +293,27 @@ hyphenated words; only then will long words be broken if necessary, unless
.. versionadded:: 3.4


.. attribute:: text_len

(default: :func:`len`) Callable used to measure the visible width of a
string when deciding where to wrap. Override the default to account for
characters that are not a single column wide, such as zero-width or
double-width characters, or invisible ANSI escape sequences::

>>> import re, textwrap
>>> visible_len = lambda s: len(re.sub(r'\x1b\[[0-9;]*m', '', s))
>>> colored = 'normal \x1b[31mcolored\x1b[0m words here'
>>> lines = textwrap.wrap(colored, width=14, text_len=visible_len)
>>> [re.sub(r'\x1b\[[0-9;]*m', '', line) for line in lines]
['normal colored', 'words here']

The callable must return a non-negative integer. It is assumed to be
additive over the whitespace- and hyphen-delimited chunks that wrapping
produces; a chunk that is too long to fit is split by visible width.

.. versionadded:: 3.16


:class:`TextWrapper` also provides some public methods, analogous to the
module-level convenience functions:

Expand Down
11 changes: 11 additions & 0 deletions Doc/whatsnew/3.16.rst
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,17 @@ shlex
(Contributed by Jay Berry in :gh:`148846`.)


textwrap
--------

* Add a *text_len* parameter to :func:`textwrap.wrap`, :func:`textwrap.fill`,
:func:`textwrap.shorten`, and :class:`textwrap.TextWrapper`. It customizes
how the visible width of a string is measured, so text that contains
zero-width or double-width characters, or invisible ANSI escape sequences,
can be wrapped correctly.
(Contributed by Kevin Deldycke in :gh:`152702`.)


tkinter
-------

Expand Down
10 changes: 8 additions & 2 deletions Lib/argparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,19 +771,25 @@ def _iter_indented_subactions(self, action):
yield from get_subactions()
self._dedent()

def _text_len(self, text):
    """Return the visible width of *text* for help-text wrapping.

    The text is first passed through ``self._decolor`` so that ANSI color
    escape sequences inserted for colored help output do not count
    towards the width; the length of what remains is returned.
    """
    visible_text = self._decolor(text)
    return len(visible_text)

def _split_lines(self, text, width):
    """Wrap *text* into a list of lines at most *width* columns wide.

    Runs of whitespace matched by ``self._whitespace_matcher`` are first
    collapsed to single spaces and the result is stripped.  Line widths
    are measured with ``self._text_len`` rather than ``len`` so that
    color escape sequences do not change where lines break.
    """
    text = self._whitespace_matcher.sub(' ', text).strip()
    # The textwrap module is used only for formatting help.
    # Delay its import for speeding up the common usage of argparse.
    import textwrap
    # Only the measuring call is kept: the earlier plain
    # ``textwrap.wrap(text, width)`` return made this one unreachable.
    return textwrap.wrap(text, width, text_len=self._text_len)

def _fill_text(self, text, width, indent):
    """Return *text* re-flowed into one string of lines at most *width* wide.

    Runs of whitespace matched by ``self._whitespace_matcher`` are first
    collapsed to single spaces and the result is stripped.  Every output
    line is prefixed with *indent*.  Line widths are measured with
    ``self._text_len`` rather than ``len`` so that color escape sequences
    do not change where lines break.
    """
    text = self._whitespace_matcher.sub(' ', text).strip()
    # Imported lazily, like in _split_lines: textwrap is needed only when
    # help is actually formatted.
    import textwrap
    return textwrap.fill(text, width,
                         initial_indent=indent,
                         subsequent_indent=indent,
                         text_len=self._text_len)

def _get_help_string(self, action):
return action.help
Expand Down
2 changes: 1 addition & 1 deletion Lib/idlelib/idle_test/test_calltip.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_signature_wrap(self):
(width=70, initial_indent='', subsequent_indent='', expand_tabs=True,
replace_whitespace=True, fix_sentence_endings=False, break_long_words=True,
drop_whitespace=True, break_on_hyphens=True, tabsize=8, *, max_lines=None,
placeholder=' [...]')
placeholder=' [...]', text_len=<built-in function len>)
Object for wrapping/filling text. The public interface consists of
the wrap() and fill() methods; the other methods are just there for
subclasses to override in order to tweak the default behaviour.
Expand Down
26 changes: 26 additions & 0 deletions Lib/test/test_argparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -7869,6 +7869,32 @@ def test_help_with_format_specifiers(self):
self.assertIn(f'type: {interp}int{reset}', help_text)
self.assertIn(f'choices: {interp}a, b{reset}', help_text)

def test_colored_help_wraps_like_plain_help(self):
    # gh-142035: the ANSI color escapes placed around the interpolated
    # "(default: ...)" value must not move any line break.  Once the
    # colors are stripped, colored help must equal plain help exactly.
    environ = self.enterContext(os_helper.EnvironmentVarGuard())
    environ["COLUMNS"] = "70"

    long_help = (
        "A l o n g d e s c r i p t i o n f o r t h e v e r b "
        "o s e f l a g t o d e m o n s t r a t e w r a p p i n g"
    )

    def render_help(color):
        # Build the same parser each time; only *color* differs.
        parser = argparse.ArgumentParser(
            prog="PROG",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            color=color,
        )
        parser.add_argument("--verbose", action="store_true", help=long_help)
        parser.add_argument(
            "--input", default="input.txt", help="Input file path"
        )
        return parser.format_help()

    colored_help = render_help(True)
    plain_help = render_help(False)
    self.assertEqual(_colorize.decolor(colored_help), plain_help)

def test_print_help_uses_target_file_for_color_decision(self):
parser = argparse.ArgumentParser(prog='PROG', color=True)
parser.add_argument('--opt')
Expand Down
72 changes: 71 additions & 1 deletion Lib/test/test_textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# $Id$
#

import re
import unittest

from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten
Expand Down Expand Up @@ -1133,5 +1134,74 @@ def test_first_word_too_long_but_placeholder_fits(self):
self.check_shorten("Helloo", 5, "[...]")


if __name__ == '__main__':
class TextLenTestCase(BaseTestCase):
    """Tests for the ``text_len`` option of the wrapping functions.

    ``text_len`` customizes how the visible width of a string is measured.
    The motivating case is colored output, where invisible ANSI escape
    sequences must not count towards the line width (gh-142035).
    """

    # Matches one SGR ("ESC [ ... m") color escape sequence.
    _ansi = re.compile(r"\x1b\[[0-9;]*m")

    @classmethod
    def visible_len(cls, text):
        """Return the length of *text* with color escapes removed."""
        stripped = cls._ansi.sub("", text)
        return len(stripped)

    @classmethod
    def decolor(cls, lines):
        """Return *lines* as a list with color escapes removed from each."""
        strip = cls._ansi.sub
        return [strip("", line) for line in lines]

    @staticmethod
    def color(text):
        """Return *text* with every word wrapped in zero-width escapes."""
        colored_words = [f"\x1b[31m{word}\x1b[0m" for word in text.split()]
        return " ".join(colored_words)

    def check_shorten(self, text, width, expect, **kwargs):
        shortened = shorten(text, width, **kwargs)
        self.check(shortened, expect)

    def test_default_text_len_is_len(self):
        wrapper = TextWrapper()
        self.assertIs(wrapper.text_len, len)

    def test_explicit_len_matches_default(self):
        text = "Hello there, how are you this fine day? I'm glad to hear it!"
        expected = wrap(text, 12)
        self.check_wrap(text, 12, expected, text_len=len)

    def test_color_does_not_change_breaks(self):
        text = "These are several short words to be wrapped and colored here"
        colored = self.color(text)
        for width in (10, 15, 20, 30):
            with self.subTest(width=width):
                colored_lines = wrap(colored, width, text_len=self.visible_len)
                self.assertEqual(self.decolor(colored_lines), wrap(text, width))

    def test_color_respects_width(self):
        limit = 9
        colored = self.color("one two three four five six seven")
        for line in wrap(colored, limit, text_len=self.visible_len):
            self.assertLessEqual(self.visible_len(line), limit)

    def test_break_long_word_by_visible_width(self):
        long_word = "".join(["\x1b[31m", "x" * 20, "\x1b[0m"])
        pieces = wrap(long_word, 8, text_len=self.visible_len)
        self.assertEqual(self.decolor(pieces), ["xxxxxxxx", "xxxxxxxx", "xxxx"])

    def test_break_on_hyphens_with_color(self):
        colored = self.color("spam-egg-ham-bacon")
        pieces = wrap(colored, 9, text_len=self.visible_len)
        self.assertEqual(self.decolor(pieces), ["spam-egg-", "ham-bacon"])

    def test_shorten_with_text_len(self):
        colored = self.color("one two three four five")
        shortened = shorten(colored, 12, text_len=self.visible_len)
        self.assertLessEqual(self.visible_len(shortened), 12)
        self.assertEqual(self._ansi.sub("", shortened), "one [...]")

    def test_measure_is_not_limited_to_ansi(self):
        # Any width measure works, e.g. counting every character as two columns.
        def double(s):
            return 2 * len(s)

        one_word_per_line = ["aa", "bb", "cc", "dd"]
        for width in (4, 5):
            self.check_wrap(
                "aa bb cc dd", width, one_word_per_line, text_len=double
            )


if __name__ == "__main__":
unittest.main()
62 changes: 49 additions & 13 deletions Lib/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ class TextWrapper:
Truncate wrapped lines.
placeholder (default: ' [...]')
Append to the last line of truncated text.
text_len (default: len)
Callable returning the visible width of a string. Override the
default to account for characters that are not one column wide,
such as zero-width or double-width characters, or invisible ANSI
escape sequences. It should return a non-negative integer.
"""

unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), ord(' '))
Expand Down Expand Up @@ -122,7 +127,8 @@ def __init__(self,
tabsize=8,
*,
max_lines=None,
placeholder=' [...]'):
placeholder=' [...]',
text_len=len):
self.width = width
self.initial_indent = initial_indent
self.subsequent_indent = subsequent_indent
Expand All @@ -135,6 +141,7 @@ def __init__(self,
self.tabsize = tabsize
self.max_lines = max_lines
self.placeholder = placeholder
self.text_len = text_len


# -- Private methods -----------------------------------------------
Expand Down Expand Up @@ -194,6 +201,28 @@ def _fix_sentence_endings(self, chunks):
else:
i += 1

def _truncate_to_width(self, text, width):
    """_truncate_to_width(text : string, width : int) -> string

    Return the longest prefix of *text* whose visible width, as measured
    by ``self.text_len``, does not exceed *width*.  With a custom text_len
    the number of characters that fit need not equal *width*, so an
    over-long word cannot be broken by slicing at the column count.  At
    least one character is always kept (when *text* is non-empty) so that
    wrapping makes progress.
    """
    measure = self.text_len
    # Fast path for the default len(): the width is the number of
    # characters, so the prefix can be sliced directly.
    if measure is len:
        return text[:max(width, 1)]
    if measure(text) <= width:
        return text
    # A custom measure need not grow monotonically with the prefix: an
    # unfinished escape sequence such as '\x1b[3' is counted as visible
    # characters until its final 'm' arrives, after which it measures
    # zero.  Stopping at the first prefix that is too wide could
    # therefore cut in the middle of such a sequence, so every candidate
    # is examined, longest first, and the first one that fits wins.
    # This costs one text_len call per candidate.
    for end in range(len(text) - 1, 1, -1):
        if measure(text[:end]) <= width:
            return text[:end]
    # Nothing longer fits: keep a single character to guarantee progress.
    return text[:1]

def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
"""_handle_long_word(chunks : [string],
cur_line : [string],
Expand All @@ -212,9 +241,10 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
# If we're allowed to break long words, then do so: put as much
# of the next chunk onto the current line as will fit.
if self.break_long_words and space_left > 0:
end = space_left
chunk = reversed_chunks[-1]
if self.break_on_hyphens and len(chunk) > space_left:
# Keep as many leading characters as fit in the visible width.
end = len(self._truncate_to_width(chunk, space_left))
if self.break_on_hyphens and self.text_len(chunk) > space_left:
# break after last hyphen, but only if there are
# non-hyphens before it
hyphen = chunk.rfind('-', 0, space_left)
Expand Down Expand Up @@ -256,7 +286,10 @@ def _wrap_chunks(self, chunks):
indent = self.subsequent_indent
else:
indent = self.initial_indent
if len(indent) + len(self.placeholder.lstrip()) > self.width:
if (
self.text_len(indent) + self.text_len(self.placeholder.lstrip())
> self.width
):
raise ValueError("placeholder too large for max width")

# Arrange in reverse order so items can be efficiently popped
Expand All @@ -277,15 +310,15 @@ def _wrap_chunks(self, chunks):
indent = self.initial_indent

# Maximum width for this line.
width = self.width - len(indent)
width = self.width - self.text_len(indent)

# First chunk on line is whitespace -- drop it, unless this
# is the very beginning of the text (ie. no lines started yet).
if self.drop_whitespace and chunks[-1].strip() == '' and lines:
del chunks[-1]

while chunks:
l = len(chunks[-1])
l = self.text_len(chunks[-1])

# Can at least squeeze this chunk onto the current line.
if cur_len + l <= width:
Expand All @@ -298,13 +331,13 @@ def _wrap_chunks(self, chunks):

# The current line is full, and the next chunk is too big to
# fit on *any* line (not just this one).
if chunks and len(chunks[-1]) > width:
if chunks and self.text_len(chunks[-1]) > width:
self._handle_long_word(chunks, cur_line, cur_len, width)
cur_len = sum(map(len, cur_line))
cur_len = sum(map(self.text_len, cur_line))

# If the last chunk on this line is all whitespace, drop it.
if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
cur_len -= len(cur_line[-1])
cur_len -= self.text_len(cur_line[-1])
del cur_line[-1]

if cur_line:
Expand All @@ -320,17 +353,20 @@ def _wrap_chunks(self, chunks):
else:
while cur_line:
if (cur_line[-1].strip() and
cur_len + len(self.placeholder) <= width):
cur_len + self.text_len(self.placeholder) <= width):
cur_line.append(self.placeholder)
lines.append(indent + ''.join(cur_line))
break
cur_len -= len(cur_line[-1])
cur_len -= self.text_len(cur_line[-1])
del cur_line[-1]
else:
if lines:
prev_line = lines[-1].rstrip()
if (len(prev_line) + len(self.placeholder) <=
self.width):
if (
self.text_len(prev_line)
+ self.text_len(self.placeholder)
<= self.width
):
lines[-1] = prev_line + self.placeholder
break
lines.append(indent + self.placeholder.lstrip())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Add a *text_len* parameter to :func:`textwrap.wrap`, :func:`textwrap.fill`,
:func:`textwrap.shorten`, and :class:`textwrap.TextWrapper`. It customizes how
the visible width of a string is measured, allowing text that contains
zero-width or double-width characters, or invisible ANSI escape sequences, to
be wrapped correctly.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix :mod:`argparse` help text wrapping when colors are enabled. ANSI escape
sequences inserted around interpolated values such as the ``(default: ...)``
suffix no longer count towards the line width, so colored help wraps at the
same place as the equivalent uncolored help.
Loading