Skip to content
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Optimise import time for textwrap
  • Loading branch information
AA-Turner committed Mar 31, 2025
commit a759b016648aa3d01bd44b666775e58cb2c7ed05
58 changes: 36 additions & 22 deletions Lib/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,26 @@
# Copyright (C) 2002 Python Software Foundation.
# Written by Greg Ward <gward@python.net>

import re

__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']


class _cached_regex:
def __init__(self, pattern):
self.pattern = pattern

def __set_name__(self, owner, name):
self.attr_name = name

def __get__(self, instance, owner=None):
if owner is None:
return self
import re
# replace this descriptor with the compiled pattern
pat = re.compile(self.pattern)
setattr(owner, self.attr_name, pat)
return pat


# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters. The main reason for doing this is that
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
Expand Down Expand Up @@ -73,41 +89,39 @@ class TextWrapper:
# (after stripping out empty strings).
word_punct = r'[\w!"\'&.,?]'
letter = r'[^\d\W]'
whitespace = r'[%s]' % re.escape(_whitespace)
nowhitespace = '[^' + whitespace[1:]
wordsep_re = re.compile(r'''
whitespace = fr'[{_whitespace}]'
Comment thread
AA-Turner marked this conversation as resolved.
no_whitespace = f'[^{_whitespace}]'
wordsep_re = _cached_regex(fr'''(?x)
( # any whitespace
%(ws)s+
{whitespace}+
| # em-dash between words
(?<=%(wp)s) -{2,} (?=\w)
(?<={word_punct}) -{{2,}} (?=\w)
| # word, possibly hyphenated
%(nws)s+? (?:
{no_whitespace}+? (?:
# hyphenated word
-(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
(?= %(lt)s -? %(lt)s)
-(?: (?<={letter}{{2}}-) | (?<={letter}-{letter}-))
(?= {letter} -? {letter})
| # end of word
(?=%(ws)s|\Z)
(?={whitespace}|\Z)
| # em-dash
(?<=%(wp)s) (?=-{2,}\w)
(?<={word_punct}) (?=-{{2,}}\w)
)
)''' % {'wp': word_punct, 'lt': letter,
'ws': whitespace, 'nws': nowhitespace},
re.VERBOSE)
del word_punct, letter, nowhitespace
)''')
del word_punct, letter, no_whitespace

# This less funky little regex just split on recognized spaces. E.g.
# "Hello there -- you goof-ball, use the -b option!"
# splits into
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
wordsep_simple_re = _cached_regex(fr'({whitespace}+)')
del whitespace

# XXX this is not locale- or charset-aware -- string.lowercase
# is US-ASCII only (and therefore English-only)
sentence_end_re = re.compile(r'[a-z]' # lowercase letter
r'[\.\!\?]' # sentence-ending punct.
r'[\"\']?' # optional end-of-quote
r'\Z') # end of chunk
sentence_end_re = _cached_regex(r'[a-z]' # lowercase letter
r'[\.\!\?]' # sentence-ending punct.
r'[\"\']?' # optional end-of-quote
r'\Z') # end of chunk

def __init__(self,
width=70,
Expand Down Expand Up @@ -250,7 +264,7 @@ def _wrap_chunks(self, chunks):
"""
lines = []
if self.width <= 0:
raise ValueError("invalid width %r (must be > 0)" % self.width)
raise ValueError(f"invalid width {self.width!r} (must be > 0)")
if self.max_lines is not None:
if self.max_lines > 1:
indent = self.subsequent_indent
Expand Down
Loading