Skip to content

Commit b722c23

Browse files
committed
optimized dedent by 4x for larger files, added only_whitespace option if desired
1 parent 3796884 commit b722c23

File tree

1 file changed

+108
-34
lines changed

1 file changed

+108
-34
lines changed

Lib/textwrap.py

Lines changed: 108 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ def shorten(text, width, **kwargs):
416416
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
417417
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
418418

419-
def dedent(text):
419+
def dedent(text, only_whitespace = True):
420420
"""Remove any common leading whitespace from every line in `text`.
421421
422422
This can be used to make triple-quoted strings line up with the left
@@ -427,44 +427,118 @@ def dedent(text):
427427
are not equal: the lines " hello" and "\\thello" are
428428
considered to have no common leading whitespace.
429429
430+
If `only_whitespace` is `True`, the leading whitespaces are removed from the text. Otherwise, all the common leading text is removed.
431+
430432
Entirely blank lines are normalized to a newline character.
431433
"""
432-
# Look for the longest leading string of spaces and tabs common to
433-
# all lines.
434-
margin = None
435-
text = _whitespace_only_re.sub('', text)
436-
indents = _leading_whitespace_re.findall(text)
437-
for indent in indents:
438-
if margin is None:
439-
margin = indent
440-
441-
# Current line more deeply indented than previous winner:
442-
# no change (previous winner is still on top).
443-
elif indent.startswith(margin):
444-
pass
445-
446-
# Current line consistent with and no deeper than previous winner:
447-
# it's the new winner.
448-
elif margin.startswith(indent):
449-
margin = indent
450-
451-
# Find the largest common whitespace between current line and previous
452-
# winner.
434+
# Early return for empty input
435+
if not text:
436+
return text
437+
438+
# Split into lines
439+
lines = text.splitlines(True)
440+
441+
# Fast path for single line - but make sure we still dedent!
442+
if len(lines) == 1:
443+
line = lines[0]
444+
stripped = line.strip()
445+
if not stripped: # Blank line
446+
return "\n" if line.endswith("\n") else ""
447+
448+
# Find leading whitespace for a single line
449+
if only_whitespace:
450+
i = 0
451+
while i < len(line) and line[i] in " \t":
452+
i += 1
453+
if i > 0: # Has leading whitespace to remove
454+
return line[i:]
453455
else:
454-
for i, (x, y) in enumerate(zip(margin, indent)):
455-
if x != y:
456-
margin = margin[:i]
457-
break
456+
lead_size = len(line) - len(line.lstrip())
457+
if lead_size > 0: # Has leading whitespace to remove
458+
return line[lead_size:]
459+
return line # No whitespace to remove
460+
461+
# Cache method lookups for faster access
462+
_strip = str.strip
463+
_startswith = str.startswith
464+
_endswith = str.endswith
465+
466+
# Find first two non-blank lines
467+
non_blank = []
468+
for line in lines:
469+
if _strip(line):
470+
non_blank.append(line)
471+
if len(non_blank) == 2:
472+
break
473+
474+
# All lines are blank
475+
if not non_blank:
476+
result = []
477+
append = result.append
478+
for line in lines:
479+
append("\n" if _endswith(line, "\n") else "")
480+
return "".join(result)
481+
482+
# Calculate margin length efficiently
483+
if len(non_blank) == 1:
484+
# Single non-blank line
485+
line = non_blank[0]
486+
if only_whitespace:
487+
# Manually find leading whitespace (faster than regex)
488+
i = 0
489+
line_len = len(line)
490+
while i < line_len and line[i] in " \t":
491+
i += 1
492+
margin_len = i
493+
else:
494+
# Use built-in lstrip for non-whitespace case
495+
margin_len = len(line) - len(line.lstrip())
496+
else:
497+
# Find common prefix of first two non-blank lines
498+
a, b = non_blank
499+
min_len = min(len(a), len(b))
500+
i = 0
458501

459-
# sanity check (testing/debugging only)
460-
if 0 and margin:
461-
for line in text.split("\n"):
462-
assert not line or line.startswith(margin), \
463-
"line = %r, margin = %r" % (line, margin)
502+
if only_whitespace:
503+
# Manual loop is faster than character-by-character comparison
504+
while i < min_len and a[i] == b[i] and a[i] in " \t":
505+
i += 1
506+
else:
507+
while i < min_len and a[i] == b[i]:
508+
i += 1
464509

465-
if margin:
466-
text = re.sub(r'(?m)^' + margin, '', text)
467-
return text
510+
margin_len = i
511+
512+
# No margin to remove - return original with blank line normalization
513+
if margin_len == 0:
514+
result = []
515+
append = result.append
516+
for line in lines:
517+
if _strip(line): # Non-blank line
518+
append(line)
519+
else: # Blank line
520+
append("\n" if _endswith(line, "\n") else "")
521+
return "".join(result)
522+
523+
# Get margin string once for repeated comparison
524+
margin = non_blank[0][:margin_len]
525+
526+
# Pre-allocate result list with a size hint for better memory efficiency
527+
result = []
528+
append = result.append
529+
530+
# Process all lines with optimized operations
531+
for line in lines:
532+
if not _strip(line): # Blank line (including whitespace-only lines)
533+
append("\n" if _endswith(line, "\n") else "")
534+
elif _startswith(line, margin): # Has margin
535+
# Slice operation is very fast in Python
536+
append(line[margin_len:])
537+
else: # No matching margin
538+
append(line)
539+
540+
# Single join is faster than incremental string building
541+
return "".join(result)
468542

469543

470544
def indent(text, prefix, predicate=None):

0 commit comments

Comments
 (0)