Skip to content

Commit 2691a0a

Browse files
committed
Remove pure-Python base-85-related codepaths in base64
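Per the discussion in [1], removing the pure-Python implementations is permitted even though PEP 399 normally requires standard-library modules to keep a pure-Python equivalent of any C-accelerated code. [1]: https://discuss.python.org/t/accelerator-for-ascii85-base85/105415/3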
1 parent 1e928e3 commit 2691a0a

File tree

1 file changed

+8
-198
lines changed

1 file changed

+8
-198
lines changed

Lib/base64.py

Lines changed: 8 additions & 198 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
55
# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
66

7-
import struct
87
import binascii
98

109

@@ -290,36 +289,6 @@ def b16decode(s, casefold=False):
290289
#
291290
# Ascii85 encoding/decoding
292291
#
293-
294-
_a85chars = None
295-
_a85chars2 = None
296-
_A85START = b"<~"
297-
_A85END = b"~>"
298-
299-
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
300-
# Helper function for a85encode and b85encode
301-
if not isinstance(b, bytes_types):
302-
b = memoryview(b).tobytes()
303-
304-
padding = (-len(b)) % 4
305-
if padding:
306-
b = b + b'\0' * padding
307-
words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)
308-
309-
chunks = [b'z' if foldnuls and not word else
310-
b'y' if foldspaces and word == 0x20202020 else
311-
(chars2[word // 614125] +
312-
chars2[word // 85 % 7225] +
313-
chars[word % 85])
314-
for word in words]
315-
316-
if padding and not pad:
317-
if chunks[-1] == b'z':
318-
chunks[-1] = chars[0] * 5
319-
chunks[-1] = chunks[-1][:-padding]
320-
321-
return b''.join(chunks)
322-
323292
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
324293
"""Encode bytes-like object b using Ascii85 and return a bytes object.
325294
@@ -337,29 +306,8 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
337306
adobe controls whether the encoded byte sequence is framed with <~ and ~>,
338307
which is used by the Adobe implementation.
339308
"""
340-
global _a85chars, _a85chars2
341-
# Delay the initialization of tables to not waste memory
342-
# if the function is never called
343-
if _a85chars2 is None:
344-
_a85chars = [bytes((i,)) for i in range(33, 118)]
345-
_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
346-
347-
result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
348-
349-
if adobe:
350-
result = _A85START + result
351-
if wrapcol:
352-
wrapcol = max(2 if adobe else 1, wrapcol)
353-
chunks = [result[i: i + wrapcol]
354-
for i in range(0, len(result), wrapcol)]
355-
if adobe:
356-
if len(chunks[-1]) + 2 > wrapcol:
357-
chunks.append(b'')
358-
result = b'\n'.join(chunks)
359-
if adobe:
360-
result += _A85END
361-
362-
return result
309+
return binascii.b2a_ascii85(b, fold_spaces=foldspaces,
310+
wrap=adobe, width=wrapcol, pad=pad)
363311

364312
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
365313
"""Decode the Ascii85 encoded bytes-like object or ASCII string b.
@@ -377,152 +325,34 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
377325
378326
The result is returned as a bytes object.
379327
"""
380-
b = _bytes_from_decode_data(b)
381-
if adobe:
382-
if not b.endswith(_A85END):
383-
raise ValueError(
384-
"Ascii85 encoded byte sequences must end "
385-
"with {!r}".format(_A85END)
386-
)
387-
if b.startswith(_A85START):
388-
b = b[2:-2] # Strip off start/end markers
389-
else:
390-
b = b[:-2]
391-
#
392-
# We have to go through this stepwise, so as to ignore spaces and handle
393-
# special short sequences
394-
#
395-
packI = struct.Struct('!I').pack
396-
decoded = []
397-
decoded_append = decoded.append
398-
curr = []
399-
curr_append = curr.append
400-
curr_clear = curr.clear
401-
for x in b + b'u' * 4:
402-
if b'!'[0] <= x <= b'u'[0]:
403-
curr_append(x)
404-
if len(curr) == 5:
405-
acc = 0
406-
for x in curr:
407-
acc = 85 * acc + (x - 33)
408-
try:
409-
decoded_append(packI(acc))
410-
except struct.error:
411-
raise ValueError('Ascii85 overflow') from None
412-
curr_clear()
413-
elif x == b'z'[0]:
414-
if curr:
415-
raise ValueError('z inside Ascii85 5-tuple')
416-
decoded_append(b'\0\0\0\0')
417-
elif foldspaces and x == b'y'[0]:
418-
if curr:
419-
raise ValueError('y inside Ascii85 5-tuple')
420-
decoded_append(b'\x20\x20\x20\x20')
421-
elif x in ignorechars:
422-
# Skip whitespace
423-
continue
424-
else:
425-
raise ValueError('Non-Ascii85 digit found: %c' % x)
426-
427-
result = b''.join(decoded)
428-
padding = 4 - len(curr)
429-
if padding:
430-
# Throw away the extra padding
431-
result = result[:-padding]
432-
return result
433-
434-
# The following code is originally taken (with permission) from Mercurial
435-
436-
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
437-
b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
438-
_b85chars = None
439-
_b85chars2 = None
440-
_b85dec = None
328+
return binascii.a2b_ascii85(b, fold_spaces=foldspaces,
329+
wrap=adobe, ignore=ignorechars)
441330

442331
def b85encode(b, pad=False):
443332
"""Encode bytes-like object b in base85 format and return a bytes object.
444333
445334
If pad is true, the input is padded with b'\\0' so its length is a multiple of
446335
4 bytes before encoding.
447336
"""
448-
global _b85chars, _b85chars2
449-
# Delay the initialization of tables to not waste memory
450-
# if the function is never called
451-
if _b85chars2 is None:
452-
_b85chars = [bytes((i,)) for i in _b85alphabet]
453-
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
454-
return _85encode(b, _b85chars, _b85chars2, pad)
337+
return binascii.b2a_base85(b, pad=pad, newline=False)
455338

456339
def b85decode(b):
457340
"""Decode the base85-encoded bytes-like object or ASCII string b
458341
459342
The result is returned as a bytes object.
460343
"""
461-
global _b85dec
462-
# Delay the initialization of tables to not waste memory
463-
# if the function is never called
464-
if _b85dec is None:
465-
# we don't assign to _b85dec directly to avoid issues when
466-
# multiple threads call this function simultaneously
467-
b85dec_tmp = [None] * 256
468-
for i, c in enumerate(_b85alphabet):
469-
b85dec_tmp[c] = i
470-
_b85dec = b85dec_tmp
471-
472-
b = _bytes_from_decode_data(b)
473-
padding = (-len(b)) % 5
474-
b = b + b'~' * padding
475-
out = []
476-
packI = struct.Struct('!I').pack
477-
for i in range(0, len(b), 5):
478-
chunk = b[i:i + 5]
479-
acc = 0
480-
try:
481-
for c in chunk:
482-
acc = acc * 85 + _b85dec[c]
483-
except TypeError:
484-
for j, c in enumerate(chunk):
485-
if _b85dec[c] is None:
486-
raise ValueError('bad base85 character at position %d'
487-
% (i + j)) from None
488-
raise
489-
try:
490-
out.append(packI(acc))
491-
except struct.error:
492-
raise ValueError('base85 overflow in hunk starting at byte %d'
493-
% i) from None
494-
495-
result = b''.join(out)
496-
if padding:
497-
result = result[:-padding]
498-
return result
499-
500-
_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz'
501-
b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#')
502-
# Translating b85 valid but z85 invalid chars to b'\x00' is required
503-
# to prevent them from being decoded as b85 valid chars.
504-
_z85_b85_decode_diff = b';_`|~'
505-
_z85_decode_translation = bytes.maketrans(
506-
_z85alphabet + _z85_b85_decode_diff,
507-
_b85alphabet + b'\x00' * len(_z85_b85_decode_diff)
508-
)
509-
_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet)
344+
return binascii.a2b_base85(b, strict_mode=True)
510345

511346
def z85encode(s, pad=False):
512347
"""Encode bytes-like object b in z85 format and return a bytes object."""
513-
return b85encode(s, pad).translate(_z85_encode_translation)
348+
return binascii.b2a_base85(s, pad=pad, newline=False, z85=True)
514349

515350
def z85decode(s):
516351
"""Decode the z85-encoded bytes-like object or ASCII string b
517352
518353
The result is returned as a bytes object.
519354
"""
520-
s = _bytes_from_decode_data(s)
521-
s = s.translate(_z85_decode_translation)
522-
try:
523-
return b85decode(s)
524-
except ValueError as e:
525-
raise ValueError(e.args[0].replace('base85', 'z85')) from None
355+
return binascii.a2b_base85(s, strict_mode=True, z85=True)
526356

527357
# Legacy interface. This code could be cleaned up since I don't believe
528358
# binascii has any line length limitations. It just doesn't seem worth it
@@ -579,26 +409,6 @@ def decodebytes(s):
579409
return binascii.a2b_base64(s)
580410

581411

582-
# Use accelerated implementations of originally pure-Python parts if possible.
583-
try:
584-
from _base64 import (a85encode as _a85encode, a85decode as _a85decode,
585-
b85encode as _b85encode, b85decode as _b85decode,
586-
z85encode as _z85encode, z85decode as _z85decode)
587-
# Avoid expensive import of update_wrapper() from functools.
588-
def _copy_attributes(func, src_func):
589-
func.__doc__ = src_func.__doc__
590-
func.__module__ = "base64"
591-
return func
592-
a85encode = _copy_attributes(_a85encode, a85encode)
593-
a85decode = _copy_attributes(_a85decode, a85decode)
594-
b85encode = _copy_attributes(_b85encode, b85encode)
595-
b85decode = _copy_attributes(_b85decode, b85decode)
596-
z85encode = _copy_attributes(_z85encode, z85encode)
597-
z85decode = _copy_attributes(_z85decode, z85decode)
598-
except ImportError:
599-
pass
600-
601-
602412
# Usable as a script...
603413
def main():
604414
"""Small main program"""

0 commit comments

Comments
 (0)