Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions Doc/library/binascii.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,87 @@ The :mod:`binascii` module defines the following functions:
Added the *newline* parameter.


.. function:: a2b_ascii85(string, /, *, fold_spaces=False, wrap=False, ignore=b"")

Convert Ascii85 data back to binary and return the binary data.

Valid Ascii85 data contains characters from the Ascii85 alphabet in groups
of five (except for the final group, which may have from two to five
characters). Each group encodes 32 bits of binary data in the range from
``0`` to ``2 ** 32 - 1``, inclusive. The special character ``z`` is
accepted as a short form of the group ``!!!!!``, which encodes four
consecutive null bytes.

If *fold_spaces* is true, the special character ``y`` is also accepted as a
short form of the group ``+<VdL``, which encodes four consecutive spaces.
Note that neither short form is permitted if it occurs in the middle of
another group.

If *wrap* is true, the input begins with ``<~`` and ends with ``~>``, as in
the Adobe Ascii85 format.

*ignore* is an optional bytes-like object that specifies characters to
ignore in the input.

Invalid Ascii85 data will raise :exc:`binascii.Error`.

.. versionadded:: next


.. function:: b2a_ascii85(data, /, *, fold_spaces=False, wrap=False, width=0, pad=False)

Convert binary data to a formatted sequence of ASCII characters in Ascii85
coding. The return value is the converted data.

If *fold_spaces* is true, four consecutive spaces are encoded as the
special character ``y`` instead of the sequence ``+<VdL``.

If *wrap* is true, the output begins with ``<~`` and ends with ``~>``, as
in the Adobe Ascii85 format.

If *width* is provided and greater than 0, the output is split into lines
of no more than the specified width separated by the ASCII newline
character.

If *pad* is true, the input is padded to a multiple of 4 bytes before encoding.

.. versionadded:: next


.. function:: a2b_base85(string, /, *, strict_mode=False, z85=False)

Convert base85 data back to binary and return the binary data.
More than one line may be passed at a time.

If *strict_mode* is true, only valid base85 data will be converted.
Invalid base85 data will raise :exc:`binascii.Error`.

If *z85* is true, the base85 data uses the Z85 alphabet.
See the `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information.

Valid base85 data contains characters from the base85 alphabet in groups
of five (except for the final group, which may have from two to five
characters). Each group encodes 32 bits of binary data in the range from
``0`` to ``2 ** 32 - 1``, inclusive.

.. versionadded:: next


.. function:: b2a_base85(data, /, *, pad=False, newline=True, z85=False)

Convert binary data to a line of ASCII characters in base85 coding.
The return value is the converted line.

If *pad* is true, the input is padded to a multiple of 4 bytes before encoding.

If *newline* is true, a newline character is appended to the result.

If *z85* is true, the Z85 alphabet is used for conversion.
See the `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information.

.. versionadded:: next


.. function:: a2b_qp(data, header=False)

Convert a block of quoted-printable data back to binary and return the binary
Expand Down
5 changes: 5 additions & 0 deletions Include/internal/pycore_global_objects_fini_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions Include/internal/pycore_global_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(flags)
STRUCT_FOR_ID(flush)
STRUCT_FOR_ID(fold)
STRUCT_FOR_ID(fold_spaces)
STRUCT_FOR_ID(follow_symlinks)
STRUCT_FOR_ID(format)
STRUCT_FOR_ID(format_spec)
Expand Down Expand Up @@ -678,6 +679,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(outpath)
STRUCT_FOR_ID(overlapped)
STRUCT_FOR_ID(owner)
STRUCT_FOR_ID(pad)
STRUCT_FOR_ID(pages)
STRUCT_FOR_ID(parameter)
STRUCT_FOR_ID(parent)
Expand Down Expand Up @@ -864,11 +866,14 @@ struct _Py_global_strings {
STRUCT_FOR_ID(weeks)
STRUCT_FOR_ID(which)
STRUCT_FOR_ID(who)
STRUCT_FOR_ID(width)
STRUCT_FOR_ID(withdata)
STRUCT_FOR_ID(wrap)
STRUCT_FOR_ID(writable)
STRUCT_FOR_ID(write)
STRUCT_FOR_ID(write_through)
STRUCT_FOR_ID(year)
STRUCT_FOR_ID(z85)
STRUCT_FOR_ID(zdict)
STRUCT_FOR_ID(zstd_dict)
} identifiers;
Expand Down
5 changes: 5 additions & 0 deletions Include/internal/pycore_runtime_init_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions Include/internal/pycore_unicodeobject_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

79 changes: 79 additions & 0 deletions Lib/_base64.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""C accelerator wrappers for originally pure-Python parts of base64."""

from binascii import Error, a2b_ascii85, a2b_base85, b2a_ascii85, b2a_base85


# Base 85 functions in base64 silently convert input to bytes.
# Copy the conversion logic from base64 to avoid circular imports.

bytes_types = (bytes, bytearray) # Types acceptable as binary data


def _bytes_from_decode_data(s):
if isinstance(s, str):
try:
return s.encode('ascii')
except UnicodeEncodeError:
raise ValueError('string argument should contain only ASCII characters')
if isinstance(s, bytes_types):
return s
try:
return memoryview(s).tobytes()
except TypeError:
raise TypeError("argument should be a bytes-like object or ASCII "
"string, not %r" % s.__class__.__name__) from None


def _bytes_from_encode_data(b):
    """Coerce encoder input *b* to binary data.

    Instances of the types in ``bytes_types`` are returned unchanged; any
    other object is copied into a new ``bytes`` object through
    ``memoryview``, whose TypeError propagates unchanged if it rejects *b*.
    """
    if isinstance(b, bytes_types):
        return b
    return memoryview(b).tobytes()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

memoryview(b).tobytes() may raise a generic TypeError here, whereas it is in _bytes_from_decode_data. Should we do it as well?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The discussion by @serhiy-storchaka seems to have been resolved with the conclusion that the pure-Python base-85-related functions from base64 may be removed.

I'll therefore turn base64.a85encode(), etc. into wrappers for the new binascii.b2a_ascii85(), etc. as I had it originally, similarly to how e.g. base64.b64encode() is a wrapper for binascii.b2a_base64().

_base64.py will be removed, since it will no longer be necessary to test against both Python and C implementations of the base-85-related functions. So that makes your concerns for this specific file moot.

However, to follow your comment here a bit:

The type conversion logic is replicated from the existing _85encode() helper function, which itself seems to have replicated the logic from the existing _b32encode() helper function.

The logic in _b32encode(), in turn, seems to be an attempt to replicate the existing binascii facility for input type conversions.

The new additions (base64.a85encode(), etc.) use the existing binascii facility, which raises a more descriptive TypeError instead of a generic one.

Can you or someone else verify that the C type conversion logic in binascii and the Python logic in base64 are equivalent, or at least equivalent enough?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh so it was something that was already there. Ok, sorry for the noise. I was more worried about the fact that a TypeError was caught during decoding but not during encoding.



# Functions in binascii raise binascii.Error instead of ValueError.

def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
def a85encode(b, /, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the existing Python implementation does not use positional-only parameters. Changing this is a separate issue.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arf, ok :( I thought it was incorrectly c/c.

b = _bytes_from_encode_data(b)
try:
return b2a_ascii85(b, fold_spaces=foldspaces,
wrap=adobe, width=wrapcol, pad=pad)
except Error as e:
raise ValueError(e) from None


def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
def a85decode(b, /, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):

b = _bytes_from_decode_data(b)
try:
return a2b_ascii85(b, fold_spaces=foldspaces,
wrap=adobe, ignore=ignorechars)
except Error as e:
raise ValueError(e) from None

def b85encode(b, pad=False):
    """Encode *b* with binascii.b2a_base85() and return the result.

    *pad* is forwarded to b2a_base85(); no trailing newline is requested.
    A binascii.Error raised by the encoder is re-raised as ValueError.
    """
    data = _bytes_from_encode_data(b)
    try:
        encoded = b2a_base85(data, pad=pad, newline=False)
    except Error as exc:
        raise ValueError(exc) from None
    return encoded


def b85decode(b):
    """Decode *b* with binascii.a2b_base85() in strict mode.

    A binascii.Error raised by the decoder is re-raised as ValueError.
    """
    data = _bytes_from_decode_data(b)
    try:
        decoded = a2b_base85(data, strict_mode=True)
    except Error as exc:
        raise ValueError(exc) from None
    return decoded


def z85encode(s, pad=False):
    """Encode *s* with binascii.b2a_base85() using the Z85 alphabet.

    *pad* is forwarded to b2a_base85(); no trailing newline is requested.
    A binascii.Error raised by the encoder is re-raised as ValueError.
    """
    data = _bytes_from_encode_data(s)
    try:
        encoded = b2a_base85(data, pad=pad, newline=False, z85=True)
    except Error as exc:
        raise ValueError(exc) from None
    return encoded


def z85decode(s):
    """Decode *s* with binascii.a2b_base85() in strict mode, Z85 alphabet.

    A binascii.Error raised by the decoder is re-raised as ValueError.
    """
    data = _bytes_from_decode_data(s)
    try:
        decoded = a2b_base85(data, strict_mode=True, z85=True)
    except Error as exc:
        raise ValueError(exc) from None
    return decoded
20 changes: 20 additions & 0 deletions Lib/base64.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,26 @@ def decodebytes(s):
return binascii.a2b_base64(s)


# Use accelerated implementations of originally pure-Python parts if possible.
try:
    from _base64 import (
        a85encode as _a85encode,
        a85decode as _a85decode,
        b85encode as _b85encode,
        b85decode as _b85decode,
        z85encode as _z85encode,
        z85decode as _z85decode,
    )
except ImportError:
    # _base64 could not be imported: keep the definitions made above.
    pass
else:
    # Avoid expensive import of update_wrapper() from functools.
    def _copy_attributes(func, src_func):
        """Give *func* the docstring of *src_func* and report it as base64's."""
        func.__module__ = "base64"
        func.__doc__ = src_func.__doc__
        return func

    # Rebind each public name to its _base64 counterpart, carrying over the
    # docstring of the definition it replaces.
    a85encode = _copy_attributes(_a85encode, a85encode)
    a85decode = _copy_attributes(_a85decode, a85decode)
    b85encode = _copy_attributes(_b85encode, b85encode)
    b85decode = _copy_attributes(_b85decode, b85decode)
    z85encode = _copy_attributes(_z85encode, z85encode)
    z85decode = _copy_attributes(_z85decode, z85decode)


# Usable as a script...
def main():
"""Small main program"""
Expand Down
Loading
Loading