Skip to content

Commit 01df442

Browse files
Optimize binascii.b2a_ascii85().
1 parent 167e83e commit 01df442

File tree

2 files changed

+81
-43
lines changed

2 files changed

+81
-43
lines changed

Lib/test/test_base64.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,12 @@ def test_a85_padding(self):
874874
eq(base64.a85decode(b'G^+IX'), b"xxxx")
875875
eq(base64.a85decode(b'G^+IXGQ7^D'), b"xxxxx\x00\x00\x00")
876876

877+
eq(base64.a85encode(b"\x00", pad=True), b'z')
878+
eq(base64.a85encode(b"\x00"*2, pad=True), b'z')
879+
eq(base64.a85encode(b"\x00"*3, pad=True), b'z')
880+
eq(base64.a85encode(b"\x00"*4, pad=True), b'z')
881+
eq(base64.a85encode(b"\x00"*5, pad=True), b'zz')
882+
877883
def test_b85_padding(self):
878884
base64 = self.module
879885
eq = self.assertEqual

Modules/binascii.c

Lines changed: 75 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,28 @@ ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
267267
return Py_CLEANUP_SUPPORTED;
268268
}
269269

270+
/* Break data[0..size) into lines of at most `width` bytes, in place.
 *
 * A '\n' is inserted after every `width` bytes; nothing is appended after
 * the final (possibly shorter) line.  The caller must guarantee that the
 * buffer has room for (size - 1) / width additional bytes past data[size-1].
 *
 * The buffer is rewritten from the end towards the start, so each line is
 * moved at most once and the first line never moves.
 *
 * data  - buffer to rewrite.
 * size  - number of bytes currently in the buffer (must be >= 0).
 * width - maximum line length; 0 means "do not wrap".
 *
 * Returns the new number of bytes in the buffer.
 */
static Py_ssize_t
wraplines(unsigned char *data, Py_ssize_t size, size_t width)
{
    assert(size >= 0);
    /* Nothing to do when wrapping is disabled or everything fits on a
       single line.  The explicit width check also protects the division
       below from a zero divisor. */
    if (width == 0 || (size_t)size <= width) {
        return size;
    }

    /* Number of separators to insert, and the length of the last line
       (always in the range 1..width). */
    size_t newlines = ((size_t)size - 1) / width;
    size_t line_len = (size_t)size - newlines * width;

    unsigned char *src = data + size;       /* end of the unwrapped data */
    size += (Py_ssize_t)newlines;
    unsigned char *dst = data + size;       /* end of the wrapped data */

    /* Walk backwards one line at a time.  The loop stops when only the
       first line is left, which is already in its final position. */
    while ((src -= line_len) != data) {
        dst -= line_len;
        /* Source and destination may overlap, hence memmove(). */
        memmove(dst, src, line_len);
        *--dst = '\n';
        /* Every line except the last one is exactly `width` bytes long. */
        line_len = width;
    }
    assert(dst == data + width);
    return size;
}
291+
270292
#include "clinic/binascii.c.h"
271293

272294
/*[clinic input]
@@ -839,15 +861,8 @@ binascii_b2a_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces,
839861
int wrap, unsigned int width, int pad)
840862
/*[clinic end generated code: output=78426392ad3fc75b input=d5122dbab4dbb9f2]*/
841863
{
842-
const unsigned char *bin_data;
843-
int chunk_pos = 0;
844-
unsigned char this_group[5];
845-
uint32_t leftchar = 0;
846-
unsigned int line_len = 0;
847-
Py_ssize_t bin_len, group_len, out_len;
848-
849-
bin_data = data->buf;
850-
bin_len = data->len;
864+
const unsigned char *bin_data = data->buf;
865+
Py_ssize_t bin_len = data->len;
851866

852867
assert(bin_len >= 0);
853868

@@ -858,7 +873,7 @@ binascii_b2a_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces,
858873
/* Allocate output buffer.
859874
XXX: Do a pre-pass above some threshold estimate (cf. 'yz')?
860875
*/
861-
out_len = 5 * ((bin_len + 3) / 4);
876+
Py_ssize_t out_len = 5 * ((bin_len + 3) / 4);
862877
if (wrap) out_len += 4;
863878
if (!pad && (bin_len % 4)) out_len -= 4 - (bin_len % 4);
864879
if (width && out_len) out_len += (out_len - 1) / width;
@@ -872,56 +887,73 @@ binascii_b2a_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces,
872887
if (wrap) {
873888
*ascii_data++ = BASE85_A85_PREFIX;
874889
*ascii_data++ = BASE85_A85_AFFIX;
875-
line_len = 2;
876890
}
877891

878-
for (; bin_len > 0 || chunk_pos != 0; bin_len--, bin_data++) {
879-
/* Shift data or padding into our buffer. */
880-
leftchar <<= 8; /* Pad with zero when encoding. */
881-
if (bin_len > 0) {
882-
leftchar |= *bin_data;
892+
/* Encode all full-length chunks. */
893+
for (; bin_len >= 4; bin_len -= 4, bin_data += 4) {
894+
uint32_t leftchar = (bin_data[0] << 24) | (bin_data[1] << 16) |
895+
(bin_data[2] << 8) | bin_data[3];
896+
if (leftchar == BASE85_A85_Z) {
897+
*ascii_data++ = 'z';
898+
}
899+
else if (fold_spaces && leftchar == BASE85_A85_Y) {
900+
*ascii_data++ = 'y';
883901
}
902+
else {
903+
ascii_data[4] = table_b2a_base85_a85[leftchar % 85];
904+
leftchar /= 85;
905+
ascii_data[3] = table_b2a_base85_a85[leftchar % 85];
906+
leftchar /= 85;
907+
ascii_data[2] = table_b2a_base85_a85[leftchar % 85];
908+
leftchar /= 85;
909+
ascii_data[1] = table_b2a_base85_a85[leftchar % 85];
910+
leftchar /= 85;
911+
ascii_data[0] = table_b2a_base85_a85[leftchar];
884912

885-
/* Wait until buffer is full. */
886-
if (++chunk_pos != 4) {
887-
continue;
913+
ascii_data += 5;
888914
}
915+
}
889916

890-
/* Encode current chunk. */
891-
if (((bin_len > 0 || pad) && leftchar == BASE85_A85_Z) ||
892-
(fold_spaces && leftchar == BASE85_A85_Y)) {
893-
this_group[0] = leftchar == BASE85_A85_Y ? 'y' : 'z';
894-
group_len = 1;
895-
leftchar = 0;
896-
} else {
897-
group_len = bin_len > 0 || pad ? 5 : 4 + bin_len;
898-
for (Py_ssize_t i = 4; i >= 0; i--) {
899-
this_group[i] = table_b2a_base85_a85[leftchar % 85];
900-
leftchar /= 85;
917+
/* Encode partial-length final chunk. */
918+
if (bin_len > 0) {
919+
uint32_t leftchar = 0;
920+
for (Py_ssize_t i = 0; i < 4; i++) {
921+
leftchar <<= 8; /* Pad with zero when encoding. */
922+
if (i < bin_len) {
923+
leftchar |= *bin_data++;
901924
}
902925
}
903-
904-
/* Write current group. */
905-
for (Py_ssize_t i = 0; i < group_len; i++) {
906-
if (width && line_len == width) {
907-
*ascii_data++ = '\n';
908-
line_len = 0;
926+
if (pad && leftchar == BASE85_A85_Z) {
927+
*ascii_data++ = 'z';
928+
}
929+
else {
930+
Py_ssize_t group_len = pad ? 5 : bin_len + 1;
931+
for (Py_ssize_t i = 4; i >= 0; i--) {
932+
if (i < group_len) {
933+
ascii_data[i] = table_b2a_base85_a85[leftchar % 85];
934+
}
935+
leftchar /= 85;
909936
}
910-
*ascii_data++ = this_group[i];
911-
line_len++;
937+
ascii_data += group_len;
912938
}
913-
914-
chunk_pos = 0;
915939
}
916940

917941
if (wrap) {
918-
if (width && line_len + 2 > width) {
919-
*ascii_data++ = '\n';
920-
}
921942
*ascii_data++ = BASE85_A85_AFFIX;
922943
*ascii_data++ = BASE85_A85_SUFFIX;
923944
}
924945

946+
if (width && out_len) {
947+
unsigned char *start = PyBytesWriter_GetData(writer);
948+
ascii_data = start + wraplines(start, ascii_data - start, width);
949+
if (wrap && ascii_data[-2] == '\n') {
950+
assert(ascii_data[-1] == BASE85_A85_SUFFIX);
951+
assert(ascii_data[-3] == BASE85_A85_AFFIX);
952+
ascii_data[-3] = '\n';
953+
ascii_data[-2] = BASE85_A85_AFFIX;
954+
}
955+
}
956+
925957
return PyBytesWriter_FinishWithPointer(writer, ascii_data);
926958
}
927959

0 commit comments

Comments
 (0)