Skip to content

Commit 8ee4c93

Browse files
committed
Reuse code
1 parent b4a8026 commit 8ee4c93

File tree

1 file changed

+77
-150
lines changed

1 file changed

+77
-150
lines changed

Modules/_json.c

Lines changed: 77 additions & 150 deletions
Original file line number | Diff line number | Diff line change
@@ -148,17 +148,13 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
148148
return chars;
149149
}
150150

151-
static PyObject *
152-
ascii_escape_unicode(PyObject *pystr)
151+
static Py_ssize_t
152+
ascii_escape_size(PyObject *pystr)
153153
{
154-
/* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
155154
Py_ssize_t i;
156155
Py_ssize_t input_chars;
157156
Py_ssize_t output_size;
158-
Py_ssize_t chars;
159-
PyObject *rval;
160157
const void *input;
161-
Py_UCS1 *output;
162158
int kind;
163159

164160
input_chars = PyUnicode_GET_LENGTH(pystr);
@@ -183,11 +179,29 @@ ascii_escape_unicode(PyObject *pystr)
183179
}
184180
if (output_size > PY_SSIZE_T_MAX - d) {
185181
PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
186-
return NULL;
182+
return -1;
187183
}
188184
output_size += d;
189185
}
190186

187+
return output_size;
188+
}
189+
190+
static PyObject *
191+
ascii_escape_unicode_and_size(PyObject *pystr, Py_ssize_t output_size)
192+
{
193+
Py_ssize_t i;
194+
Py_ssize_t input_chars;
195+
Py_ssize_t chars;
196+
PyObject *rval;
197+
const void *input;
198+
Py_UCS1 *output;
199+
int kind;
200+
201+
input_chars = PyUnicode_GET_LENGTH(pystr);
202+
input = PyUnicode_DATA(pystr);
203+
kind = PyUnicode_KIND(pystr);
204+
191205
rval = PyUnicode_New(output_size, 127);
192206
if (rval == NULL) {
193207
return NULL;
@@ -211,47 +225,27 @@ ascii_escape_unicode(PyObject *pystr)
211225
return rval;
212226
}
213227

214-
static int
215-
write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
228+
static PyObject *
229+
ascii_escape_unicode(PyObject *pystr)
216230
{
217231
/* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
218-
Py_ssize_t i;
219-
Py_ssize_t input_chars;
220-
Py_ssize_t output_size;
221-
Py_ssize_t chars;
222-
PyObject *rval;
223-
const void *input;
224-
Py_UCS1 *output;
225-
int kind;
232+
Py_ssize_t output_size = ascii_escape_size(pystr);
233+
if (output_size < 0) {
234+
return NULL;
235+
}
226236

227-
input_chars = PyUnicode_GET_LENGTH(pystr);
228-
input = PyUnicode_DATA(pystr);
229-
kind = PyUnicode_KIND(pystr);
237+
return ascii_escape_unicode_and_size(pystr, output_size);
238+
}
230239

231-
/* Compute the output size */
232-
for (i = 0, output_size = 2; i < input_chars; i++) {
233-
Py_UCS4 c = PyUnicode_READ(kind, input, i);
234-
Py_ssize_t d;
235-
if (S_CHAR(c)) {
236-
d = 1;
237-
}
238-
else {
239-
switch(c) {
240-
case '\\': case '"': case '\b': case '\f':
241-
case '\n': case '\r': case '\t':
242-
d = 2; break;
243-
default:
244-
d = c >= 0x10000 ? 12 : 6;
245-
}
246-
}
247-
if (output_size > PY_SSIZE_T_MAX - d) {
248-
PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
249-
return -1;
250-
}
251-
output_size += d;
240+
static int
241+
write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
242+
{
243+
Py_ssize_t output_size = ascii_escape_size(pystr);
244+
if (output_size < 0) {
245+
return -1;
252246
}
253247

254-
if (output_size == input_chars + 2) {
248+
if (output_size == PyUnicode_GET_LENGTH(pystr) + 2) {
255249
/* No need to escape anything */
256250
if (PyUnicodeWriter_WriteChar(writer, '"') < 0) {
257251
return -1;
@@ -262,43 +256,23 @@ write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
262256
return PyUnicodeWriter_WriteChar(writer, '"');
263257
}
264258

265-
rval = PyUnicode_New(output_size, 127);
259+
PyObject *rval = ascii_escape_unicode_and_size(pystr, output_size);
266260
if (rval == NULL) {
267261
return -1;
268262
}
269-
output = PyUnicode_1BYTE_DATA(rval);
270-
chars = 0;
271-
output[chars++] = '"';
272-
for (i = 0; i < input_chars; i++) {
273-
Py_UCS4 c = PyUnicode_READ(kind, input, i);
274-
if (S_CHAR(c)) {
275-
output[chars++] = c;
276-
}
277-
else {
278-
chars = ascii_escape_unichar(c, output, chars);
279-
}
280-
}
281-
output[chars++] = '"';
282-
#ifdef Py_DEBUG
283-
assert(_PyUnicode_CheckConsistency(rval, 1));
284-
#endif
263+
285264
return _steal_accumulate(writer, rval);
286265
}
287266

288-
static PyObject *
289-
escape_unicode(PyObject *pystr)
267+
static Py_ssize_t
268+
escape_size(PyObject *pystr)
290269
{
291-
/* Take a PyUnicode pystr and return a new escaped PyUnicode */
292270
Py_ssize_t i;
293271
Py_ssize_t input_chars;
294272
Py_ssize_t output_size;
295-
Py_ssize_t chars;
296-
PyObject *rval;
297273
const void *input;
298274
int kind;
299-
Py_UCS4 maxchar;
300275

301-
maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
302276
input_chars = PyUnicode_GET_LENGTH(pystr);
303277
input = PyUnicode_DATA(pystr);
304278
kind = PyUnicode_KIND(pystr);
@@ -320,11 +294,30 @@ escape_unicode(PyObject *pystr)
320294
}
321295
if (output_size > PY_SSIZE_T_MAX - d) {
322296
PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
323-
return NULL;
297+
return -1;
324298
}
325299
output_size += d;
326300
}
327301

302+
return output_size;
303+
}
304+
305+
static PyObject *
306+
escape_unicode_and_size(PyObject *pystr, Py_ssize_t output_size)
307+
{
308+
Py_ssize_t i;
309+
Py_ssize_t input_chars;
310+
Py_ssize_t chars;
311+
PyObject *rval;
312+
const void *input;
313+
int kind;
314+
Py_UCS4 maxchar;
315+
316+
maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
317+
input_chars = PyUnicode_GET_LENGTH(pystr);
318+
input = PyUnicode_DATA(pystr);
319+
kind = PyUnicode_KIND(pystr);
320+
328321
rval = PyUnicode_New(output_size, maxchar);
329322
if (rval == NULL)
330323
return NULL;
@@ -379,47 +372,27 @@ escape_unicode(PyObject *pystr)
379372
return rval;
380373
}
381374

382-
static int
383-
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
375+
static PyObject *
376+
escape_unicode(PyObject *pystr)
384377
{
385378
/* Take a PyUnicode pystr and return a new escaped PyUnicode */
386-
Py_ssize_t i;
387-
Py_ssize_t input_chars;
388-
Py_ssize_t output_size;
389-
Py_ssize_t chars;
390-
PyObject *rval;
391-
const void *input;
392-
int kind;
393-
Py_UCS4 maxchar;
379+
Py_ssize_t output_size = escape_size(pystr);
380+
if (output_size < 0) {
381+
return NULL;
382+
}
394383

395-
maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
396-
input_chars = PyUnicode_GET_LENGTH(pystr);
397-
input = PyUnicode_DATA(pystr);
398-
kind = PyUnicode_KIND(pystr);
384+
return escape_unicode_and_size(pystr, output_size);
385+
}
399386

400-
/* Compute the output size */
401-
for (i = 0, output_size = 2; i < input_chars; i++) {
402-
Py_UCS4 c = PyUnicode_READ(kind, input, i);
403-
Py_ssize_t d;
404-
switch (c) {
405-
case '\\': case '"': case '\b': case '\f':
406-
case '\n': case '\r': case '\t':
407-
d = 2;
408-
break;
409-
default:
410-
if (c <= 0x1f)
411-
d = 6;
412-
else
413-
d = 1;
414-
}
415-
if (output_size > PY_SSIZE_T_MAX - d) {
416-
PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
417-
return -1;
418-
}
419-
output_size += d;
387+
static int
388+
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
389+
{
390+
Py_ssize_t output_size = escape_size(pystr);
391+
if (output_size < 0) {
392+
return -1;
420393
}
421394

422-
if (output_size == input_chars + 2) {
395+
if (output_size == PyUnicode_GET_LENGTH(pystr) + 2) {
423396
/* No need to escape anything */
424397
if (PyUnicodeWriter_WriteChar(writer, '"') < 0) {
425398
return -1;
@@ -430,57 +403,11 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
430403
return PyUnicodeWriter_WriteChar(writer, '"');
431404
}
432405

433-
rval = PyUnicode_New(output_size, maxchar);
434-
if (rval == NULL)
406+
PyObject *rval = escape_unicode_and_size(pystr, output_size);
407+
if (rval == NULL) {
435408
return -1;
436-
437-
kind = PyUnicode_KIND(rval);
438-
439-
#define ENCODE_OUTPUT do { \
440-
chars = 0; \
441-
output[chars++] = '"'; \
442-
for (i = 0; i < input_chars; i++) { \
443-
Py_UCS4 c = PyUnicode_READ(kind, input, i); \
444-
switch (c) { \
445-
case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
446-
case '"': output[chars++] = '\\'; output[chars++] = c; break; \
447-
case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
448-
case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
449-
case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
450-
case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
451-
case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
452-
default: \
453-
if (c <= 0x1f) { \
454-
output[chars++] = '\\'; \
455-
output[chars++] = 'u'; \
456-
output[chars++] = '0'; \
457-
output[chars++] = '0'; \
458-
output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
459-
output[chars++] = Py_hexdigits[(c ) & 0xf]; \
460-
} else { \
461-
output[chars++] = c; \
462-
} \
463-
} \
464-
} \
465-
output[chars++] = '"'; \
466-
} while (0)
467-
468-
if (kind == PyUnicode_1BYTE_KIND) {
469-
Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
470-
ENCODE_OUTPUT;
471-
} else if (kind == PyUnicode_2BYTE_KIND) {
472-
Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
473-
ENCODE_OUTPUT;
474-
} else {
475-
Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
476-
assert(kind == PyUnicode_4BYTE_KIND);
477-
ENCODE_OUTPUT;
478409
}
479-
#undef ENCODE_OUTPUT
480410

481-
#ifdef Py_DEBUG
482-
assert(_PyUnicode_CheckConsistency(rval, 1));
483-
#endif
484411
return _steal_accumulate(writer, rval);
485412
}
486413

0 commit comments

Comments
 (0)