@@ -148,17 +148,13 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
148148 return chars ;
149149}
150150
151- static PyObject *
152- ascii_escape_unicode (PyObject * pystr )
151+ static Py_ssize_t /* not int: output_size is a Py_ssize_t and must not be narrowed on return */
152+ ascii_escape_size (PyObject * pystr ) /* Size accumulated in output_size for pystr, or -1 with OverflowError set. */
153153{
154- /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
155154 Py_ssize_t i ;
156155 Py_ssize_t input_chars ;
157156 Py_ssize_t output_size ;
158- Py_ssize_t chars ;
159- PyObject * rval ;
160157 const void * input ;
161- Py_UCS1 * output ;
162158 int kind ;
163159
164160 input_chars = PyUnicode_GET_LENGTH (pystr );
@@ -183,11 +179,29 @@ ascii_escape_unicode(PyObject *pystr)
183179 }
184180 if (output_size > PY_SSIZE_T_MAX - d ) {
185181 PyErr_SetString (PyExc_OverflowError , "string is too long to escape" );
186- return NULL ;
182+ return -1 ; /* error sentinel; callers test for a negative result */
187183 }
188184 output_size += d ;
189185 }
190186
187+ return output_size ;
188+ }
189+
190+ static PyObject *
191+ ascii_escape_unicode_and_size (PyObject * pystr , Py_ssize_t output_size )
192+ {
193+ Py_ssize_t i ;
194+ Py_ssize_t input_chars ;
195+ Py_ssize_t chars ;
196+ PyObject * rval ;
197+ const void * input ;
198+ Py_UCS1 * output ;
199+ int kind ;
200+
201+ input_chars = PyUnicode_GET_LENGTH (pystr );
202+ input = PyUnicode_DATA (pystr );
203+ kind = PyUnicode_KIND (pystr );
204+
191205 rval = PyUnicode_New (output_size , 127 );
192206 if (rval == NULL ) {
193207 return NULL ;
@@ -211,47 +225,27 @@ ascii_escape_unicode(PyObject *pystr)
211225 return rval ;
212226}
213227
214- static int
215- write_escaped_ascii ( PyUnicodeWriter * writer , PyObject * pystr )
228+ static PyObject *
229+ ascii_escape_unicode ( PyObject * pystr )
216230{
217231 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
218- Py_ssize_t i ;
219- Py_ssize_t input_chars ;
220- Py_ssize_t output_size ;
221- Py_ssize_t chars ;
222- PyObject * rval ;
223- const void * input ;
224- Py_UCS1 * output ;
225- int kind ;
232+ Py_ssize_t output_size = ascii_escape_size (pystr ); /* first pass: compute the output size */
233+ if (output_size < 0 ) {
234+ return NULL ; /* negative size: the sizing pass failed, propagate as NULL */
235+ }
226236
227- input_chars = PyUnicode_GET_LENGTH (pystr );
228- input = PyUnicode_DATA (pystr );
229- kind = PyUnicode_KIND (pystr );
237+ return ascii_escape_unicode_and_size (pystr , output_size ); /* second pass: build the string at the precomputed size */
238+ }
230239
231- /* Compute the output size */
232- for (i = 0 , output_size = 2 ; i < input_chars ; i ++ ) {
233- Py_UCS4 c = PyUnicode_READ (kind , input , i );
234- Py_ssize_t d ;
235- if (S_CHAR (c )) {
236- d = 1 ;
237- }
238- else {
239- switch (c ) {
240- case '\\' : case '"' : case '\b' : case '\f' :
241- case '\n' : case '\r' : case '\t' :
242- d = 2 ; break ;
243- default :
244- d = c >= 0x10000 ? 12 : 6 ;
245- }
246- }
247- if (output_size > PY_SSIZE_T_MAX - d ) {
248- PyErr_SetString (PyExc_OverflowError , "string is too long to escape" );
249- return -1 ;
250- }
251- output_size += d ;
240+ static int
241+ write_escaped_ascii (PyUnicodeWriter * writer , PyObject * pystr )
242+ {
243+ Py_ssize_t output_size = ascii_escape_size (pystr );
244+ if (output_size < 0 ) {
245+ return -1 ;
252246 }
253247
254- if (output_size == input_chars + 2 ) {
248+ if (output_size == PyUnicode_GET_LENGTH ( pystr ) + 2 ) {
255249 /* No need to escape anything */
256250 if (PyUnicodeWriter_WriteChar (writer , '"' ) < 0 ) {
257251 return -1 ;
@@ -262,43 +256,23 @@ write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
262256 return PyUnicodeWriter_WriteChar (writer , '"' );
263257 }
264258
265- rval = PyUnicode_New ( output_size , 127 );
259+ PyObject * rval = ascii_escape_unicode_and_size ( pystr , output_size );
266260 if (rval == NULL ) {
267261 return -1 ;
268262 }
269- output = PyUnicode_1BYTE_DATA (rval );
270- chars = 0 ;
271- output [chars ++ ] = '"' ;
272- for (i = 0 ; i < input_chars ; i ++ ) {
273- Py_UCS4 c = PyUnicode_READ (kind , input , i );
274- if (S_CHAR (c )) {
275- output [chars ++ ] = c ;
276- }
277- else {
278- chars = ascii_escape_unichar (c , output , chars );
279- }
280- }
281- output [chars ++ ] = '"' ;
282- #ifdef Py_DEBUG
283- assert (_PyUnicode_CheckConsistency (rval , 1 ));
284- #endif
263+
285264 return _steal_accumulate (writer , rval );
286265}
287266
288- static PyObject *
289- escape_unicode (PyObject * pystr )
267+ static Py_ssize_t /* not int: output_size is a Py_ssize_t and must not be narrowed on return */
268+ escape_size (PyObject * pystr ) /* Size accumulated in output_size for pystr, or -1 with OverflowError set. */
290269{
291- /* Take a PyUnicode pystr and return a new escaped PyUnicode */
292270 Py_ssize_t i ;
293271 Py_ssize_t input_chars ;
294272 Py_ssize_t output_size ;
295- Py_ssize_t chars ;
296- PyObject * rval ;
297273 const void * input ;
298274 int kind ;
299- Py_UCS4 maxchar ;
300275
301- maxchar = PyUnicode_MAX_CHAR_VALUE (pystr );
302276 input_chars = PyUnicode_GET_LENGTH (pystr );
303277 input = PyUnicode_DATA (pystr );
304278 kind = PyUnicode_KIND (pystr );
@@ -320,11 +294,30 @@ escape_unicode(PyObject *pystr)
320294 }
321295 if (output_size > PY_SSIZE_T_MAX - d ) {
322296 PyErr_SetString (PyExc_OverflowError , "string is too long to escape" );
323- return NULL ;
297+ return -1 ; /* error sentinel; callers test for a negative result */
324298 }
325299 output_size += d ;
326300 }
327301
302+ return output_size ;
303+ }
304+
305+ static PyObject *
306+ escape_unicode_and_size (PyObject * pystr , Py_ssize_t output_size )
307+ {
308+ Py_ssize_t i ;
309+ Py_ssize_t input_chars ;
310+ Py_ssize_t chars ;
311+ PyObject * rval ;
312+ const void * input ;
313+ int kind ;
314+ Py_UCS4 maxchar ;
315+
316+ maxchar = PyUnicode_MAX_CHAR_VALUE (pystr );
317+ input_chars = PyUnicode_GET_LENGTH (pystr );
318+ input = PyUnicode_DATA (pystr );
319+ kind = PyUnicode_KIND (pystr );
320+
328321 rval = PyUnicode_New (output_size , maxchar );
329322 if (rval == NULL )
330323 return NULL ;
@@ -379,47 +372,27 @@ escape_unicode(PyObject *pystr)
379372 return rval ;
380373}
381374
382- static int
383- write_escaped_unicode ( PyUnicodeWriter * writer , PyObject * pystr )
375+ static PyObject *
376+ escape_unicode ( PyObject * pystr )
384377{
385378 /* Take a PyUnicode pystr and return a new escaped PyUnicode */
386- Py_ssize_t i ;
387- Py_ssize_t input_chars ;
388- Py_ssize_t output_size ;
389- Py_ssize_t chars ;
390- PyObject * rval ;
391- const void * input ;
392- int kind ;
393- Py_UCS4 maxchar ;
379+ Py_ssize_t output_size = escape_size (pystr ); /* first pass: compute the output size */
380+ if (output_size < 0 ) {
381+ return NULL ; /* negative size: the sizing pass failed, propagate as NULL */
382+ }
394383
395- maxchar = PyUnicode_MAX_CHAR_VALUE (pystr );
396- input_chars = PyUnicode_GET_LENGTH (pystr );
397- input = PyUnicode_DATA (pystr );
398- kind = PyUnicode_KIND (pystr );
384+ return escape_unicode_and_size (pystr , output_size ); /* second pass: build the string at the precomputed size */
385+ }
399386
400- /* Compute the output size */
401- for (i = 0 , output_size = 2 ; i < input_chars ; i ++ ) {
402- Py_UCS4 c = PyUnicode_READ (kind , input , i );
403- Py_ssize_t d ;
404- switch (c ) {
405- case '\\' : case '"' : case '\b' : case '\f' :
406- case '\n' : case '\r' : case '\t' :
407- d = 2 ;
408- break ;
409- default :
410- if (c <= 0x1f )
411- d = 6 ;
412- else
413- d = 1 ;
414- }
415- if (output_size > PY_SSIZE_T_MAX - d ) {
416- PyErr_SetString (PyExc_OverflowError , "string is too long to escape" );
417- return -1 ;
418- }
419- output_size += d ;
387+ static int
388+ write_escaped_unicode (PyUnicodeWriter * writer , PyObject * pystr )
389+ {
390+ Py_ssize_t output_size = escape_size (pystr );
391+ if (output_size < 0 ) {
392+ return -1 ;
420393 }
421394
422- if (output_size == input_chars + 2 ) {
395+ if (output_size == PyUnicode_GET_LENGTH ( pystr ) + 2 ) {
423396 /* No need to escape anything */
424397 if (PyUnicodeWriter_WriteChar (writer , '"' ) < 0 ) {
425398 return -1 ;
@@ -430,57 +403,11 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
430403 return PyUnicodeWriter_WriteChar (writer , '"' );
431404 }
432405
433- rval = PyUnicode_New ( output_size , maxchar );
434- if (rval == NULL )
406+ PyObject * rval = escape_unicode_and_size ( pystr , output_size );
407+ if (rval == NULL ) {
435408 return -1 ;
436-
437- kind = PyUnicode_KIND (rval );
438-
439- #define ENCODE_OUTPUT do { \
440- chars = 0; \
441- output[chars++] = '"'; \
442- for (i = 0; i < input_chars; i++) { \
443- Py_UCS4 c = PyUnicode_READ(kind, input, i); \
444- switch (c) { \
445- case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
446- case '"': output[chars++] = '\\'; output[chars++] = c; break; \
447- case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
448- case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
449- case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
450- case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
451- case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
452- default: \
453- if (c <= 0x1f) { \
454- output[chars++] = '\\'; \
455- output[chars++] = 'u'; \
456- output[chars++] = '0'; \
457- output[chars++] = '0'; \
458- output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
459- output[chars++] = Py_hexdigits[(c ) & 0xf]; \
460- } else { \
461- output[chars++] = c; \
462- } \
463- } \
464- } \
465- output[chars++] = '"'; \
466- } while (0)
467-
468- if (kind == PyUnicode_1BYTE_KIND ) {
469- Py_UCS1 * output = PyUnicode_1BYTE_DATA (rval );
470- ENCODE_OUTPUT ;
471- } else if (kind == PyUnicode_2BYTE_KIND ) {
472- Py_UCS2 * output = PyUnicode_2BYTE_DATA (rval );
473- ENCODE_OUTPUT ;
474- } else {
475- Py_UCS4 * output = PyUnicode_4BYTE_DATA (rval );
476- assert (kind == PyUnicode_4BYTE_KIND );
477- ENCODE_OUTPUT ;
478409 }
479- #undef ENCODE_OUTPUT
480410
481- #ifdef Py_DEBUG
482- assert (_PyUnicode_CheckConsistency (rval , 1 ));
483- #endif
484411 return _steal_accumulate (writer , rval );
485412}
486413
0 commit comments