@@ -1587,83 +1587,112 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, Re
15871587 return _PyAST_JoinedStr (values , lineno , col_offset , debug_end_line , debug_end_offset , p -> arena );
15881588}
15891589
1590- expr_ty
1591- _PyPegen_concatenate_strings (Parser * p , asdl_expr_seq * strings ,
1592- int lineno , int col_offset , int end_lineno ,
1593- int end_col_offset , PyArena * arena )
1590+ static expr_ty
1591+ _build_concatenated_bytes (Parser * p , asdl_expr_seq * strings , int lineno ,
1592+ int col_offset , int end_lineno , int end_col_offset ,
1593+ PyArena * arena )
15941594{
15951595 Py_ssize_t len = asdl_seq_LEN (strings );
15961596 assert (len > 0 );
15971597
1598- int f_string_found = 0 ;
1599- int unicode_string_found = 0 ;
1600- int bytes_found = 0 ;
1598+ PyObject * res = Py_GetConstant (Py_CONSTANT_EMPTY_BYTES );
16011599
1602- Py_ssize_t i = 0 ;
1603- Py_ssize_t n_flattened_elements = 0 ;
1604- for (i = 0 ; i < len ; i ++ ) {
1600+ /* Bytes literals never get a kind, but just for consistency
1601+ since they are represented as Constant nodes, we'll mirror
1602+ the same behavior as unicode strings for determining the
1603+ kind. */
1604+ PyObject * kind = asdl_seq_GET (strings , 0 )-> v .Constant .kind ;
1605+ for (Py_ssize_t i = 0 ; i < len ; i ++ ) {
16051606 expr_ty elem = asdl_seq_GET (strings , i );
1606- switch (elem -> kind ) {
1607- case Constant_kind :
1608- if (PyBytes_CheckExact (elem -> v .Constant .value )) {
1609- bytes_found = 1 ;
1610- } else {
1611- unicode_string_found = 1 ;
1612- }
1613- n_flattened_elements ++ ;
1614- break ;
1615- case JoinedStr_kind :
1616- n_flattened_elements += asdl_seq_LEN (elem -> v .JoinedStr .values );
1617- f_string_found = 1 ;
1618- break ;
1619- default :
1620- n_flattened_elements ++ ;
1621- f_string_found = 1 ;
1622- break ;
1623- }
1607+ PyBytes_Concat (& res , elem -> v .Constant .value );
16241608 }
1609+ if (!res || _PyArena_AddPyObject (arena , res ) < 0 ) {
1610+ Py_XDECREF (res );
1611+ return NULL ;
1612+ }
1613+ return _PyAST_Constant (res , kind , lineno , col_offset , end_lineno , end_col_offset , p -> arena );
1614+ }
16251615
1626- if ((unicode_string_found || f_string_found ) && bytes_found ) {
1627- RAISE_SYNTAX_ERROR ("cannot mix bytes and nonbytes literals" );
1616+ static expr_ty
1617+ _build_concatenated_unicode (Parser * p , asdl_expr_seq * strings , int lineno ,
1618+ int col_offset , int end_lineno , int end_col_offset ,
1619+ PyArena * arena )
1620+ {
1621+ Py_ssize_t len = asdl_seq_LEN (strings );
1622+ assert (len > 1 );
1623+
1624+ expr_ty first = asdl_seq_GET (strings , 0 );
1625+
1626+ /* When a string is getting concatenated, the kind of the string
1627+ is determined by the first string in the concatenation
1628+ sequence.
1629+
1630+ u"abc" "def" -> u"abcdef"
1631+ "abc" u"abc" -> "abcabc" */
1632+ PyObject * kind = first -> v .Constant .kind ;
1633+
1634+ PyUnicodeWriter * writer = PyUnicodeWriter_Create (0 );
1635+ if (writer == NULL ) {
16281636 return NULL ;
16291637 }
16301638
1631- if (bytes_found ) {
1632- PyObject * res = Py_GetConstant (Py_CONSTANT_EMPTY_BYTES );
1639+ for (Py_ssize_t i = 0 ; i < len ; i ++ ) {
1640+ expr_ty current_elem = asdl_seq_GET (strings , i );
1641+ assert (current_elem -> kind == Constant_kind );
16331642
1634- /* Bytes literals never get a kind, but just for consistency
1635- since they are represented as Constant nodes, we'll mirror
1636- the same behavior as unicode strings for determining the
1637- kind. */
1638- PyObject * kind = asdl_seq_GET (strings , 0 )-> v .Constant .kind ;
1639- for (i = 0 ; i < len ; i ++ ) {
1640- expr_ty elem = asdl_seq_GET (strings , i );
1641- PyBytes_Concat (& res , elem -> v .Constant .value );
1642- }
1643- if (!res || _PyArena_AddPyObject (arena , res ) < 0 ) {
1644- Py_XDECREF (res );
1643+ if (PyUnicodeWriter_WriteStr (writer ,
1644+ current_elem -> v .Constant .value )) {
1645+ PyUnicodeWriter_Discard (writer );
16451646 return NULL ;
16461647 }
1647- return _PyAST_Constant (res , kind , lineno , col_offset , end_lineno , end_col_offset , p -> arena );
16481648 }
16491649
1650- if (!f_string_found && len == 1 ) {
1651- return asdl_seq_GET (strings , 0 );
1650+ PyObject * final = PyUnicodeWriter_Finish (writer );
1651+ if (final == NULL ) {
1652+ return NULL ;
1653+ }
1654+ if (_PyArena_AddPyObject (p -> arena , final ) < 0 ) {
1655+ Py_DECREF (final );
1656+ return NULL ;
1657+ }
1658+ return _PyAST_Constant (final , kind , lineno , col_offset ,
1659+ end_lineno , end_col_offset , arena );
1660+ }
1661+
1662+ static expr_ty
1663+ _build_concatenated_joined_str (Parser * p , asdl_expr_seq * strings ,
1664+ int lineno , int col_offset , int end_lineno ,
1665+ int end_col_offset , PyArena * arena )
1666+ {
1667+ Py_ssize_t len = asdl_seq_LEN (strings );
1668+ assert (len > 0 );
1669+
1670+ Py_ssize_t n_flattened_elements = 0 ;
1671+ for (Py_ssize_t i = 0 ; i < len ; i ++ ) {
1672+ expr_ty elem = asdl_seq_GET (strings , i );
1673+ switch (elem -> kind ) {
1674+ case JoinedStr_kind :
1675+ n_flattened_elements += asdl_seq_LEN (elem -> v .JoinedStr .values );
1676+ break ;
1677+ default :
1678+ n_flattened_elements ++ ;
1679+ break ;
1680+ }
16521681 }
16531682
1683+
16541684 asdl_expr_seq * flattened = _Py_asdl_expr_seq_new (n_flattened_elements , p -> arena );
16551685 if (flattened == NULL ) {
16561686 return NULL ;
16571687 }
16581688
16591689 /* build flattened list */
16601690 Py_ssize_t current_pos = 0 ;
1661- Py_ssize_t j = 0 ;
1662- for (i = 0 ; i < len ; i ++ ) {
1691+ for (Py_ssize_t i = 0 ; i < len ; i ++ ) {
16631692 expr_ty elem = asdl_seq_GET (strings , i );
16641693 switch (elem -> kind ) {
16651694 case JoinedStr_kind :
1666- for (j = 0 ; j < asdl_seq_LEN (elem -> v .JoinedStr .values ); j ++ ) {
1695+ for (Py_ssize_t j = 0 ; j < asdl_seq_LEN (elem -> v .JoinedStr .values ); j ++ ) {
16671696 expr_ty subvalue = asdl_seq_GET (elem -> v .JoinedStr .values , j );
16681697 if (subvalue == NULL ) {
16691698 return NULL ;
@@ -1680,13 +1709,13 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
16801709 /* calculate folded element count */
16811710 Py_ssize_t n_elements = 0 ;
16821711 int prev_is_constant = 0 ;
1683- for (i = 0 ; i < n_flattened_elements ; i ++ ) {
1712+ for (Py_ssize_t i = 0 ; i < n_flattened_elements ; i ++ ) {
16841713 expr_ty elem = asdl_seq_GET (flattened , i );
16851714
16861715 /* The concatenation of a FormattedValue and an empty Constant should
16871716 lead to the FormattedValue itself. Thus, we will not take any empty
16881717 constants into account, just as in `_PyPegen_joined_str` */
1689- if (f_string_found && elem -> kind == Constant_kind &&
1718+ if (elem -> kind == Constant_kind &&
16901719 PyUnicode_CheckExact (elem -> v .Constant .value ) &&
16911720 PyUnicode_GET_LENGTH (elem -> v .Constant .value ) == 0 )
16921721 continue ;
@@ -1704,7 +1733,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
17041733
17051734 /* build folded list */
17061735 current_pos = 0 ;
1707- for (i = 0 ; i < n_flattened_elements ; i ++ ) {
1736+ for (Py_ssize_t i = 0 ; i < n_flattened_elements ; i ++ ) {
17081737 expr_ty elem = asdl_seq_GET (flattened , i );
17091738
17101739 /* if the current elem and the following are constants,
@@ -1727,6 +1756,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
17271756 return NULL ;
17281757 }
17291758 expr_ty last_elem = elem ;
1759+ Py_ssize_t j ;
17301760 for (j = i ; j < n_flattened_elements ; j ++ ) {
17311761 expr_ty current_elem = asdl_seq_GET (flattened , j );
17321762 if (current_elem -> kind == Constant_kind ) {
@@ -1760,8 +1790,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
17601790 }
17611791
17621792 /* Drop all empty constant strings */
1763- if (f_string_found &&
1764- PyUnicode_CheckExact (elem -> v .Constant .value ) &&
1793+ if (PyUnicode_CheckExact (elem -> v .Constant .value ) &&
17651794 PyUnicode_GET_LENGTH (elem -> v .Constant .value ) == 0 ) {
17661795 continue ;
17671796 }
@@ -1770,13 +1799,127 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings,
17701799 asdl_seq_SET (values , current_pos ++ , elem );
17711800 }
17721801
1773- if (!f_string_found ) {
1774- assert (n_elements == 1 );
1775- expr_ty elem = asdl_seq_GET (values , 0 );
1776- assert (elem -> kind == Constant_kind );
1777- return elem ;
1778- }
1779-
17801802 assert (current_pos == n_elements );
17811803 return _PyAST_JoinedStr (values , lineno , col_offset , end_lineno , end_col_offset , p -> arena );
17821804}
1805+
1806+ static expr_ty
1807+ _build_concatenated_template_str (Parser * p , asdl_expr_seq * strings ,
1808+ int lineno , int col_offset , int end_lineno ,
1809+ int end_col_offset , PyArena * arena )
1810+ {
1811+ Py_ssize_t len = asdl_seq_LEN (strings );
1812+ assert (len > 0 );
1813+
1814+ Py_ssize_t n_flattened_elements = 0 ;
1815+ for (Py_ssize_t i = 0 ; i < len ; i ++ ) {
1816+ expr_ty elem = asdl_seq_GET (strings , i );
1817+ switch (elem -> kind ) {
1818+ case TemplateStr_kind :
1819+ n_flattened_elements += asdl_seq_LEN (elem -> v .JoinedStr .values );
1820+ break ;
1821+ default :
1822+ n_flattened_elements ++ ;
1823+ break ;
1824+ }
1825+ }
1826+
1827+
1828+ asdl_expr_seq * flattened = _Py_asdl_expr_seq_new (n_flattened_elements , p -> arena );
1829+ if (flattened == NULL ) {
1830+ return NULL ;
1831+ }
1832+
1833+ Py_ssize_t pos = 0 ;
1834+ for (Py_ssize_t i = 0 ; i < len ; i ++ ) {
1835+ expr_ty elem = asdl_seq_GET (strings , i );
1836+
1837+ switch (elem -> kind ) {
1838+ case TemplateStr_kind :
1839+ for (Py_ssize_t j = 0 ; j < asdl_seq_LEN (elem -> v .TemplateStr .values ); j ++ ) {
1840+ expr_ty subitem = asdl_seq_GET (elem -> v .TemplateStr .values , j );
1841+ asdl_seq_SET (flattened , pos ++ , subitem );
1842+ }
1843+ break ;
1844+ case JoinedStr_kind : {
1845+ expr_ty joined_str = _build_concatenated_joined_str (p ,
1846+ elem -> v .JoinedStr .values , lineno , col_offset ,
1847+ end_lineno , end_col_offset , arena );
1848+ asdl_seq_SET (flattened , pos ++ , joined_str );
1849+ break ;
1850+ }
1851+ default :
1852+ asdl_seq_SET (flattened , pos ++ , elem );
1853+ break ;
1854+ }
1855+ }
1856+
1857+ return _PyAST_TemplateStr (flattened , lineno , col_offset , end_lineno ,
1858+ end_col_offset , arena );
1859+ }
1860+
1861+ expr_ty
1862+ _PyPegen_concatenate_strings (Parser * p , asdl_expr_seq * strings ,
1863+ int lineno , int col_offset , int end_lineno ,
1864+ int end_col_offset , PyArena * arena )
1865+ {
1866+ Py_ssize_t len = asdl_seq_LEN (strings );
1867+ assert (len > 0 );
1868+
1869+ int t_string_found = 0 ;
1870+ int f_string_found = 0 ;
1871+ int unicode_string_found = 0 ;
1872+ int bytes_found = 0 ;
1873+
1874+ Py_ssize_t i = 0 ;
1875+ for (i = 0 ; i < len ; i ++ ) {
1876+ expr_ty elem = asdl_seq_GET (strings , i );
1877+ switch (elem -> kind ) {
1878+ case Constant_kind :
1879+ if (PyBytes_CheckExact (elem -> v .Constant .value )) {
1880+ bytes_found = 1 ;
1881+ } else {
1882+ unicode_string_found = 1 ;
1883+ }
1884+ break ;
1885+ case JoinedStr_kind :
1886+ f_string_found = 1 ;
1887+ break ;
1888+ case TemplateStr_kind :
1889+ t_string_found = 1 ;
1890+ break ;
1891+ default :
1892+ f_string_found = 1 ;
1893+ break ;
1894+ }
1895+ }
1896+
1897+ // Cannot mix unicode and bytes
1898+ if ((unicode_string_found || f_string_found || t_string_found ) && bytes_found ) {
1899+ RAISE_SYNTAX_ERROR ("cannot mix bytes and nonbytes literals" );
1900+ return NULL ;
1901+ }
1902+
1903+ // If it's only bytes or only unicode string, do a simple concat
1904+ if (!f_string_found && !t_string_found ) {
1905+ if (len == 1 ) {
1906+ return asdl_seq_GET (strings , 0 );
1907+ }
1908+ else if (bytes_found ) {
1909+ return _build_concatenated_bytes (p , strings , lineno , col_offset ,
1910+ end_lineno , end_col_offset , arena );
1911+ }
1912+ else {
1913+ return _build_concatenated_unicode (p , strings , lineno , col_offset ,
1914+ end_lineno , end_col_offset , arena );
1915+ }
1916+ }
1917+
1918+ if (t_string_found ) {
1919+ return _build_concatenated_template_str (p , strings , lineno ,
1920+ col_offset , end_lineno , end_col_offset , arena );
1921+ }
1922+
1923+ return _build_concatenated_joined_str (p , strings , lineno ,
1924+ col_offset , end_lineno , end_col_offset , arena );
1925+ }
0 commit comments