Skip to content

Commit 4e89c87

Browse files
committed
Fix syntax error prefix in t-strings
1 parent 99184cc commit 4e89c87

File tree

6 files changed

+1195
-1404
lines changed

6 files changed

+1195
-1404
lines changed

Grammar/python.gram

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -916,7 +916,8 @@ fstring_middle[expr_ty]:
916916
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
917917
fstring_replacement_field[expr_ty]:
918918
| '{' a=annotated_rhs debug_expr='='? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
919-
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
919+
(TOK_GET_MODE(p->tok)->tstring ? _PyPegen_interpolation : _PyPegen_formatted_value)(
920+
p, a, debug_expr, conversion, format, rbrace, EXTRA) }
920921
| invalid_replacement_field
921922
fstring_conversion[ResultTokenWithMetadata*]:
922923
| conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) }
@@ -928,15 +929,8 @@ fstring_format_spec[expr_ty]:
928929
fstring[expr_ty]:
929930
| a=FSTRING_START b=fstring_middle* c=FSTRING_END { _PyPegen_joined_str(p, a, (asdl_expr_seq*)b, c) }
930931

931-
tstring_middle[expr_ty]:
932-
| tstring_replacement_field
933-
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
934-
tstring_replacement_field[expr_ty]:
935-
| '{' a=annotated_rhs debug_expr='='? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
936-
_PyPegen_interpolation(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
937-
| invalid_replacement_field
938-
tstring[expr_ty]:
939-
| a=TSTRING_START b=tstring_middle* c=FSTRING_END { _PyPegen_template_str(p, a, (asdl_expr_seq*)b, c) }
932+
tstring[expr_ty] (memo):
933+
| a=TSTRING_START b=fstring_middle* c=FSTRING_END { _PyPegen_template_str(p, a, (asdl_expr_seq*)b, c) }
940934

941935
string[expr_ty]: s[Token*]=STRING { _PyPegen_constant_from_string(p, s) }
942936
strings[expr_ty] (memo): a[asdl_expr_seq*]=(fstring|string)+ { _PyPegen_concatenate_strings(p, a, EXTRA) }
@@ -1196,7 +1190,8 @@ invalid_expression:
11961190
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
11971191
| a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") }
11981192
| a='lambda' [lambda_params] b=':' &FSTRING_MIDDLE {
1199-
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "f-string: lambda expressions are not allowed without parentheses") }
1193+
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "%c-string: lambda expressions are not allowed without parentheses",
1194+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
12001195

12011196
invalid_named_expression(memo):
12021197
| a=expression ':=' expression {
@@ -1439,26 +1434,36 @@ invalid_starred_expression:
14391434
| '*' { RAISE_SYNTAX_ERROR("Invalid star expression") }
14401435

14411436
invalid_replacement_field:
1442-
| '{' a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '='") }
1443-
| '{' a='!' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '!'") }
1444-
| '{' a=':' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before ':'") }
1445-
| '{' a='}' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '}'") }
1446-
| '{' !annotated_rhs { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting a valid expression after '{'")}
1437+
| '{' a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "%c-string: valid expression required before '='",
1438+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
1439+
| '{' a='!' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "%c-string: valid expression required before '!'",
1440+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
1441+
| '{' a=':' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "%c-string: valid expression required before ':'",
1442+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
1443+
| '{' a='}' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "%c-string: valid expression required before '}'",
1444+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
1445+
| '{' !annotated_rhs { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("%c-string: expecting a valid expression after '{'",
1446+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f')}
14471447
| '{' annotated_rhs !('=' | '!' | ':' | '}') {
1448-
PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '=', or '!', or ':', or '}'") }
1448+
PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("%c-string: expecting '=', or '!', or ':', or '}'",
1449+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
14491450
| '{' annotated_rhs '=' !('!' | ':' | '}') {
1450-
PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '!', or ':', or '}'") }
1451+
PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("%c-string: expecting '!', or ':', or '}'",
1452+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
14511453
| '{' annotated_rhs '='? invalid_conversion_character
14521454
| '{' annotated_rhs '='? ['!' NAME] !(':' | '}') {
1453-
PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting ':' or '}'") }
1455+
PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("%c-string: expecting ':' or '}'",
1456+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
14541457
| '{' annotated_rhs '='? ['!' NAME] ':' fstring_format_spec* !'}' {
1455-
PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '}', or format specs") }
1458+
PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("%c-string: expecting '}', or format specs",
1459+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
14561460
| '{' annotated_rhs '='? ['!' NAME] !'}' {
1457-
PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '}'") }
1461+
PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("%c-string: expecting '}'",
1462+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
14581463

14591464
invalid_conversion_character:
1460-
| '!' &(':' | '}') { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: missing conversion character") }
1461-
| '!' !NAME { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: invalid conversion character") }
1465+
| '!' &(':' | '}') { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("%c-string: missing conversion character", TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
1466+
| '!' !NAME { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("%c-string: invalid conversion character", TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
14621467

14631468
invalid_arithmetic:
14641469
| sum ('+'|'-'|'*'|'/'|'%'|'//'|'@') a='not' b=inversion { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "'not' after an operator must be parenthesized") }

Parser/action_helpers.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -963,15 +963,17 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
963963
if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) {
964964
return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
965965
conv_token, conv,
966-
"f-string: conversion type must come right after the exclamanation mark"
966+
"%c-string: conversion type must come right after the exclamation mark",
967+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f'
967968
);
968969
}
969970

970971
Py_UCS4 first = PyUnicode_READ_CHAR(conv->v.Name.id, 0);
971972
if (PyUnicode_GET_LENGTH(conv->v.Name.id) > 1 ||
972973
!(first == 's' || first == 'r' || first == 'a')) {
973974
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conv,
974-
"f-string: invalid conversion character %R: expected 's', 'r', or 'a'",
975+
"%c-string: invalid conversion character %R: expected 's', 'r', or 'a'",
976+
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f',
975977
conv->v.Name.id);
976978
return NULL;
977979
}

Parser/lexer/lexer.c

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -980,7 +980,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
980980
p_start = tok->start;
981981
p_end = tok->cur;
982982
if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
983-
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings"));
983+
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
984984
}
985985
tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
986986
the_current_tok->kind = TOK_FSTRING_MODE;
@@ -1082,7 +1082,8 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
10821082
tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
10831083
if (the_current_tok->f_string_quote == quote &&
10841084
the_current_tok->f_string_quote_size == quote_size) {
1085-
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expecting '}'", start));
1085+
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1086+
"%c-string: expecting '}'", TOK_GET_MODE(tok)->tstring ? 't' : 'f'));
10861087
}
10871088
}
10881089

@@ -1211,7 +1212,8 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
12111212
case ']':
12121213
case '}':
12131214
if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1214-
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: single '}' is not allowed"));
1215+
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1216+
"%c-string: single '}' is not allowed", TOK_GET_MODE(tok)->tstring ? 't' : 'f'));
12151217
}
12161218
if (!tok->tok_extra_tokens && !tok->level) {
12171219
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
@@ -1231,7 +1233,8 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
12311233
assert(current_tok->curly_bracket_depth >= 0);
12321234
int previous_bracket = current_tok->curly_bracket_depth - 1;
12331235
if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1234-
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: unmatched '%c'", c));
1236+
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1237+
"%c-string: unmatched '%c'", TOK_GET_MODE(tok)->tstring ? 't' : 'f', c));
12351238
}
12361239
}
12371240
if (tok->parenlinenostack[tok->level] != tok->lineno) {
@@ -1252,7 +1255,8 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
12521255
if (INSIDE_FSTRING(tok)) {
12531256
current_tok->curly_bracket_depth--;
12541257
if (current_tok->curly_bracket_depth < 0) {
1255-
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: unmatched '%c'", c));
1258+
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1259+
TOK_GET_MODE(tok)->tstring ? 't' : 'f', c));
12561260
}
12571261
if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
12581262
current_tok->curly_bracket_expr_start_depth--;
@@ -1302,7 +1306,8 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
13021306
if (peek1 != '{') {
13031307
current_tok->curly_bracket_expr_start_depth++;
13041308
if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1305-
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply"));
1309+
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1310+
"%c-string: expressions nested too deeply", TOK_GET_MODE(tok)->tstring ? 't' : 'f'));
13061311
}
13071312
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
13081313
return tok_get_normal_mode(tok, current_tok, token);
@@ -1380,17 +1385,18 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
13801385

13811386
if (current_tok->f_string_quote_size == 3) {
13821387
_PyTokenizer_syntaxerror(tok,
1383-
"unterminated triple-quoted f-string literal"
1384-
" (detected at line %d)", start);
1388+
"unterminated triple-quoted %c-string literal"
1389+
" (detected at line %d)",
1390+
TOK_GET_MODE(tok)->tstring ? 't' : 'f', start);
13851391
if (c != '\n') {
13861392
tok->done = E_EOFS;
13871393
}
13881394
return MAKE_TOKEN(ERRORTOKEN);
13891395
}
13901396
else {
13911397
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1392-
"unterminated f-string literal (detected at"
1393-
" line %d)", start));
1398+
"unterminated %c-string literal (detected at"
1399+
" line %d)", TOK_GET_MODE(tok)->tstring ? 't' : 'f', start));
13941400
}
13951401
}
13961402

@@ -1411,7 +1417,8 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
14111417
tok_backup(tok, c);
14121418
current_tok->curly_bracket_expr_start_depth++;
14131419
if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1414-
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply"));
1420+
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1421+
"%c-string: expressions nested too deeply", TOK_GET_MODE(tok)->tstring ? 't' : 'f'));
14151422
}
14161423
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
14171424
current_tok->in_format_spec = 0;

0 commit comments

Comments
 (0)