Skip to content

Commit bbd1156

Browse files
[3.14] gh-143054: Disallow non-top-level Cut for now (GH-143622) (GH-143790)
The behaviour of Cut in nested parentheses, Repeat, Opt, and similar is somewhat chaotic. Apparently even the academic papers on PEG aren't as clear as they could be. And it doesn't really matter. Python only uses top-level cuts. When that changes, we can clarify as much as necessary (and even change the implementation to make sense for what we'll need). Document that this is deliberately unspecified, and add a test to make sure any decision is deliberate, tested and documented. (cherry picked from commit f0a0467) Co-authored-by: Petr Viktorin <encukou@gmail.com>
1 parent 37c4a65 commit bbd1156

File tree

4 files changed

+85
-3
lines changed

4 files changed

+85
-3
lines changed

Doc/reference/grammar.rst

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,17 @@ The notation used here is the same as in the preceding docs,
1212
and is described in the :ref:`notation <notation>` section,
1313
except for an extra complication:
1414

15-
* ``~`` ("cut"): commit to the current alternative and fail the rule
16-
even if this fails to parse
15+
* ``~`` ("cut"): commit to the current alternative; fail the rule
16+
if the alternative fails to parse
17+
18+
Python mainly uses cuts for optimizations or improved error
19+
messages. They often appear to be useless in the listing below.
20+
21+
.. see gh-143054, and CutValidator in the source, if you want to change this:
22+
23+
Cuts currently don't appear inside parentheses, brackets, lookaheads
24+
and similar.
25+
Their behavior in these contexts is deliberately left unspecified.
1726

1827
.. literalinclude:: ../../Grammar/python.gram
1928
:language: peg

Lib/test/test_peg_generator/test_grammar_validator.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
test_tools.skip_if_missing("peg_generator")
55
with test_tools.imports_under_tool("peg_generator"):
66
from pegen.grammar_parser import GeneratedParser as GrammarParser
7-
from pegen.validator import SubRuleValidator, ValidationError, RaiseRuleValidator
7+
from pegen.validator import SubRuleValidator, ValidationError
8+
from pegen.validator import RaiseRuleValidator, CutValidator
89
from pegen.testutil import parse_string
910
from pegen.grammar import Grammar
1011

@@ -59,3 +60,18 @@ def test_raising_valid_rule(self) -> None:
5960
with self.assertRaises(ValidationError):
6061
for rule_name, rule in grammar.rules.items():
6162
validator.validate_rule(rule_name, rule)
63+
64+
def test_cut_validator(self) -> None:
65+
grammar_source = """
66+
star: (OP ~ OP)*
67+
plus: (OP ~ OP)+
68+
bracket: [OP ~ OP]
69+
gather: OP.(OP ~ OP)+
70+
nested: [OP | NAME ~ OP]
71+
"""
72+
grammar: Grammar = parse_string(grammar_source, GrammarParser)
73+
validator = CutValidator(grammar)
74+
for rule_name, rule in grammar.rules.items():
75+
with self.subTest(rule_name):
76+
with self.assertRaises(ValidationError):
77+
validator.validate_rule(rule_name, rule)

Lib/test/test_peg_generator/test_pegen.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -755,6 +755,30 @@ def test_cut(self) -> None:
755755
],
756756
)
757757

758+
def test_cut_is_local_in_rule(self) -> None:
759+
grammar = """
760+
start:
761+
| inner
762+
| 'x' { "ok" }
763+
inner:
764+
| 'x' ~ 'y'
765+
| 'x'
766+
"""
767+
parser_class = make_parser(grammar)
768+
node = parse_string("x", parser_class)
769+
self.assertEqual(node, 'ok')
770+
771+
def test_cut_is_local_in_parens(self) -> None:
772+
# we currently don't guarantee this behavior, see gh-143054
773+
grammar = """
774+
start:
775+
| ('x' ~ 'y' | 'x')
776+
| 'x' { "ok" }
777+
"""
778+
parser_class = make_parser(grammar)
779+
node = parse_string("x", parser_class)
780+
self.assertEqual(node, 'ok')
781+
758782
def test_dangling_reference(self) -> None:
759783
grammar = """
760784
start: foo ENDMARKER

Tools/peg_generator/pegen/validator.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Any
2+
13
from pegen import grammar
24
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
35

@@ -44,6 +46,37 @@ def visit_Alt(self, node: Alt) -> None:
4446
)
4547

4648

49+
class CutValidator(GrammarValidator):
50+
"""Fail if Cut is not directly in a rule.
51+
52+
For simplicity, we currently document that a Cut affects alternatives
53+
of the *rule* it is in.
54+
However, the implementation makes cuts local to enclosing Rhs
55+
(e.g. parenthesized list of choices).
56+
Additionally, in academic papers about PEG, repeats and optional items
57+
are "desugared" to choices with an empty alternative, and thus contain
58+
a Cut's effect.
59+
60+
Please update documentation and tests when adding this cut,
61+
then get rid of this validator.
62+
63+
See gh-143054.
64+
"""
65+
66+
def visit(self, node: Any, parents: tuple[Any, ...] = ()) -> None:
67+
super().visit(node, parents=(*parents, node))
68+
69+
def visit_Cut(self, node: Alt, parents: tuple[Any, ...] = ()) -> None:
70+
parent_types = [type(p).__name__ for p in parents]
71+
if parent_types != ['Rule', 'Rhs', 'Alt', 'NamedItem', 'Cut']:
72+
raise ValidationError(
73+
f"Rule {self.rulename!r} contains cut that's not on the "
74+
"top level. "
75+
"The intended semantics of such cases need "
76+
"to be clarified; see the CutValidator docstring."
77+
f"\nThe cut is inside: {parent_types}"
78+
)
79+
4780
def validate_grammar(the_grammar: grammar.Grammar) -> None:
4881
for validator_cls in GrammarValidator.__subclasses__():
4982
validator = validator_cls(the_grammar)

0 commit comments

Comments
 (0)