From 9b7c3eb846e529e709805688bb030d26ba41cc2d Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 26 Mar 2023 01:23:57 -0400 Subject: [PATCH 1/4] WIP - what's up with RowBox parsing? --- mathics/core/parser/parser.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/mathics/core/parser/parser.py b/mathics/core/parser/parser.py index 84aada633..2bd708e82 100644 --- a/mathics/core/parser/parser.py +++ b/mathics/core/parser/parser.py @@ -171,17 +171,23 @@ def parse_p(self): self.tokeniser.sntx_message(token.pos) raise InvalidSyntaxError() - def parse_box(self, p): + def parse_box(self, precedence: int): # -> String: ? + """ + Return the String value of the next token in a box expression. + ``precedence`` is not used here, but may be passed + along + """ result = None while True: token = self.next() tag = token.tag method = getattr(self, "b_" + tag, None) if method is not None: - new_result = method(result, token, p) + new_result = method(result, token, precedence) elif tag in ("OtherscriptBox", "RightRowBox"): break - elif tag == "END": + + if tag == "END": self.incomplete(token.pos) elif result is None and tag != "END": self.consume() @@ -190,12 +196,15 @@ def parse_box(self, p): new_result = self.p_LeftRowBox(token) else: new_result = None + if new_result is None: break - else: - result = new_result + + result = new_result + if result is None: result = NullString + return result def parse_seq(self) -> list: @@ -219,7 +228,8 @@ def parse_seq(self) -> list: if tag == "RawComma": self.consume() continue - elif tag in ("RawRightAssociation", "RawRightBrace", "RawRightBracket"): + + if tag in ("RawRightAssociation", "RawRightBrace", "RawRightBracket"): break return result From 33c415dd1688dc892964f1b2f47572b20bc5d988 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 26 Mar 2023 11:09:05 -0400 Subject: [PATCH 2/4] More type annotating tweaking --- mathics/core/parser/parser.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) 
diff --git a/mathics/core/parser/parser.py b/mathics/core/parser/parser.py index 2bd708e82..8dedce324 100644 --- a/mathics/core/parser/parser.py +++ b/mathics/core/parser/parser.py @@ -189,6 +189,7 @@ def parse_box(self, precedence: int): # -> String: ? if tag == "END": self.incomplete(token.pos) + new_result = None elif result is None and tag != "END": self.consume() new_result = String(token.text) @@ -298,19 +299,19 @@ def parse_ternary(self, expr1, token: Token, p: int) -> Optional[Node]: # Called with one Token and return a Node. # Used for prefix operators and brackets. - def p_Factorial(self, token): + def p_Factorial(self, token) -> Node: self.consume() q = prefix_ops["Not"] child = self.parse_exp(q) return Node("Not", child) - def p_Factorial2(self, token): + def p_Factorial2(self, token) -> Node: self.consume() q = prefix_ops["Not"] child = self.parse_exp(q) return Node("Not", Node("Not", child)) - def p_RawLeftParenthesis(self, token): + def p_RawLeftParenthesis(self, token) -> Node: self.consume() self.bracket_depth += 1 result = self.parse_exp(0) @@ -434,6 +435,7 @@ def p_Integral(self, token) -> Node: def p_Pattern(self, token) -> Node: self.consume() text = token.text + name: str = "?Unknown" if "." 
in text: name = text[:-2] if name: From 6488250ac3c4cf68d76ef8a0a15de8e371927572 Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 1 Apr 2023 20:35:16 -0400 Subject: [PATCH 3/4] WIP first cut at binary box operators --- mathics/builtin/makeboxes.py | 8 +- mathics/core/parser/operators.py | 124 ++++++++++++++------------ mathics/core/parser/parser.py | 144 +++++++++++++++++-------------- mathics/eval/makeboxes.py | 11 +-- 4 files changed, 152 insertions(+), 135 deletions(-) diff --git a/mathics/builtin/makeboxes.py b/mathics/builtin/makeboxes.py index 2c25b84db..3bceb201e 100644 --- a/mathics/builtin/makeboxes.py +++ b/mathics/builtin/makeboxes.py @@ -17,14 +17,10 @@ from mathics.core.expression import Expression from mathics.core.list import ListExpression from mathics.core.number import dps +from mathics.core.parser.parser import NEVER_ADD_PARENTHESIS from mathics.core.symbols import Atom, Symbol from mathics.core.systemsymbols import SymbolInputForm, SymbolOutputForm, SymbolRowBox -from mathics.eval.makeboxes import ( - NEVER_ADD_PARENTHESIS, - _boxed_string, - format_element, - parenthesize, -) +from mathics.eval.makeboxes import _boxed_string, format_element, parenthesize def int_to_s_exp(expr, n): diff --git a/mathics/core/parser/operators.py b/mathics/core/parser/operators.py index 2489abfd8..f9a51c03a 100644 --- a/mathics/core/parser/operators.py +++ b/mathics/core/parser/operators.py @@ -4,6 +4,48 @@ from collections import defaultdict +binary_box_ops = { + "FractionBox": 670, + "OverscriptBox": 710, + "RadicalBox": 670, + "SubscriptBox": 695, + "SuperscriptBox": 590, + "UnderscriptBox": 710, +} + +left_binary_ops = { + "Divide": 470, + "PlusMinus": 310, + "MinusPlus": 310, + "Subtract": 310, + "LeftTee": 190, + "DoubleLeftTee": 190, + "Condition": 130, + "ReplaceAll": 110, + "ReplaceRepeated": 110, + "Because": 50, + "PutAppend": 30, + "Put": 30, + "Postfix": 70, +} + +misc_ops = { + "DifferentialD": 550, + "Sum": 320, + "Pattern": 150, + "Optional": 140, + 
"SqrtBox": 670, + "RadicalBox": 670, + "FractionBox": 670, + "OverscriptBox": 710, + "UnderscriptBox": 710, + "SubscriptBox": 695, + "FormBox": 670, + "SuperscriptBox": 590, + "UnderoverscriptBox": 700, + "SubsuperscriptBox": 690, +} + prefix_ops = { "Get": 720, "PreIncrement": 660, @@ -35,44 +77,6 @@ "Function": 90, } -left_binary_ops = { - "Divide": 470, - "PlusMinus": 310, - "MinusPlus": 310, - "Subtract": 310, - "LeftTee": 190, - "DoubleLeftTee": 190, - "Condition": 130, - "ReplaceAll": 110, - "ReplaceRepeated": 110, - "Because": 50, - "PutAppend": 30, - "Put": 30, - "Postfix": 70, -} - -right_binary_ops = { - "Apply": 620, - "Map": 620, - "MapAll": 620, - "Power": 590, - "Implies": 200, - "RightTee": 190, - "DoubleRightTee": 190, - "SuchThat": 180, - "Rule": 120, - "RuleDelayed": 120, - "AddTo": 100, - "SubtractFrom": 100, - "TimesBy": 100, - "DivideBy": 100, - "Therefore": 50, - "UpSet": 40, - "Set": 40, - "SetDelayed": 40, - "UpSetDelayed": 40, -} - flat_binary_ops = { "MessageName": 750, "Composition": 625, @@ -137,26 +141,36 @@ "PatternTest": 680, } -ternary_ops = { - "Span": 305, - "Infix": 630, +right_binary_ops = { + "Apply": 620, + "Map": 620, + "MapAll": 620, + "Power": 590, + "Implies": 200, + "RightTee": 190, + "DoubleRightTee": 190, + "SuchThat": 180, + "Rule": 120, + "RuleDelayed": 120, + "AddTo": 100, + "SubtractFrom": 100, + "TimesBy": 100, + "DivideBy": 100, + "Therefore": 50, + "UpSet": 40, + "Set": 40, + "SetDelayed": 40, + "UpSetDelayed": 40, } -misc_ops = { - "DifferentialD": 550, - "Sum": 320, - "Pattern": 150, - "Optional": 140, - "SqrtBox": 670, - "RadicalBox": 670, - "FractionBox": 670, - "OverscriptBox": 710, - "UnderscriptBox": 710, - "SubscriptBox": 695, - "FormBox": 670, - "SuperscriptBox": 590, - "UnderoverscriptBox": 700, +ternary_box_ops = { "SubsuperscriptBox": 690, + "UnderoverscriptBox": 700, +} + +ternary_ops = { + "Span": 305, + "Infix": 630, } inequality_ops = ["Less", "LessEqual", "Greater", "GreaterEqual", "Equal", 
"Unequal"] diff --git a/mathics/core/parser/parser.py b/mathics/core/parser/parser.py index 8dedce324..691899881 100644 --- a/mathics/core/parser/parser.py +++ b/mathics/core/parser/parser.py @@ -31,6 +31,7 @@ ) from mathics.core.parser.operators import ( all_ops, + binary_box_ops, binary_ops, flat_binary_ops, inequality_ops, @@ -40,6 +41,7 @@ postfix_ops, prefix_ops, right_binary_ops, + ternary_box_ops, ternary_ops, ) @@ -53,6 +55,15 @@ } +# An operator precedence value that will ensure that whatever operator +# this is attached to does not have parentheses surrounding it. +# Operator precedence values are integers; if an operator +# "op" is greater than the surrounding precedence, then "op" +# will be surrounded by parentheses, e.g. ... (...op...) ... +# In named-characters.yml of mathics-scanner we start at 0. +# However, negative values would also work. +NEVER_ADD_PARENTHESIS = 0 + permitted_digits = {c: i for i, c in enumerate(string.digits + string.ascii_lowercase)} permitted_digits["."] = 0 @@ -116,7 +127,7 @@ def backtrack(self, pos): def parse_e(self) -> Union[Node, Optional[list]]: result = [] while self.next().tag != "END": - result.append(self.parse_exp(0)) + result.append(self.parse_exp(NEVER_ADD_PARENTHESIS)) if len(result) > 1: return Node("Times", *result) if len(result) == 1: @@ -152,8 +163,8 @@ def parse_exp(self, p: int): new_result = None if new_result is None: break - else: - result = new_result + + result = new_result return result def parse_p(self): @@ -171,32 +182,36 @@ def parse_p(self): self.tokeniser.sntx_message(token.pos) raise InvalidSyntaxError() - def parse_box(self, precedence: int): # -> String: ? + def parse_box(self, precedence: int) -> Union[String, Node]: """ - Return the String value of the next token in a box expression. - ``precedence`` is not used here, but may be passed - along + Return the parsed boxed expression for the current + sequence of tokens. + + If there is only an Atom we return a String of that. 
+ Otherwise we return the Node parse expression. """ - result = None + result = self.parse_p() while True: + new_result = None token = self.next() tag = token.tag method = getattr(self, "b_" + tag, None) if method is not None: new_result = method(result, token, precedence) elif tag in ("OtherscriptBox", "RightRowBox"): + self.consume() break - if tag == "END": - self.incomplete(token.pos) - new_result = None + # if tag == "END": + # self.incomplete(token.pos) + # new_result = None + elif new_result: + continue elif result is None and tag != "END": self.consume() new_result = String(token.text) if new_result.value == r"\(": new_result = self.p_LeftRowBox(token) - else: - new_result = None if new_result is None: break @@ -342,8 +357,8 @@ def p_LeftRowBox(self, token) -> Node: self.box_depth += 1 self.bracket_depth += 1 token = self.next() - while token.tag not in ("RightRowBox", "OtherscriptBox"): - newnode = self.parse_box(0) + while token.tag not in ("RightRowBox", "OtherscriptBox", "END"): + newnode = self.parse_box(NEVER_ADD_PARENTHESIS) children.append(newnode) token = self.next() if len(children) == 0: @@ -352,7 +367,8 @@ def p_LeftRowBox(self, token) -> Node: result = children[0] else: result = Node("RowBox", Node("List", *children)) - self.expect("RightRowBox") + if token.tag != "END": + self.expect("RightRowBox") self.box_depth -= 1 self.bracket_depth -= 1 result.parenthesised = True @@ -802,11 +818,50 @@ def e_MessageName(self, expr1, token: Token, p: int) -> Node: # The first argument may be None if the LHS is absent. # Used for boxes. 
+ def b_FormBox(self, box1, token: Token, p: int) -> Optional[Node]: + q = misc_ops["FormBox"] + if q < p: + return None + if box1 is None: + box1 = Symbol("StandardForm") # RawForm + elif is_symbol_name(box1.value): + box1 = Symbol(box1.value, context=None) + else: + box1 = Node("Removed", String("$$Failure")) + self.consume() + box2 = self.parse_box(q) + return Node("FormBox", box2, box1) + + def b_FractionBox(self, box1, token: Token, p: int) -> Optional[Node]: + q = binary_box_ops["FractionBox"] + if q < p: + return None + if box1 is None: + box1 = NullString + self.consume() + box2 = self.parse_box(q + 1) + return Node("FractionBox", box1, box2) + + def b_OverscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: + q = misc_ops["OverscriptBox"] + if q < p: + return None + if box1 is None: + box1 = NullString + self.consume() + box2 = self.parse_box(q) + if self.next().tag == "OtherscriptBox": + self.consume() + box3 = self.parse_box(misc_ops["UnderoverscriptBox"]) + return Node("UnderoverscriptBox", box1, box3, box2) + else: + return Node("OverscriptBox", box1, box2) + def b_SqrtBox(self, box0, token: Token, p: int) -> Optional[Node]: if box0 is not None: return None self.consume() - q = misc_ops["SqrtBox"] + q = binary_box_ops["SqrtBox"] box1 = self.parse_box(q) if self.next().tag == "OtherscriptBox": self.consume() @@ -815,9 +870,9 @@ def b_SqrtBox(self, box0, token: Token, p: int) -> Optional[Node]: else: return Node("SqrtBox", box1) - def b_SuperscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = misc_ops["SuperscriptBox"] - if q < p: + def b_SuperscriptBox(self, box1, token: Token, precedence: int) -> Optional[Node]: + q = binary_box_ops["SuperscriptBox"] + if q < precedence: return None if box1 is None: box1 = NullString @@ -825,13 +880,13 @@ def b_SuperscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: box2 = self.parse_box(q) if self.next().tag == "OtherscriptBox": self.consume() - box3 = 
self.parse_box(misc_ops["SubsuperscriptBox"]) + box3 = self.parse_box(ternary_box_ops["SubsuperscriptBox"]) return Node("SubsuperscriptBox", box1, box3, box2) else: return Node("SuperscriptBox", box1, box2) def b_SubscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = misc_ops["SubscriptBox"] + q = binary_box_ops["SubscriptBox"] if q < p: return None if box1 is None: @@ -840,13 +895,13 @@ def b_SubscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: box2 = self.parse_box(q) if self.next().tag == "OtherscriptBox": self.consume() - box3 = self.parse_box(misc_ops["SubsuperscriptBox"]) + box3 = self.parse_box(ternary_ops["SubsuperscriptBox"]) return Node("SubsuperscriptBox", box1, box2, box3) else: return Node("SubscriptBox", box1, box2) def b_UnderscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = misc_ops["UnderscriptBox"] + q = ternary_box_ops["UnderscriptBox"] if q < p: return None if box1 is None: @@ -855,46 +910,7 @@ def b_UnderscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: box2 = self.parse_box(q) if self.next().tag == "OtherscriptBox": self.consume() - box3 = self.parse_box(misc_ops["UnderoverscriptBox"]) + box3 = self.parse_box(ternary_ops["UnderoverscriptBox"]) return Node("UnderoverscriptBox", box1, box2, box3) else: return Node("UnderscriptBox", box1, box2) - - def b_FractionBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = misc_ops["FractionBox"] - if q < p: - return None - if box1 is None: - box1 = NullString - self.consume() - box2 = self.parse_box(q + 1) - return Node("FractionBox", box1, box2) - - def b_FormBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = misc_ops["FormBox"] - if q < p: - return None - if box1 is None: - box1 = Symbol("StandardForm") # RawForm - elif is_symbol_name(box1.value): - box1 = Symbol(box1.value, context=None) - else: - box1 = Node("Removed", String("$$Failure")) - self.consume() - box2 = self.parse_box(q) - return Node("FormBox", box2, 
box1) - - def b_OverscriptBox(self, box1, token: Token, p: int) -> Optional[Node]: - q = misc_ops["OverscriptBox"] - if q < p: - return None - if box1 is None: - box1 = NullString - self.consume() - box2 = self.parse_box(q) - if self.next().tag == "OtherscriptBox": - self.consume() - box3 = self.parse_box(misc_ops["UnderoverscriptBox"]) - return Node("UnderoverscriptBox", box1, box3, box2) - else: - return Node("OverscriptBox", box1, box2) diff --git a/mathics/eval/makeboxes.py b/mathics/eval/makeboxes.py index 52665c5e0..12858b378 100644 --- a/mathics/eval/makeboxes.py +++ b/mathics/eval/makeboxes.py @@ -16,6 +16,7 @@ from mathics.core.evaluation import Evaluation from mathics.core.expression import Expression from mathics.core.list import ListExpression +from mathics.core.parser.parser import NEVER_ADD_PARENTHESIS from mathics.core.symbols import ( Atom, Symbol, @@ -36,21 +37,11 @@ from mathics.core.systemsymbols import ( SymbolComplex, SymbolMinus, - SymbolOutputForm, SymbolRational, SymbolRowBox, SymbolStandardForm, ) -# An operator precedence value that will ensure that whatever operator -# this is attached to does not have parenthesis surrounding it. -# Operator precedence values are integers; If if an operator -# "op" is greater than the surrounding precedence, then "op" -# will be surrounded by parenthesis, e.g. ... (...op...) ... -# In named-characters.yml of mathics-scanner we start at 0. -# However, negative values would also work. 
-NEVER_ADD_PARENTHESIS = 0 - # These Strings are used in Boxing output StringElipsis = String("...") StringLParen = String("(") From 80a3c539108d7725140c10fec118e24939319a23 Mon Sep 17 00:00:00 2001 From: rocky Date: Sun, 2 Apr 2023 09:54:43 -0400 Subject: [PATCH 4/4] Add some test __init__ files --- test/builtin/list/__init__.py | 1 + test/core/parser/__init__.py | 0 2 files changed, 1 insertion(+) create mode 100644 test/builtin/list/__init__.py create mode 100644 test/core/parser/__init__.py diff --git a/test/builtin/list/__init__.py b/test/builtin/list/__init__.py new file mode 100644 index 000000000..40a96afc6 --- /dev/null +++ b/test/builtin/list/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git a/test/core/parser/__init__.py b/test/core/parser/__init__.py new file mode 100644 index 000000000..e69de29bb