Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion mathics_scanner/characters.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
and Unicode/ASCII.
"""

import os
import os.path as osp
import re

Expand Down
45 changes: 26 additions & 19 deletions mathics_scanner/data/named-characters.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,20 +51,25 @@
# control-sequence is both allowed in text-mode and math-mode,
# then the same control sequence also appears in amslatex.
#
# operator-name: If present, this symbol is a Mathics3 operator
# whose class name is the given name. For example, the
# Divide operator, "/" is here. But some symbols like
# "`" or "." inside a number could be considered
# either an operator or as part of a lexical symbol.
# "operators.yml" may contain use of these symbols, while here we
# might not (or we might). Also, from an operator
# operator-name: If present, this symbol is part of some
# Mathics3 operator whose class name is the
# given name. For example, the Divide operator, "/"
# is here. But some symbols like "`" or "." inside a
# number could be considered either an operator or as
# part of a lexical symbol. "operators.yml" may
# contain use of these symbols, while here we might
# not (or we might). Also, from an operator
# perspective, an operator name like "Association"
# might have *two* (bracketing) symbols associated
# here: "LeftAssociation" and "RightAssociation".
# More operator information can be found in
# file "operators.yml".
# More operator information can be found in file
# "operators.yml". Note that some operators, like
# Integrate, use several symbols, e.g. Integral and
# DifferentialD.
#
#
# unicode-block: when given, the name of the Unicode block (a named range of code points).
#
# unicode-equivalent: A unicode equivalent for the named-character, if it
# exists. If it is the same as "ascii", please omit.
#
Expand Down Expand Up @@ -2150,18 +2155,18 @@ Digamma:
wl-unicode-name: GREEK SMALL LETTER DIGAMMA

# The WL symbol displays with a round dot at the left endpoint.
# The unicode equivalent omits this.
# When there is a tag over the edge, WL uses a bold variant
# of the symbol.
# Note: not the same as \[Rule] or \[RightArrow]
DirectedEdge:
amslatex: '\rightarrow'
amslatex: '\mathrel{\cdot\rightarrow}'
esc-alias: de
has-unicode-inverse: false
is-letter-like: false
operator-name: DirectedEdge
unicode-equivalent: "\u2192"
unicode-equivalent-name: RIGHTWARDS ARROW
unicode-reference: https://www.compart.com/en/unicode/U+2192
unicode-equivalent: "\u21F4"
unicode-equivalent-name: RIGHT ARROW WITH SMALL CIRCLE
unicode-reference: https://www.compart.com/en/unicode/U+21F4
wl-reference: https://reference.wolfram.com/language/ref/character/DirectedEdge.html
wl-unicode: "\uF3D5"

Expand Down Expand Up @@ -8506,7 +8511,7 @@ RightAngleBracket:
wl-unicode: "\u232A"
wl-unicode-name: RIGHT-POINTING ANGLE BRACKET

# Note: not the same as \[Rule]
# Note: not the same as \[Rule] or \[DirectedEdge]
RightArrow:
amslatex: '\rightarrow'
esc-alias: ' ->'
Expand Down Expand Up @@ -8859,16 +8864,18 @@ RoundSpaceIndicator:
wl-reference: https://reference.wolfram.com/language/ref/character/RoundSpaceIndicator.html
wl-unicode: "\uF3B2"

# Note: not the same as \[RightArrow]
# Note: not the same as \[RightArrow] or \[DirectedEdge]
Rule:
amslatex: '\vrightarrow'
ascii: "->"
esc-alias: "->"
has-unicode-inverse: false
is-letter-like: false
operator-name: Rule
unicode-equivalent: "\u2192"
unicode-equivalent-name: RIGHTWARDS ARROW
unicode-reference: https://www.compart.com/en/unicode/U+2192
unicode-equivalent: "\u21FE"
unicode-equivalent-name: RIGHTWARDS OPEN-HEADED ARROW
unicode-block: Arrows
unicode-reference: https://www.compart.com/en/unicode/U+21FE
wl-reference: https://reference.wolfram.com/language/ref/character/Rule.html
wl-unicode: "\uF522"

Expand Down
5 changes: 2 additions & 3 deletions mathics_scanner/tokeniser.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,14 +290,13 @@ def init_module():
("Greater", r" \> "),
("Less", r" \< "),
# https://reference.wolfram.com/language/ref/character/DirectedEdge.html
# The official Unicode value is \u2192.
("DirectedEdge", r" -> | \uf3d5|\u2192"),
("DirectedEdge", r" -> | \uf3d5|\u21F4 "),
("Or", r" (\|\|) | \u2228 "),
("And", r" (\&\&) | \u2227 "),
("RepeatedNull", r" \.\.\. "),
("Repeated", r" \.\. "),
("Alternatives", r" \| "),
("Rule", r" (\-\>)|\uF522 "),
("Rule", r" (\-\>)| \uF522|\u21FE"),
("RuleDelayed", r" (\:\>)|\uF51F "),
# https://reference.wolfram.com/language/ref/character/UndirectedEdge.html
# The official Unicode value is \u2194
Expand Down
24 changes: 16 additions & 8 deletions test/test_general_yaml_sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def test_yaml_field_names():
"latex",
"operator-name",
"precedence",
"unicode-block",
"unicode-equivalent",
"unicode-equivalent-name",
"unicode-reference",
Expand Down Expand Up @@ -142,22 +143,29 @@ def test_unicode_name():
f"{k}'s unicode-equivalent doesn't have a unicode name (it's not valid unicode)"
)

real_name = v.get("unicode-equivalent-name")
name_in_yaml = v.get("unicode-equivalent-name")

if real_name is None:
if name_in_yaml is None:
raise ValueError(
"{k} has a unicode equivalent but doesn't have the unicode-equivalent-name field"
)

if k == "VerticalBar":
continue
assert real_name == expected_name or expected_name.startswith(
"MODIFIER LETTER SMALL SCHWA"
), f"{k} has unicode-equivalent-name set to {real_name} but it should be {expected_name}"

# If unicodedata gives a different name, then it is possible that the same Unicode character
# resides in two different code blocks, and in the YAML file we used one that unicodedata uses.
# Sadly, terminals use unicodedata and don't have a way to specify a specific Unicode code
# block like Supplemental Arrows-C.
assert name_in_yaml == expected_name, (
f"{k} has uncodedata set to {expected_name} but it YAML says it is {name_in_yaml}.\n"
"Change Unicode value in YAML to be unambiquous. "
)
else:
assert (
"ascii" in v
), f"{k} has unicode-equivalent-name set to {v['unicode-equivalent-name']} but it doesn't have a unicode or ascii equivalent"
assert "ascii" in v, (
f"{k} has unicode-equivalent-name set to {v['unicode-equivalent-name']} "
"but it doesn't have a Unicode or ASCII equivalent"
)


def test_wl_unicode():
Expand Down
Loading