Skip to content

Commit 94ae0ea

Browse files
committed
Start an operator block.
Also note that Mathics3 operators can use more than one symbol.
1 parent af87eed commit 94ae0ea

File tree

2 files changed

+28
-15
lines changed

2 files changed

+28
-15
lines changed

mathics_scanner/data/named-characters.yml

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,20 +51,25 @@
5151
# control-sequence is both allowed in text-mode and math-mode,
5252
# then the same control sequence also appears in amslatex.
5353
#
54-
# operator-name: If present, this symbol is a Mathics3 operator with
55-
# whose class name is the given name. For example, the
56-
# Divide operator, "/" is here. But some symbols like
57-
# "`" or "." inside a number could be considered
58-
# either an operator or as part of a lexical symbol.
59-
# "operators.yml" may contain use of these symbols, while here we
60-
# might not (or we might). Also, from an operator
54+
# operator-name: If present, this symbol is part of some
55+
# Mathics3 operator whose class name is the
56+
# given name. For example, the Divide operator, "/"
57+
# is here. But some symbols like "`" or "." inside a
58+
# number could be considered either an operator or as
59+
# part of a lexical symbol. "operators.yml" may
60+
# contain use of these symbols, while here we might
61+
# not (or we might). Also, from an operator
6162
# perspective, an operator name like "Association"
6263
# might have *two* (bracketing) symbols associated
6364
# here: "LeftAssociation" and "RightAssociation".
64-
# More operator information can be found in
65-
# file "operators.yml".
65+
# More operator information can be found in file
66+
# "operators.yml". Note that some operators like
67+
# Integrate have several symbols, e.g. Integral and
68+
# DifferentialD used in the operator.
6669
#
6770
#
71+
# unicode-block: when given, the unicode block, or named ranges of code points.
72+
##
6873
# unicode-equivalent: A unicode equivalent for the named-character, if it
6974
# exists. If it is the same as "ascii", please omit.
7075
#
@@ -8867,9 +8872,8 @@ Rule:
88678872
is-letter-like: false
88688873
operator-name: Rule
88698874
unicode-equivalent: "\u1F862"
8870-
## It seems there are two names for this. The one used, is the one that our test checker finds.
8871-
# unicode-equivalent-name: WIDE-HEADED RIGHTWARDS LIGHT BARB ARROW
8872-
unicode-equivalent-name: GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + DIGIT TWO
8875+
unicode-equivalent-name: WIDE-HEADED RIGHTWARDS LIGHT BARB ARROW
8876+
unicode-block: Arrows
88738877
unicode-reference: https://www.compart.com/en/unicode/U+1F862
88748878
wl-reference: https://reference.wolfram.com/language/ref/character/Rule.html
88758879
wl-unicode: "\uF522"

test/test_general_yaml_sanity.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def test_yaml_field_names():
4040
"latex",
4141
"operator-name",
4242
"precedence",
43+
"unicode-block",
4344
"unicode-equivalent",
4445
"unicode-equivalent-name",
4546
"unicode-reference",
@@ -151,9 +152,17 @@ def test_unicode_name():
151152

152153
if k == "VerticalBar":
153154
continue
154-
assert real_name == expected_name or expected_name.startswith(
155-
"MODIFIER LETTER SMALL SCHWA"
156-
), f"{k} has unicode-equivalent-name set to {real_name} but it should be {expected_name}"
155+
156+
# unicodedata sometimes gives a different name, and there is no way that I
157+
# know of to make it narrow its results to a particular unicode block,
158+
# or find out what unicode block it is using
159+
if real_name not in (
160+
"WIDE-HEADED RIGHTWARDS LIGHT BARB ARROW",
161+
# "MODIFIER LETTER SMALL SCHWA",
162+
):
163+
assert (
164+
real_name == expected_name
165+
), f"{k} has unicode-equivalent-name set to {real_name} but it should be {expected_name}"
157166
else:
158167
assert (
159168
"ascii" in v

0 commit comments

Comments
 (0)