Mathics3 · rocky · Jan 31, 2026 · Jan 29, 2026 · Jan 29, 2026 · Jan 30, 2026
diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py
@@ -6,7 +6,6 @@
 and Unicode/ASCII.
 """
 
-import os
 import os.path as osp
 import re
 

diff --git a/mathics_scanner/data/named-characters.yml b/mathics_scanner/data/named-characters.yml
@@ -51,20 +51,25 @@
 #          control-sequence is both allowed in text-mode and math-mode,
 #          then the same control sequence also appears in amslatex.
 #
-#   operator-name: If present, this symbol is a Mathics3 operator with
-#                  whose class name is the given name. For example, the
-#                  Divide operator, "/" is here.  But some symbols like
-#                  "`" or "." inside a number could be considered
-#                  either an operator or as part of a lexical symbol.
-#                  "operators.yml" may contain use of these symbols, while here we
-#                  might not (or we might). Also, from an operator
+#   operator-name: If present, this symbol is part of some
+#                  Mathics3 operator whose class name is the
+#                  given name. For example, the Divide operator, "/"
+#                  is here.  But some symbols like "`" or "." inside a
+#                  number could be considered either an operator or as
+#                  part of a lexical symbol.  "operators.yml" may
+#                  contain use of these symbols, while here we might
+#                  not (or we might). Also, from an operator
 #                  perspective, an operator name like "Association"
 #                  might have *two* (bracketing) symbols associated
 #                  here: "LeftAssociation" and "RightAssociation".
-#                  More operator information can be found in
-#                  file "operators.yml".
+#                  More operator information can be found in file
+#                  "operators.yml". Note that some operators, like
+#                  Integrate, have several symbols, e.g. Integral and
+#                  DifferentialD, used in the operator.
 #
 #
+#   unicode-block: If present, the Unicode block (a named range of code points).
+#
 #   unicode-equivalent: A unicode equivalent for the named-character, if it
 #                       exists. If it is the same as "ascii", please omit.
 #
@@ -2150,18 +2155,18 @@ Digamma:
   wl-unicode-name: GREEK SMALL LETTER DIGAMMA
 
 # The WL symbol displays with a round dot at the left endpoint.
-# The unicode equivalent shows omits this
 # When there is a tag over the edge, WL uses a bold variant
 # of the symbol.
+# Note: not the same as \[Rule] or \[RightArrow]
 DirectedEdge:
-  amslatex: '\rightarrow'
+  amslatex: '\mathrel{\cdot\rightarrow}'
   esc-alias: de
   has-unicode-inverse: false
   is-letter-like: false
   operator-name: DirectedEdge
-  unicode-equivalent: "\u2192"
-  unicode-equivalent-name: RIGHTWARDS ARROW
-  unicode-reference: https://www.compart.com/en/unicode/U+2192
+  unicode-equivalent: "\u21F4"
+  unicode-equivalent-name: RIGHT ARROW WITH SMALL CIRCLE
+  unicode-reference: https://www.compart.com/en/unicode/U+21F4
   wl-reference: https://reference.wolfram.com/language/ref/character/DirectedEdge.html
   wl-unicode: "\uF3D5"
 
@@ -8506,7 +8511,7 @@ RightAngleBracket:
   wl-unicode: "\u232A"
   wl-unicode-name: RIGHT-POINTING ANGLE BRACKET
 
-# Note: not the same as \[Rule]
+# Note: not the same as \[Rule] or \[DirectedEdge]
 RightArrow:
   amslatex: '\rightarrow'
   esc-alias: ' ->'
@@ -8859,16 +8864,18 @@ RoundSpaceIndicator:
   wl-reference: https://reference.wolfram.com/language/ref/character/RoundSpaceIndicator.html
   wl-unicode: "\uF3B2"
 
-# Note: not the same as \[RightArrow]
+# Note: not the same as \[RightArrow] or \[DirectedEdge]
 Rule:
+  amslatex: '\vrightarrow'
   ascii: "->"
   esc-alias: "->"
   has-unicode-inverse: false
   is-letter-like: false
   operator-name: Rule
-  unicode-equivalent: "\u2192"
-  unicode-equivalent-name: RIGHTWARDS ARROW
-  unicode-reference: https://www.compart.com/en/unicode/U+2192
+  unicode-equivalent: "\u21FE"
+  unicode-equivalent-name: RIGHTWARDS OPEN-HEADED ARROW
+  unicode-block: Arrows
+  unicode-reference: https://www.compart.com/en/unicode/U+21FE
   wl-reference: https://reference.wolfram.com/language/ref/character/Rule.html
   wl-unicode: "\uF522"
 

diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py
@@ -290,14 +290,13 @@ def init_module():
         ("Greater", r" \> "),
         ("Less", r" \< "),
         # https://reference.wolfram.com/language/ref/character/DirectedEdge.html
-        # The official Unicode value is \u2192.
-        ("DirectedEdge", r" -> | \uf3d5|\u2192"),
+        ("DirectedEdge", r" -> | \uf3d5|\u21F4 "),
         ("Or", r" (\|\|) | \u2228 "),
         ("And", r" (\&\&) | \u2227 "),
         ("RepeatedNull", r" \.\.\. "),
         ("Repeated", r" \.\. "),
         ("Alternatives", r" \| "),
-        ("Rule", r" (\-\>)|\uF522 "),
+        ("Rule", r" (\-\>)| \uF522|\u21FE"),
         ("RuleDelayed", r" (\:\>)|\uF51F "),
         # https://reference.wolfram.com/language/ref/character/UndirectedEdge.html
         # The official Unicode value is \u2194

diff --git a/test/test_general_yaml_sanity.py b/test/test_general_yaml_sanity.py
@@ -40,6 +40,7 @@ def test_yaml_field_names():
             "latex",
             "operator-name",
             "precedence",
+            "unicode-block",
             "unicode-equivalent",
             "unicode-equivalent-name",
             "unicode-reference",
@@ -142,22 +143,29 @@ def test_unicode_name():
                     f"{k}'s unicode-equivalent doesn't have a unicode name (it's not valid unicode)"
                 )
 
-            real_name = v.get("unicode-equivalent-name")
+            name_in_yaml = v.get("unicode-equivalent-name")
 
-            if real_name is None:
+            if name_in_yaml is None:
                 raise ValueError(
                     "{k} has a unicode equivalent but doesn't have the unicode-equivalent-name field"
                 )
 
             if k == "VerticalBar":
                 continue
-            assert real_name == expected_name or expected_name.startswith(
-                "MODIFIER LETTER SMALL SCHWA"
-            ), f"{k} has unicode-equivalent-name set to {real_name} but it should be {expected_name}"
+
+            # If unicodedata gives a different name, then it is possible that the same Unicode character
+            # resides in two different code blocks, and the YAML file uses a different one than unicodedata.
+            # Sadly, terminals use unicodedata and don't have a way to specify a specific Unicode code
+            # block like Supplemental Arrows-C.
+            assert name_in_yaml == expected_name, (
+                f"{k} has uncodedata set to {expected_name} but it YAML says it is {name_in_yaml}.\n"
+                "Change Unicode value in YAML to be unambiquous. "
+            )
         else:
-            assert (
-                "ascii" in v
-            ), f"{k} has unicode-equivalent-name set to {v['unicode-equivalent-name']} but it doesn't have a unicode or ascii equivalent"
+            assert "ascii" in v, (
+                f"{k} has unicode-equivalent-name set to {v['unicode-equivalent-name']} "
+                "but it doesn't have a Unicode or ASCII equivalent"
+            )
 
 
 def test_wl_unicode():