From af87eed334b8d7b61e0872b25a29e6e057203e47 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 29 Jan 2026 07:47:39 -0500
Subject: [PATCH 1/5] Reassign Rule Unicode..

"Rule" is not the same thing as "RightArrow". Use a Unicode glyph that
matches the Mathematica symbol better. Note that the arrowhead is not a
triangle, but two lines.
---
 mathics_scanner/data/named-characters.yml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mathics_scanner/data/named-characters.yml b/mathics_scanner/data/named-characters.yml
index 48e0cd8..30250af 100644
--- a/mathics_scanner/data/named-characters.yml
+++ b/mathics_scanner/data/named-characters.yml
@@ -8866,9 +8866,11 @@ Rule:
   has-unicode-inverse: false
   is-letter-like: false
   operator-name: Rule
-  unicode-equivalent: "\u2192"
-  unicode-equivalent-name: RIGHTWARDS ARROW
-  unicode-reference: https://www.compart.com/en/unicode/U+2192
+  unicode-equivalent: "\u1F862"
+  ## It seems there are two names for this. The one used, is the one that our test checker finds.
+  # unicode-equivalent-name: WIDE-HEADED RIGHTWARDS LIGHT BARB ARROW
+  unicode-equivalent-name: GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + DIGIT TWO
+  unicode-reference: https://www.compart.com/en/unicode/U+1F862
   wl-reference: https://reference.wolfram.com/language/ref/character/Rule.html
   wl-unicode: "\uF522"
 

From 94ae0ea5e9ad47e88fd8fb5fb346309a8f4d30fa Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Thu, 29 Jan 2026 11:01:58 -0500
Subject: [PATCH 2/5] Start an operator block.

Also note that Mathics3 operators can use more than one symbol.
---
 mathics_scanner/data/named-characters.yml | 28 +++++++++++++----------
 test/test_general_yaml_sanity.py          | 15 +++++++++---
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/mathics_scanner/data/named-characters.yml b/mathics_scanner/data/named-characters.yml
index 30250af..8335c80 100644
--- a/mathics_scanner/data/named-characters.yml
+++ b/mathics_scanner/data/named-characters.yml
@@ -51,20 +51,25 @@
 #          control-sequence is both allowed in text-mode and math-mode,
 #          then the same control sequence also appears in amslatex.
 #
-#   operator-name: If present, this symbol is a Mathics3 operator with
-#                  whose class name is the given name. For example, the
-#                  Divide operator, "/" is here.  But some symbols like
-#                  "`" or "." inside a number could be considered
-#                  either an operator or as part of a lexical symbol.
-#                  "operators.yml" may contain use of these symbols, while here we
-#                  might not (or we might). Also, from an operator
+#   operator-name: If present, this symbol is part of some
+#                  Mathics3 operator whose class name is the
+#                  given name. For example, the Divide operator, "/"
+#                  is here.  But some symbols like "`" or "." inside a
+#                  number could be considered either an operator or as
+#                  part of a lexical symbol.  "operators.yml" may
+#                  contain use of these symbols, while here we might
+#                  not (or we might). Also, from an operator
 #                  perspective, an operator name like "Association"
 #                  might have *two* (bracketing) symbols associated
 #                  here: "LeftAssociation" and "RightAssociation".
-#                  More operator information can be found in
-#                  file "operators.yml".
+#                  More operator information can be found in file
+#                  "operators.yml". Note that some operators, like
+#                  Integrate, use several symbols, e.g. Integral and
+#                  DifferentialD.
 #
 #
+#   unicode-block: If present, the name of the Unicode block (a named range of code points).
+#
 #   unicode-equivalent: A unicode equivalent for the named-character, if it
 #                       exists. If it is the same as "ascii", please omit.
 #
@@ -8867,9 +8872,8 @@ Rule:
   is-letter-like: false
   operator-name: Rule
   unicode-equivalent: "\u1F862"
-  ## It seems there are two names for this. The one used, is the one that our test checker finds.
-  # unicode-equivalent-name: WIDE-HEADED RIGHTWARDS LIGHT BARB ARROW
-  unicode-equivalent-name: GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + DIGIT TWO
+  unicode-equivalent-name: WIDE-HEADED RIGHTWARDS LIGHT BARB ARROW
+  unicode-block: Arrows
   unicode-reference: https://www.compart.com/en/unicode/U+1F862
   wl-reference: https://reference.wolfram.com/language/ref/character/Rule.html
   wl-unicode: "\uF522"
diff --git a/test/test_general_yaml_sanity.py b/test/test_general_yaml_sanity.py
index 727e1f4..12f3e25 100644
--- a/test/test_general_yaml_sanity.py
+++ b/test/test_general_yaml_sanity.py
@@ -40,6 +40,7 @@ def test_yaml_field_names():
             "latex",
             "operator-name",
             "precedence",
+            "unicode-block",
             "unicode-equivalent",
             "unicode-equivalent-name",
             "unicode-reference",
@@ -151,9 +152,17 @@ def test_unicode_name():
 
             if k == "VerticalBar":
                 continue
-            assert real_name == expected_name or expected_name.startswith(
-                "MODIFIER LETTER SMALL SCHWA"
-            ), f"{k} has unicode-equivalent-name set to {real_name} but it should be {expected_name}"
+
+            # uncodedata sometimes gives a different name, and there is no way that I
+            # know of to allow it narrow its results to a particular unicode block,
+            # or find out what unicode block it is useing
+            if real_name not in (
+                "WIDE-HEADED RIGHTWARDS LIGHT BARB ARROW",
+                # "MODIFIER LETTER SMALL SCHWA",
+            ):
+                assert (
+                    real_name == expected_name
+                ), f"{k} has unicode-equivalent-name set to {real_name} but it should be {expected_name}"
         else:
             assert (
                 "ascii" in v

From a392925af41b3f1d31478780f1743acfc92dfa98 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Fri, 30 Jan 2026 17:16:17 -0500
Subject: [PATCH 3/5] Reassign Rule unicode to arrow block...

Also make sure to parse this unicode symbol as Rule.
---
 mathics_scanner/data/named-characters.yml |  6 +++---
 mathics_scanner/tokeniser.py              |  2 +-
 test/test_general_yaml_sanity.py          | 20 ++++++++++----------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/mathics_scanner/data/named-characters.yml b/mathics_scanner/data/named-characters.yml
index 8335c80..cebff7b 100644
--- a/mathics_scanner/data/named-characters.yml
+++ b/mathics_scanner/data/named-characters.yml
@@ -8871,10 +8871,10 @@ Rule:
   has-unicode-inverse: false
   is-letter-like: false
   operator-name: Rule
-  unicode-equivalent: "\u1F862"
-  unicode-equivalent-name: WIDE-HEADED RIGHTWARDS LIGHT BARB ARROW
+  unicode-equivalent: "\u21FE"
+  unicode-equivalent-name: Rightwards Open-Headed Arrow
   unicode-block: Arrows
-  unicode-reference: https://www.compart.com/en/unicode/U+1F862
+  unicode-reference: https://www.compart.com/en/unicode/U+21FE
   wl-reference: https://reference.wolfram.com/language/ref/character/Rule.html
   wl-unicode: "\uF522"
 
diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py
index 0b2cc69..3ceeff0 100644
--- a/mathics_scanner/tokeniser.py
+++ b/mathics_scanner/tokeniser.py
@@ -297,7 +297,7 @@ def init_module():
         ("RepeatedNull", r" \.\.\. "),
         ("Repeated", r" \.\. "),
         ("Alternatives", r" \| "),
-        ("Rule", r" (\-\>)|\uF522 "),
+        ("Rule", r" (\-\>)| \uF522|\u21FE"),
         ("RuleDelayed", r" (\:\>)|\uF51F "),
         # https://reference.wolfram.com/language/ref/character/UndirectedEdge.html
         # The official Unicode value is \u2194
diff --git a/test/test_general_yaml_sanity.py b/test/test_general_yaml_sanity.py
index 12f3e25..6fbedc6 100644
--- a/test/test_general_yaml_sanity.py
+++ b/test/test_general_yaml_sanity.py
@@ -153,16 +153,16 @@ def test_unicode_name():
             if k == "VerticalBar":
                 continue
 
-            # uncodedata sometimes gives a different name, and there is no way that I
-            # know of to allow it narrow its results to a particular unicode block,
-            # or find out what unicode block it is useing
-            if real_name not in (
-                "WIDE-HEADED RIGHTWARDS LIGHT BARB ARROW",
-                # "MODIFIER LETTER SMALL SCHWA",
-            ):
-                assert (
-                    real_name == expected_name
-                ), f"{k} has unicode-equivalent-name set to {real_name} but it should be {expected_name}"
+            # # uncodedata sometimes gives a different name, and there is no way that I
+            # # know of to allow it narrow its results to a particular unicode block,
+            # # or find out what unicode block it is useing
+            # if real_name not in (
+            #     "WIDE-HEADED RIGHTWARDS LIGHT BARB ARROW",
+            #     # "MODIFIER LETTER SMALL SCHWA",
+            # ):
+            #     assert (
+            #         real_name == expected_name
+            #     ), f"{k} has unicode-equivalent-name set to {real_name} but it should be {expected_name}"
         else:
             assert (
                 "ascii" in v

From ef724090ba38853a84b0df26488a034f9d1ad2d7 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Fri, 30 Jan 2026 17:41:01 -0500
Subject: [PATCH 4/5] Adjust test messages

---
 mathics_scanner/data/named-characters.yml |  2 +-
 test/test_general_yaml_sanity.py          | 29 +++++++++++------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/mathics_scanner/data/named-characters.yml b/mathics_scanner/data/named-characters.yml
index cebff7b..374aca1 100644
--- a/mathics_scanner/data/named-characters.yml
+++ b/mathics_scanner/data/named-characters.yml
@@ -8872,7 +8872,7 @@ Rule:
   is-letter-like: false
   operator-name: Rule
   unicode-equivalent: "\u21FE"
-  unicode-equivalent-name: Rightwards Open-Headed Arrow
+  unicode-equivalent-name: RIGHTWARDS OPEN-HEADED ARROW
   unicode-block: Arrows
   unicode-reference: https://www.compart.com/en/unicode/U+21FE
   wl-reference: https://reference.wolfram.com/language/ref/character/Rule.html
diff --git a/test/test_general_yaml_sanity.py b/test/test_general_yaml_sanity.py
index 6fbedc6..dcf6d6d 100644
--- a/test/test_general_yaml_sanity.py
+++ b/test/test_general_yaml_sanity.py
@@ -143,9 +143,9 @@ def test_unicode_name():
                     f"{k}'s unicode-equivalent doesn't have a unicode name (it's not valid unicode)"
                 )
 
-            real_name = v.get("unicode-equivalent-name")
+            name_in_yaml = v.get("unicode-equivalent-name")
 
-            if real_name is None:
+            if name_in_yaml is None:
                 raise ValueError(
                     "{k} has a unicode equivalent but doesn't have the unicode-equivalent-name field"
                 )
@@ -153,20 +153,19 @@ def test_unicode_name():
             if k == "VerticalBar":
                 continue
 
-            # # uncodedata sometimes gives a different name, and there is no way that I
-            # # know of to allow it narrow its results to a particular unicode block,
-            # # or find out what unicode block it is useing
-            # if real_name not in (
-            #     "WIDE-HEADED RIGHTWARDS LIGHT BARB ARROW",
-            #     # "MODIFIER LETTER SMALL SCHWA",
-            # ):
-            #     assert (
-            #         real_name == expected_name
-            #     ), f"{k} has unicode-equivalent-name set to {real_name} but it should be {expected_name}"
+            # If uncodedata gives a different name, then it is possible that the same Unicode character
+            # resides in two different code blocks, and in the YAML file we used one that uncodedata uses.
+            # Sadly, since terminals use uncodedata and don't have a way to specify a specific Unicode code
+            # block like Supplimental Arrows-C.
+            assert name_in_yaml == expected_name, (
+                f"{k} has unicodedata set to {expected_name} but it YAML says it is {name_in_yaml}.\n"
+                "Change Unicode value in YAML to be unambiquous. "
+            )
         else:
-            assert (
-                "ascii" in v
-            ), f"{k} has unicode-equivalent-name set to {v['unicode-equivalent-name']} but it doesn't have a unicode or ascii equivalent"
+            assert "ascii" in v, (
+                f"{k} has unicode-equivalent-name set to {v['unicode-equivalent-name']} "
+                "but it doesn't have a Unicode or ASCII equivalent"
+            )
 
 
 def test_wl_unicode():

From d9807e9ff89c41140e80a873f305a1f389471b3b Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Sat, 31 Jan 2026 06:21:17 -0500
Subject: [PATCH 5/5] Separate DirectedEdge from RightArrow

---
 mathics_scanner/characters.py             |  1 -
 mathics_scanner/data/named-characters.yml | 15 ++++++++-------
 mathics_scanner/tokeniser.py              |  3 +--
 test/test_general_yaml_sanity.py          |  2 +-
 4 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/mathics_scanner/characters.py b/mathics_scanner/characters.py
index f3aaeb9..057dc9c 100644
--- a/mathics_scanner/characters.py
+++ b/mathics_scanner/characters.py
@@ -6,7 +6,6 @@
 and Unicode/ASCII.
 """
 
-import os
 import os.path as osp
 import re
 
diff --git a/mathics_scanner/data/named-characters.yml b/mathics_scanner/data/named-characters.yml
index 374aca1..db3daea 100644
--- a/mathics_scanner/data/named-characters.yml
+++ b/mathics_scanner/data/named-characters.yml
@@ -2155,18 +2155,18 @@ Digamma:
   wl-unicode-name: GREEK SMALL LETTER DIGAMMA
 
 # The WL symbol displays with a round dot at the left endpoint.
-# The unicode equivalent shows omits this
 # When there is a tag over the edge, WL uses a bold variant
 # of the symbol.
+# Note: not the same as \[Rule] or \[RightArrow]
 DirectedEdge:
-  amslatex: '\rightarrow'
+  amslatex: '\mathrel{\cdot\rightarrow}'
   esc-alias: de
   has-unicode-inverse: false
   is-letter-like: false
   operator-name: DirectedEdge
-  unicode-equivalent: "\u2192"
-  unicode-equivalent-name: RIGHTWARDS ARROW
-  unicode-reference: https://www.compart.com/en/unicode/U+2192
+  unicode-equivalent: "\u21F4"
+  unicode-equivalent-name: RIGHT ARROW WITH SMALL CIRCLE
+  unicode-reference: https://www.compart.com/en/unicode/U+21F4
   wl-reference: https://reference.wolfram.com/language/ref/character/DirectedEdge.html
   wl-unicode: "\uF3D5"
 
@@ -8511,7 +8511,7 @@ RightAngleBracket:
   wl-unicode: "\u232A"
   wl-unicode-name: RIGHT-POINTING ANGLE BRACKET
 
-# Note: not the same as \[Rule]
+# Note: not the same as \[Rule] or \[DirectedEdge]
 RightArrow:
   amslatex: '\rightarrow'
   esc-alias: ' ->'
@@ -8864,8 +8864,9 @@ RoundSpaceIndicator:
   wl-reference: https://reference.wolfram.com/language/ref/character/RoundSpaceIndicator.html
   wl-unicode: "\uF3B2"
 
-# Note: not the same as \[RightArrow]
+# Note: not the same as \[RightArrow] or \[DirectedEdge]
 Rule:
+  amslatex: '\vrightarrow'
   ascii: "->"
   esc-alias: "->"
   has-unicode-inverse: false
diff --git a/mathics_scanner/tokeniser.py b/mathics_scanner/tokeniser.py
index 3ceeff0..1a0b841 100644
--- a/mathics_scanner/tokeniser.py
+++ b/mathics_scanner/tokeniser.py
@@ -290,8 +290,7 @@ def init_module():
         ("Greater", r" \> "),
         ("Less", r" \< "),
         # https://reference.wolfram.com/language/ref/character/DirectedEdge.html
-        # The official Unicode value is \u2192.
-        ("DirectedEdge", r" -> | \uf3d5|\u2192"),
+        ("DirectedEdge", r" -> | \uf3d5|\u21F4 "),
         ("Or", r" (\|\|) | \u2228 "),
         ("And", r" (\&\&) | \u2227 "),
         ("RepeatedNull", r" \.\.\. "),
diff --git a/test/test_general_yaml_sanity.py b/test/test_general_yaml_sanity.py
index dcf6d6d..e419877 100644
--- a/test/test_general_yaml_sanity.py
+++ b/test/test_general_yaml_sanity.py
@@ -158,7 +158,7 @@ def test_unicode_name():
             # Sadly, since terminals use uncodedata and don't have a way to specify a specific Unicode code
             # block like Supplimental Arrows-C.
             assert name_in_yaml == expected_name, (
-                f"{k} has unicodedata set to {expected_name} but it YAML says it is {name_in_yaml}.\n"
+                f"{k} has unicodedata set to {expected_name} but the YAML says it is {name_in_yaml}.\n"
                 "Change Unicode value in YAML to be unambiquous. "
             )
         else: