From 29443e68fedb4dcfa551a12de9f311bd33f7477a Mon Sep 17 00:00:00 2001 From: Kyle King Date: Mon, 26 Jan 2026 21:34:43 -0600 Subject: [PATCH 1/7] =?UTF-8?q?=F0=9F=90=9B=20FIX:=20reorder=20footnotes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdformat_footnote/_reorder.py | 121 ++++++++++++++++++++++++++++++++++ mdformat_footnote/plugin.py | 46 +++++++++++-- tests/fixtures.md | 91 ++++++++++++++++++++++++- tests/test_word_wrap.py | 7 +- 4 files changed, 255 insertions(+), 10 deletions(-) create mode 100644 mdformat_footnote/_reorder.py diff --git a/mdformat_footnote/_reorder.py b/mdformat_footnote/_reorder.py new file mode 100644 index 0000000..db51ad1 --- /dev/null +++ b/mdformat_footnote/_reorder.py @@ -0,0 +1,121 @@ +"""Footnote ID and subId normalization logic.""" + +from __future__ import annotations + +from markdown_it.rules_core import StateCore + + +def reorder_footnotes_by_definition(state: StateCore) -> None: + """Reorder footnotes to match definition order and normalize subIds. + + The mdit-py-plugins footnote plugin assigns IDs and subIds based on the + order references are encountered during inline parsing. This causes HTML + to differ when footnote definitions are reordered by the formatter. + + This rule: + 1. Preserves orphan footnotes (defined but never referenced) + 2. Reorders the footnote list to match definition order + 3. Updates all token IDs to match the new ordering + 4. Reassigns subIds based on output order (body first, then definitions) + + This ensures consistent HTML output regardless of definition position. + """ + if "footnotes" not in state.env: + return + + footnote_data = state.env["footnotes"] + refs = footnote_data.get("refs", {}) + old_list = footnote_data.get("list", {}) + + if not refs: + return + + new_list: dict[int, dict] = {} + old_to_new_id: dict[int, int] = {} + + for new_id, label_key in enumerate(refs.keys()): + label = label_key[1:] + old_id = refs[label_key] + + if old_id >= 0 and old_id in old_list: + new_list[new_id] = old_list[old_id].copy() + else: + new_list[new_id] = {"label": label, "count": 0} + + if old_id >= 0: + old_to_new_id[old_id] = new_id + refs[label_key] = new_id + + footnote_data["list"] = new_list + + _update_token_ids(state.tokens, old_to_new_id) + _reassign_subids(state.tokens, refs, new_list) + + +def _update_token_ids(tokens: list, old_to_new_id: dict[int, int]) -> None: + """Recursively update footnote IDs in tokens.""" + for token in tokens: + if token.type in ("footnote_ref", "footnote_anchor"): + if token.meta and "id" in token.meta: + old_id = token.meta["id"] + if old_id in old_to_new_id: + token.meta["id"] = old_to_new_id[old_id] + if token.children: + _update_token_ids(token.children, old_to_new_id) + + +def _partition_refs_by_context( + tokens: list, +) -> tuple[list, dict[str, list]]: + """Partition footnote refs into body refs and definition refs.""" + body_refs: list = [] + def_refs: dict[str, list] = {} + current_def_label: str | None = None + + for token in tokens: + if token.type == "footnote_reference_open": + current_def_label = token.meta.get("label") + if current_def_label: + def_refs.setdefault(current_def_label, []) + elif token.type == "footnote_reference_close": + current_def_label = None + elif current_def_label is None: + _collect_refs(token, body_refs) + else: + _collect_refs(token, def_refs.setdefault(current_def_label, [])) + + return body_refs, def_refs + + +def _assign_subids_to_refs(ref_tokens: list, counters: dict[int, int]) -> None: + """Assign sequential subIds to a list of ref tokens.""" + for ref_token in ref_tokens: + fn_id = ref_token.meta["id"] + ref_token.meta["subId"] = counters.get(fn_id, 0) + counters[fn_id] = counters.get(fn_id, 0) + 1 + + +def _reassign_subids(tokens: list, refs: dict, footnote_list: dict) -> None: + """Reassign subIds based on output order: body refs first, then definition refs.""" + body_refs, def_refs = _partition_refs_by_context(tokens) + subid_counters: dict[int, int] = {} + + _assign_subids_to_refs(body_refs, subid_counters) + + for label_key in refs.keys(): + label = label_key[1:] + if label in def_refs: + _assign_subids_to_refs(def_refs[label], subid_counters) + + for fn_id, count in subid_counters.items(): + if fn_id in footnote_list: + footnote_list[fn_id]["count"] = count + + +def _collect_refs(token, ref_list: list) -> None: + """Collect footnote_ref tokens from a token and its children.""" + if token.type == "footnote_ref" and token.meta: + ref_list.append(token) + if hasattr(token, "children") and token.children: + for child in token.children: + _collect_refs(child, ref_list) diff --git a/mdformat_footnote/plugin.py b/mdformat_footnote/plugin.py index 98088de..762b1f9 100644 --- a/mdformat_footnote/plugin.py +++ b/mdformat_footnote/plugin.py @@ -8,6 +8,8 @@ from mdformat.renderer.typing import Render from mdit_py_plugins.footnote import footnote_plugin +from ._reorder import reorder_footnotes_by_definition + def update_mdit(mdit: MarkdownIt) -> None: """Update the parser, adding the footnote plugin.""" @@ -15,6 +17,11 @@ def update_mdit(mdit: MarkdownIt) -> None: # Disable inline footnotes for now, since we don't have rendering # support for them yet. mdit.disable("footnote_inline") + # Reorder footnotes to match definition order and preserve orphans. + # Must run before footnote_tail. + mdit.core.ruler.before( + "footnote_tail", "reorder_footnotes", reorder_footnotes_by_definition + ) def _footnote_ref_renderer(node: RenderTreeNode, context: RenderContext) -> str: @@ -25,18 +32,47 @@ def _footnote_renderer(node: RenderTreeNode, context: RenderContext) -> str: first_line = f"[^{node.meta['label']}]:" indent = " " * 4 elements = [] + + first_child_idx = 0 + while ( + first_child_idx < len(node.children) + and node.children[first_child_idx].type == "footnote_anchor" + ): + first_child_idx += 1 + + if ( + first_child_idx < len(node.children) + and node.children[first_child_idx].type == "paragraph" + ): + with context.indented(len(first_line) + 1): + first_element = node.children[first_child_idx].render(context) + + first_element_lines = first_element.split("\n") + first_para_first_line = first_element_lines[0] + first_para_rest_lines = first_element_lines[1:] + + with context.indented(len(indent)): + for child in node.children[first_child_idx + 1 :]: + if child.type == "footnote_anchor": + continue + elements.append(child.render(context)) + + result = first_line + " " + first_para_first_line + if first_para_rest_lines: + indented_rest = textwrap.indent("\n".join(first_para_rest_lines), indent) + result += "\n" + indented_rest + if elements: + result += "\n\n" + textwrap.indent("\n\n".join(elements), indent) + return result + with context.indented(len(indent)): for child in node.children: if child.type == "footnote_anchor": continue elements.append(child.render(context)) body = textwrap.indent("\n\n".join(elements), indent) - # if the first body element is a paragraph, we can start on the first line, - # otherwise we start on the second line - if body and node.children and node.children[0].type != "paragraph": + if body: body = "\n" + body - else: - body = " " + body.lstrip() return first_line + body diff --git a/tests/fixtures.md b/tests/fixtures.md index 05275ae..9e022c6 100644 --- a/tests/fixtures.md +++ b/tests/fixtures.md @@ -56,11 +56,11 @@ Empty footnote . Here is a footnote reference [^emptynote] -[^emptynote]: +[^emptynote]: . Here is a footnote reference [^emptynote] -[^emptynote]: +[^emptynote]: . @@ -116,3 +116,90 @@ unindented next line content ``` . + + +footnote-ref-inside-footnote (issue #7) +. +[^a]: lorem +[^c]: ipsum [^a] +. +[^a]: lorem + +[^c]: ipsum [^a] +. + + +nested-footnote-refs (issue #8) +. +[^a]: Lorem. [^b] + +[^b]: Ipsum. + +A [^b] +. +A [^b] + +[^a]: Lorem. [^b] + +[^b]: Ipsum. +. + + +Footnote in table nested in admonition (issue #22) +. +# Document + +| Color | +| ------ | +| R [^1] | +| G [^2] | +| B [^3] | + +```{tip} +| Color | +| ------ | +| C [^4] | +| M [^5] | +| Y [^6] | +``` + +[^1]: Red + +[^2]: Green + +[^3]: Blue + +[^4]: Cyan + +[^5]: Magenta + +[^6]: Yellow +. +# Document + +| Color | +| ------ | +| R [^1] | +| G [^2] | +| B [^3] | + +```{tip} +| Color | +| ------ | +| C [^4] | +| M [^5] | +| Y [^6] | +``` + +[^1]: Red + +[^2]: Green + +[^3]: Blue + +[^4]: Cyan + +[^5]: Magenta + +[^6]: Yellow +. diff --git a/tests/test_word_wrap.py b/tests/test_word_wrap.py index fbe2867..6cdcb84 100644 --- a/tests/test_word_wrap.py +++ b/tests/test_word_wrap.py @@ -13,9 +13,10 @@ def test_word_wrap(): expected_output = """\ [^a] -[^a]: Ooh no, the first line of this first - paragraph is still wrapped too wide - unfortunately. Should fix this. +[^a]: Ooh no, the first line of this + first paragraph is still wrapped + too wide unfortunately. Should fix + this. But this second paragraph is wrapped exactly as expected. Woohooo, From 046a6eb58117625e5a402903bfd734a6a6def88e Mon Sep 17 00:00:00 2001 From: Kyle King Date: Mon, 26 Jan 2026 21:37:16 -0600 Subject: [PATCH 2/7] refactor: split out fix for #5 --- mdformat_footnote/plugin.py | 39 +++++-------------------------------- tests/test_word_wrap.py | 7 +++---- 2 files changed, 8 insertions(+), 38 deletions(-) diff --git a/mdformat_footnote/plugin.py b/mdformat_footnote/plugin.py index 762b1f9..31eb457 100644 --- a/mdformat_footnote/plugin.py +++ b/mdformat_footnote/plugin.py @@ -32,47 +32,18 @@ def _footnote_renderer(node: RenderTreeNode, context: RenderContext) -> str: first_line = f"[^{node.meta['label']}]:" indent = " " * 4 elements = [] - - first_child_idx = 0 - while ( - first_child_idx < len(node.children) - and node.children[first_child_idx].type == "footnote_anchor" - ): - first_child_idx += 1 - - if ( - first_child_idx < len(node.children) - and node.children[first_child_idx].type == "paragraph" - ): - with context.indented(len(first_line) + 1): - first_element = node.children[first_child_idx].render(context) - - first_element_lines = first_element.split("\n") - first_para_first_line = first_element_lines[0] - first_para_rest_lines = first_element_lines[1:] - - with context.indented(len(indent)): - for child in node.children[first_child_idx + 1 :]: - if child.type == "footnote_anchor": - continue - elements.append(child.render(context)) - - result = first_line + " " + first_para_first_line - if first_para_rest_lines: - indented_rest = textwrap.indent("\n".join(first_para_rest_lines), indent) - result += "\n" + indented_rest - if elements: - result += "\n\n" + textwrap.indent("\n\n".join(elements), indent) - return result - with context.indented(len(indent)): for child in node.children: if child.type == "footnote_anchor": continue elements.append(child.render(context)) body = textwrap.indent("\n\n".join(elements), indent) - if body: + # if the first body element is a paragraph, we can start on the first line, + # otherwise we start on the second line + if body and node.children and node.children[0].type != "paragraph": body = "\n" + body + else: + body = " " + body.lstrip() return first_line + body diff --git a/tests/test_word_wrap.py b/tests/test_word_wrap.py index 6cdcb84..fbe2867 100644 --- a/tests/test_word_wrap.py +++ b/tests/test_word_wrap.py @@ -13,10 +13,9 @@ def test_word_wrap(): expected_output = """\ [^a] -[^a]: Ooh no, the first line of this - first paragraph is still wrapped - too wide unfortunately. Should fix - this. +[^a]: Ooh no, the first line of this first + paragraph is still wrapped too wide + unfortunately. Should fix this. But this second paragraph is wrapped exactly as expected. Woohooo, From bcbe7ec52c263e2e55f0da7ebb987935fed2a258 Mon Sep 17 00:00:00 2001 From: Kyle King Date: Mon, 26 Jan 2026 21:38:16 -0600 Subject: [PATCH 3/7] docs: fix minor nits and typo --- mdformat_footnote/_reorder.py | 4 ++-- tests/fixtures.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mdformat_footnote/_reorder.py b/mdformat_footnote/_reorder.py index db51ad1..21c3f43 100644 --- a/mdformat_footnote/_reorder.py +++ b/mdformat_footnote/_reorder.py @@ -9,8 +9,8 @@ def reorder_footnotes_by_definition(state: StateCore) -> None: """Reorder footnotes to match definition order and normalize subIds. The mdit-py-plugins footnote plugin assigns IDs and subIds based on the - order references are encountered during inline parsing. This causes HTML - to differ when footnote definitions are reordered by the formatter. + order the references are encountered during inline parsing. This causes + HTML to differ when footnote definitions are reordered by the formatter This rule: 1. Preserves orphan footnotes (defined but never referenced) diff --git a/tests/fixtures.md b/tests/fixtures.md index 9e022c6..06d5625 100644 --- a/tests/fixtures.md +++ b/tests/fixtures.md @@ -56,11 +56,11 @@ Empty footnote . Here is a footnote reference [^emptynote] -[^emptynote]: +[^emptynote]: . Here is a footnote reference [^emptynote] -[^emptynote]: +[^emptynote]: . From 73059f965917d2e2505e996d45eb4cd82527914d Mon Sep 17 00:00:00 2001 From: Kyle King Date: Mon, 26 Jan 2026 21:48:44 -0600 Subject: [PATCH 4/7] test: generate a few additional test scenarios --- tests/fixtures.md | 284 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) diff --git a/tests/fixtures.md b/tests/fixtures.md index 06d5625..13a2b1d 100644 --- a/tests/fixtures.md +++ b/tests/fixtures.md @@ -203,3 +203,287 @@ Footnote in table nested in admonition (issue #22) [^6]: Yellow . + + +Multiple references to same footnote with subIds +. +First [^1] and second [^1] and third [^1] + +[^1]: Shared footnote +. +First [^1] and second [^1] and third [^1] + +[^1]: Shared footnote +. + + +Orphan footnotes (defined but never referenced) +. +Referenced [^used] + +[^orphan]: This is never referenced + +[^used]: This is used + +[^another-orphan]: Also unused +. +Referenced [^used] + +[^orphan]: This is never referenced + +[^used]: This is used + +[^another-orphan]: Also unused +. + + +Chained nested footnote references (A references B, B references C) +. +[^a]: References B [^b] + +[^b]: References C [^c] + +[^c]: Final one + +Start [^a] +. +Start [^a] + +[^a]: References B [^b] + +[^b]: References C [^c] + +[^c]: Final one +. + + +Complex mixed ordering with multiple references +. +[^z]: Defined first + +[^a]: Defined second + +Text [^a] then [^z] then [^a] again +. +Text [^a] then [^z] then [^a] again + +[^z]: Defined first + +[^a]: Defined second +. + + +Footnote referenced in body and within another footnote +. +[^x]: Simple + +[^y]: Contains [^x] reference + +Body [^x] and [^y] +. +Body [^x] and [^y] + +[^x]: Simple + +[^y]: Contains [^x] reference +. + + +Deeply nested: footnote in list in footnote +. +[^outer]: List item: + - Item with [^inner] reference + - Another item + +[^inner]: Inner content + +Text [^outer] +. +Text [^outer] + +[^outer]: List item: + + - Item with [^inner] reference + - Another item + +[^inner]: Inner content +. + + +Multiple footnotes in nested structures +. +[^1]: First + +[^2]: Second with [^1] + +[^3]: Third with [^2] and [^1] + +Body: [^3] [^2] [^1] +. +Body: [^3] [^2] [^1] + +[^1]: First + +[^2]: Second with [^1] + +[^3]: Third with [^2] and [^1] +. + + +Reordering with mixed body and nested references +. +[^c]: Defined first + +[^b]: Defined second [^c] + +[^a]: Defined third [^b] + +Body [^a] [^b] [^c] +. +Body [^a] [^b] [^c] + +[^c]: Defined first + +[^b]: Defined second [^c] + +[^a]: Defined third [^b] +. + + +Footnotes with same reference appearing in body and definitions +. +[^shared]: Base note + +[^wrapper]: Contains [^shared] + +First [^shared] in body, then [^wrapper] +. +First [^shared] in body, then [^wrapper] + +[^shared]: Base note + +[^wrapper]: Contains [^shared] +. + + +Complex scenario: multiple refs, nesting, and reordering +. +[^z]: Last defined [^a] + +[^m]: Middle defined + +[^a]: First defined [^m] + +Body [^m] [^a] [^z] [^m] +. +Body [^m] [^a] [^z] [^m] + +[^z]: Last defined [^a] + +[^m]: Middle defined + +[^a]: First defined [^m] +. + + +Footnote in blockquote with nested reference +. +[^inner]: Inner note + +[^outer]: Quote: + > Blockquote with [^inner] + +Text [^outer] +. +Text [^outer] + +[^inner]: Inner note + +[^outer]: Quote: + + > Blockquote with [^inner] +. + + +Three-level deep nesting +. +[^1]: Level 1 + +[^2]: Level 2 [^1] + +[^3]: Level 3 [^2] + +Start [^3] +. +Start [^3] + +[^1]: Level 1 + +[^2]: Level 2 [^1] + +[^3]: Level 3 [^2] +. + + +Mixed orphans and referenced with complex ordering +. +[^used-first]: Used + +[^orphan-1]: Never used + +[^used-second]: Also used [^used-first] + +[^orphan-2]: Also never used + +Body [^used-second] [^used-first] +. +Body [^used-second] [^used-first] + +[^used-first]: Used + +[^orphan-1]: Never used + +[^used-second]: Also used [^used-first] + +[^orphan-2]: Also never used +. + + +Footnotes in table cells with cross-references +. +[^1]: First + +[^2]: Second [^1] + +| Col A | Col B | +| ----- | ----- | +| A [^1] | B [^2] | +. +| Col A | Col B | +| ----- | ----- | +| A [^1] | B [^2] | + +[^1]: First + +[^2]: Second [^1] +. + + +Same footnote multiple times in same and different contexts +. +[^repeat]: Repeated note + +Para 1: [^repeat] [^repeat] + +[^nested]: Has [^repeat] inside + +Para 2: [^nested] [^repeat] +. +Para 1: [^repeat] [^repeat] + +Para 2: [^nested] [^repeat] + +[^repeat]: Repeated note + +[^nested]: Has [^repeat] inside +. From 46fc717d4bdb2518da309640937c0e1049d56382 Mon Sep 17 00:00:00 2001 From: Kyle King Date: Tue, 27 Jan 2026 07:58:52 -0600 Subject: [PATCH 5/7] feat: implement opt-in and backward compatible ordering --- mdformat_footnote/__init__.py | 3 +- mdformat_footnote/_helpers.py | 17 ++ mdformat_footnote/_reorder.py | 212 +++++++++++++++--- mdformat_footnote/plugin.py | 33 ++- tests/fixture_helpers.py | 23 ++ tests/fixtures/cli_integration.md | 14 ++ tests/fixtures/cli_options.md | 45 ++++ tests/{fixtures.md => fixtures/footnote.md} | 34 +-- tests/fixtures/regression.md | 123 ++++++++++ .../word_wrap.md} | 14 +- tests/test_cli_integration.py | 51 +++++ tests/test_fixtures.py | 39 +++- 12 files changed, 529 insertions(+), 79 deletions(-) create mode 100644 mdformat_footnote/_helpers.py create mode 100644 tests/fixture_helpers.py create mode 100644 tests/fixtures/cli_integration.md create mode 100644 tests/fixtures/cli_options.md rename tests/{fixtures.md => fixtures/footnote.md} (96%) create mode 100644 tests/fixtures/regression.md rename tests/{test_word_wrap.py => fixtures/word_wrap.md} (65%) create mode 100644 tests/test_cli_integration.py diff --git a/mdformat_footnote/__init__.py b/mdformat_footnote/__init__.py index d3a2974..0352e0c 100644 --- a/mdformat_footnote/__init__.py +++ b/mdformat_footnote/__init__.py @@ -1,5 +1,6 @@ """An mdformat plugin for parsing/validating footnotes""" __version__ = "0.1.2" +__plugin_name__ = "footnote" -from .plugin import RENDERERS, update_mdit # noqa: F401 +from .plugin import RENDERERS, add_cli_argument_group, update_mdit # noqa: F401 diff --git a/mdformat_footnote/_helpers.py b/mdformat_footnote/_helpers.py new file mode 100644 index 0000000..a3d3652 --- /dev/null +++ b/mdformat_footnote/_helpers.py @@ -0,0 +1,17 @@ +"""Helper functions for plugin configuration.""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any + +from . import __plugin_name__ + +ContextOptions = Mapping[str, Any] + + +def get_conf(options: ContextOptions, key: str) -> bool | str | int | None: + """Read setting from mdformat configuration Context.""" + if (api := options["mdformat"].get(key)) is not None: + return api + return options["mdformat"].get("plugin", {}).get(__plugin_name__, {}).get(key) diff --git a/mdformat_footnote/_reorder.py b/mdformat_footnote/_reorder.py index 21c3f43..e1bcb2c 100644 --- a/mdformat_footnote/_reorder.py +++ b/mdformat_footnote/_reorder.py @@ -2,23 +2,135 @@ from __future__ import annotations +from dataclasses import dataclass, field + from markdown_it.rules_core import StateCore -def reorder_footnotes_by_definition(state: StateCore) -> None: - """Reorder footnotes to match definition order and normalize subIds. +@dataclass +class _ReorderState: + """Mutable state for footnote reordering.""" + + old_list: dict + refs: dict + new_list: dict = field(default_factory=dict) + old_to_new_id: dict[int, int] = field(default_factory=dict) + processed: set[str] = field(default_factory=set) + new_id: int = 0 + + def get_or_create_def(self, label: str) -> dict: + """Get footnote definition by label, or create a default.""" + for fn_data in self.old_list.values(): + if fn_data.get("label") == label: + return fn_data.copy() + return {"label": label, "count": 0} + + def find_old_id_by_label(self, label: str) -> int | None: + """Find the old ID for a label in old_list.""" + for old_id, fn_data in self.old_list.items(): + if fn_data.get("label") == label: + return old_id + return None + + def add_footnote( + self, label: str, label_key: str, old_id: int | None = None + ) -> None: + """Add a footnote to the new list and update mappings.""" + self.new_list[self.new_id] = self.get_or_create_def(label) + + if old_id is not None: + self.old_to_new_id[old_id] = self.new_id + elif (found_id := self.find_old_id_by_label(label)) is not None: + self.old_to_new_id[found_id] = self.new_id + + self.refs[label_key] = self.new_id + self.processed.add(label) + self.new_id += 1 + + +def _categorize_footnotes( + refs: dict, + footnote_deps: dict[str, set[str]], +) -> tuple[list[tuple[int, str, str]], set[str], list[str]]: + """Categorize footnotes into body-referenced, nested-only, and orphans. + + Returns: + Tuple of (body_referenced, nested_only, true_orphans) where: + - body_referenced: list of (old_id, label_key, label) sorted by old_id + - nested_only: set of labels only referenced from other footnotes + - true_orphans: list of label_keys never referenced anywhere + """ + referenced_by_footnotes: set[str] = set() + for refs_set in footnote_deps.values(): + referenced_by_footnotes.update(refs_set) + + body_referenced: list[tuple[int, str, str]] = [] + nested_only: set[str] = set() + true_orphans: list[str] = [] + + for label_key, old_id in refs.items(): + label = label_key[1:] + if old_id >= 0: + body_referenced.append((old_id, label_key, label)) + elif label in referenced_by_footnotes: + nested_only.add(label) + else: + true_orphans.append(label_key) + + body_referenced.sort(key=lambda x: x[0]) + return body_referenced, nested_only, true_orphans + + +def _should_skip_nested( + label: str, + state: _ReorderState, + body_referenced_labels: set[str], + true_orphans: list[str], +) -> bool: + """Check if a nested footnote should be skipped.""" + if label in state.processed: + return True + if label in body_referenced_labels: + return True + if f":{label}" in true_orphans: + return True + return False + + +def _process_nested_footnotes( + parent_label: str, + footnote_deps: dict[str, set[str]], + state: _ReorderState, + body_referenced_labels: set[str], + true_orphans: list[str], +) -> None: + """Process nested footnotes referenced by a parent footnote.""" + if parent_label not in footnote_deps: + return + + for nested_label in footnote_deps[parent_label]: + if _should_skip_nested( + nested_label, state, body_referenced_labels, true_orphans + ): + continue + state.add_footnote(nested_label, f":{nested_label}") + - The mdit-py-plugins footnote plugin assigns IDs and subIds based on the - order the references are encountered during inline parsing. This causes - HTML to differ when footnote definitions are reordered by the formatter +def reorder_footnotes_by_definition( + state: StateCore, keep_orphans: bool = False +) -> None: + """Reorder footnotes by reference order, fix IDs, and handle orphans. - This rule: - 1. Preserves orphan footnotes (defined but never referenced) - 2. Reorders the footnote list to match definition order - 3. Updates all token IDs to match the new ordering - 4. Reassigns subIds based on output order (body first, then definitions) + The mdit-py-plugins footnote plugin assigns IDs based on the order + references are encountered during inline parsing. This function: + 1. Sorts footnotes by reference order (order they appear in body text) + 2. Keeps nested footnotes (referenced from other footnotes) with their parents + 3. Removes true orphans (never referenced) unless keep_orphans=True + 4. Reassigns IDs to ensure consistent HTML output - This ensures consistent HTML output regardless of definition position. + Args: + state: markdown-it state + keep_orphans: If True, preserve footnotes that are never referenced """ if "footnotes" not in state.env: return @@ -30,26 +142,66 @@ def reorder_footnotes_by_definition(state: StateCore) -> None: if not refs: return - new_list: dict[int, dict] = {} - old_to_new_id: dict[int, int] = {} + footnote_deps = _build_dependency_graph(state.tokens) + body_referenced, nested_only, true_orphans = _categorize_footnotes( + refs, footnote_deps + ) - for new_id, label_key in enumerate(refs.keys()): - label = label_key[1:] - old_id = refs[label_key] + if not keep_orphans: + for orphan_key in true_orphans: + del refs[orphan_key] - if old_id >= 0 and old_id in old_list: - new_list[new_id] = old_list[old_id].copy() - else: - new_list[new_id] = {"label": label, "count": 0} + body_referenced_labels = {label for _, _, label in body_referenced} + reorder_state = _ReorderState(old_list=old_list, refs=refs) - if old_id >= 0: - old_to_new_id[old_id] = new_id - refs[label_key] = new_id + for old_id, label_key, label in body_referenced: + reorder_state.add_footnote(label, label_key, old_id) + _process_nested_footnotes( + label, footnote_deps, reorder_state, body_referenced_labels, true_orphans + ) + + for nested_label in nested_only - reorder_state.processed: + reorder_state.add_footnote(nested_label, f":{nested_label}") + + if keep_orphans: + for orphan_key in true_orphans: + reorder_state.add_footnote(orphan_key[1:], orphan_key) - footnote_data["list"] = new_list + footnote_data["list"] = reorder_state.new_list + + _update_token_ids(state.tokens, reorder_state.old_to_new_id) + _reassign_subids(state.tokens, refs, reorder_state.new_list) + + +def _build_dependency_graph(tokens: list) -> dict[str, set[str]]: + """Build a graph of which footnotes reference which others. + + Returns: + Dict mapping footnote label to set of labels it references + """ + graph: dict[str, set[str]] = {} + current_def_label: str | None = None - _update_token_ids(state.tokens, old_to_new_id) - _reassign_subids(state.tokens, refs, new_list) + for token in tokens: + if token.type == "footnote_reference_open": + current_def_label = token.meta.get("label") + if current_def_label: + graph.setdefault(current_def_label, set()) + elif token.type == "footnote_reference_close": + current_def_label = None + elif current_def_label is not None: + _collect_nested_refs(token, graph[current_def_label]) + + return graph + + +def _collect_nested_refs(token, ref_set: set[str]) -> None: + """Collect footnote labels referenced from a token and its children.""" + if token.type == "footnote_ref" and token.meta: + ref_set.add(token.meta["label"]) + if token.children: + for child in token.children: + _collect_nested_refs(child, ref_set) def _update_token_ids(tokens: list, old_to_new_id: dict[int, int]) -> None: @@ -64,9 +216,7 @@ def _update_token_ids(tokens: list, old_to_new_id: dict[int, int]) -> None: _update_token_ids(token.children, old_to_new_id) -def _partition_refs_by_context( - tokens: list, -) -> tuple[list, dict[str, list]]: +def _partition_refs_by_context(tokens: list) -> tuple[list, dict[str, list]]: """Partition footnote refs into body refs and definition refs.""" body_refs: list = [] def_refs: dict[str, list] = {} @@ -102,7 +252,7 @@ def _reassign_subids(tokens: list, refs: dict, footnote_list: dict) -> None: _assign_subids_to_refs(body_refs, subid_counters) - for label_key in refs.keys(): + for label_key in refs: label = label_key[1:] if label in def_refs: _assign_subids_to_refs(def_refs[label], subid_counters) @@ -116,6 +266,6 @@ def _collect_refs(token, ref_list: list) -> None: """Collect footnote_ref tokens from a token and its children.""" if token.type == "footnote_ref" and token.meta: ref_list.append(token) - if hasattr(token, "children") and token.children: + if token.children: for child in token.children: _collect_refs(child, ref_list) diff --git a/mdformat_footnote/plugin.py b/mdformat_footnote/plugin.py index 31eb457..18971ce 100644 --- a/mdformat_footnote/plugin.py +++ b/mdformat_footnote/plugin.py @@ -1,6 +1,8 @@ from __future__ import annotations +import argparse from collections.abc import Mapping +from functools import partial import textwrap from markdown_it import MarkdownIt @@ -8,20 +10,43 @@ from mdformat.renderer.typing import Render from mdit_py_plugins.footnote import footnote_plugin +from ._helpers import ContextOptions, get_conf from ._reorder import reorder_footnotes_by_definition +def _keep_orphans(options: ContextOptions) -> bool: + """Check if orphan footnotes should be preserved.""" + return bool(get_conf(options, "keep_orphans")) or False + + +def add_cli_argument_group(group: argparse._ArgumentGroup) -> None: + """Add options to the mdformat CLI. + + Stored in `mdit.options["mdformat"]["plugin"]["footnote"]` + """ + group.add_argument( + "--keep-footnote-orphans", + action="store_const", + const=True, + dest="keep_orphans", + help=( + "Keep footnote definitions that are never referenced " + "(default: remove them)" + ), + ) + + def update_mdit(mdit: MarkdownIt) -> None: """Update the parser, adding the footnote plugin.""" mdit.use(footnote_plugin) # Disable inline footnotes for now, since we don't have rendering # support for them yet. mdit.disable("footnote_inline") - # Reorder footnotes to match definition order and preserve orphans. + # Reorder footnotes by reference order, fix IDs, and handle orphans. # Must run before footnote_tail. - mdit.core.ruler.before( - "footnote_tail", "reorder_footnotes", reorder_footnotes_by_definition - ) + keep_orphans = _keep_orphans(mdit.options) + reorder_fn = partial(reorder_footnotes_by_definition, keep_orphans=keep_orphans) + mdit.core.ruler.before("footnote_tail", "reorder_footnotes", reorder_fn) def _footnote_ref_renderer(node: RenderTreeNode, context: RenderContext) -> str: diff --git a/tests/fixture_helpers.py b/tests/fixture_helpers.py new file mode 100644 index 0000000..fede2f8 --- /dev/null +++ b/tests/fixture_helpers.py @@ -0,0 +1,23 @@ +"""Helper utilities for loading test fixtures.""" + +from pathlib import Path + +from markdown_it.utils import read_fixture_file + + +def load_fixtures(filename: str) -> list[tuple[int, str, str, str]]: + """Load fixtures from a file in tests/fixtures/ directory.""" + fixture_path = Path(__file__).parent / "fixtures" / filename + return read_fixture_file(fixture_path) + + +def get_fixture(filename: str, title: str) -> tuple[str, str]: + """Get a specific fixture by title from a file.""" + fixtures = load_fixtures(filename) + for _, fixture_title, input_text, expected_output in fixtures: + if fixture_title == title: + return input_text, expected_output + available = [f[1] for f in fixtures] + raise ValueError( + f"Fixture '{title}' not found in {filename}. Available: {available}" + ) diff --git a/tests/fixtures/cli_integration.md b/tests/fixtures/cli_integration.md new file mode 100644 index 0000000..223838a --- /dev/null +++ b/tests/fixtures/cli_integration.md @@ -0,0 +1,14 @@ +CLI keep orphans flag test +. +Referenced [^used] + +[^orphan]: This is never referenced + +[^used]: This is used +. +Referenced [^used] + +[^used]: This is used + +[^orphan]: This is never referenced +. diff --git a/tests/fixtures/cli_options.md b/tests/fixtures/cli_options.md new file mode 100644 index 0000000..6afdf31 --- /dev/null +++ b/tests/fixtures/cli_options.md @@ -0,0 +1,45 @@ +Default removes orphans +. +Referenced [^used] + +[^orphan]: This is never referenced + +[^used]: This is used + +[^another-orphan]: Also unused +. +Referenced [^used] + +[^used]: This is used +. + + +Keep orphans flag preserves orphans +. +Referenced [^used] + +[^orphan]: This is never referenced + +[^used]: This is used +. +Referenced [^used] + +[^used]: This is used + +[^orphan]: This is never referenced +. + + +Nested footnotes not treated as orphans +. +Body [^a] + +[^a]: First [^b] +[^b]: Second +. +Body [^a] + +[^a]: First [^b] + +[^b]: Second +. diff --git a/tests/fixtures.md b/tests/fixtures/footnote.md similarity index 96% rename from tests/fixtures.md rename to tests/fixtures/footnote.md index 13a2b1d..d2fdd9f 100644 --- a/tests/fixtures.md +++ b/tests/fixtures/footnote.md @@ -124,8 +124,6 @@ footnote-ref-inside-footnote (issue #7) [^c]: ipsum [^a] . [^a]: lorem - -[^c]: ipsum [^a] . @@ -139,8 +137,6 @@ A [^b] . A [^b] -[^a]: Lorem. [^b] - [^b]: Ipsum. . @@ -196,12 +192,6 @@ Footnote in table nested in admonition (issue #22) [^2]: Green [^3]: Blue - -[^4]: Cyan - -[^5]: Magenta - -[^6]: Yellow . @@ -229,11 +219,7 @@ Referenced [^used] . Referenced [^used] -[^orphan]: This is never referenced - [^used]: This is used - -[^another-orphan]: Also unused . @@ -249,11 +235,11 @@ Start [^a] . Start [^a] -[^a]: References B [^b] - [^b]: References C [^c] [^c]: Final one + +[^a]: References B [^b] . @@ -267,9 +253,9 @@ Text [^a] then [^z] then [^a] again . Text [^a] then [^z] then [^a] again -[^z]: Defined first - [^a]: Defined second + +[^z]: Defined first . @@ -301,12 +287,12 @@ Text [^outer] . Text [^outer] +[^inner]: Inner content + [^outer]: List item: - Item with [^inner] reference - Another item - -[^inner]: Inner content . @@ -378,11 +364,11 @@ Body [^m] [^a] [^z] [^m] . Body [^m] [^a] [^z] [^m] -[^z]: Last defined [^a] +[^a]: First defined [^m] [^m]: Middle defined -[^a]: First defined [^m] +[^z]: Last defined [^a] . @@ -441,11 +427,7 @@ Body [^used-second] [^used-first] [^used-first]: Used -[^orphan-1]: Never used - [^used-second]: Also used [^used-first] - -[^orphan-2]: Also never used . diff --git a/tests/fixtures/regression.md b/tests/fixtures/regression.md new file mode 100644 index 0000000..4b7fcf1 --- /dev/null +++ b/tests/fixtures/regression.md @@ -0,0 +1,123 @@ +Issue 7: footnote ref inside footnote without body reference +. +[^a]: lorem +[^c]: ipsum [^a] +. +[^a]: lorem +. + + +Issue 7: with body reference +. +Body refs [^c] + +[^a]: lorem +[^c]: ipsum [^a] +. +Body refs [^c] + +[^c]: ipsum [^a] + +[^a]: lorem +. + + +Issue 8: nested footnote refs +. +[^a]: Lorem. [^b] + +[^b]: Ipsum. + +A [^b] +. +A [^b] + +[^b]: Ipsum. +. + + +Issue 22: nested in admonition +. +# Document + +| Color | +| ------ | +| R [^1] | +| G [^2] | +| B [^3] | + +```{tip} +| Color | +| ------ | +| C [^4] | +| M [^5] | +| Y [^6] | +``` + +[^1]: Red + +[^2]: Green + +[^3]: Blue + +[^4]: Cyan + +[^5]: Magenta + +[^6]: Yellow +. +# Document + +| Color | +| ------ | +| R [^1] | +| G [^2] | +| B [^3] | + +```{tip} +| Color | +| ------ | +| C [^4] | +| M [^5] | +| Y [^6] | +``` + +[^1]: Red + +[^2]: Green + +[^3]: Blue +. + + +Reference order preserved +. +Text [^b] then [^a] + +[^a]: First +[^b]: Second +. +Text [^b] then [^a] + +[^b]: Second + +[^a]: First +. + + +Chained nested footnotes +. +Start [^a] + +[^a]: References B [^b] +[^b]: References C [^c] +[^c]: Final one +. +Start [^a] + +[^a]: References B [^b] + +[^b]: References C [^c] + +[^c]: Final one +. diff --git a/tests/test_word_wrap.py b/tests/fixtures/word_wrap.md similarity index 65% rename from tests/test_word_wrap.py rename to tests/fixtures/word_wrap.md index fbe2867..afcbfd9 100644 --- a/tests/test_word_wrap.py +++ b/tests/fixtures/word_wrap.md @@ -1,16 +1,12 @@ -import mdformat - - -def test_word_wrap(): - input_text = """\ +Word wrap at column 40 +. [^a] [^a]: Ooh no, the first line of this first paragraph is still wrapped too wide unfortunately. Should fix this. But this second paragraph is wrapped exactly as expected. Woohooo, awesome! -""" - expected_output = """\ +. [^a] [^a]: Ooh no, the first line of this first @@ -20,6 +16,4 @@ def test_word_wrap(): But this second paragraph is wrapped exactly as expected. Woohooo, awesome! -""" - output = mdformat.text(input_text, options={"wrap": 40}, extensions={"footnote"}) - assert output == expected_output +. diff --git a/tests/test_cli_integration.py b/tests/test_cli_integration.py new file mode 100644 index 0000000..0de68f9 --- /dev/null +++ b/tests/test_cli_integration.py @@ -0,0 +1,51 @@ +"""Integration tests for CLI arguments.""" + +from pathlib import Path +import subprocess +import tempfile + +from fixture_helpers import get_fixture + + +def test_cli_keep_orphans_flag(): + """Test --keep-footnote-orphans flag from command line.""" + text, expected_keep = get_fixture( + "cli_integration.md", "CLI keep orphans flag test" + ) + + with tempfile.TemporaryDirectory() as tmpdir: + input_file = Path(tmpdir) / "test.md" + input_file.write_text(text) + + # Default behavior: remove orphans + result = subprocess.run( + ["python", "-m", "mdformat", str(input_file)], + capture_output=True, + text=True, + ) + assert result.returncode == 0 + output_default = input_file.read_text() + assert "[^orphan]" not in output_default + assert "[^used]" in output_default + + # With --keep-footnote-orphans: preserve orphans + input_file.write_text(text) # Reset file + result = subprocess.run( + ["python", "-m", "mdformat", "--keep-footnote-orphans", str(input_file)], + capture_output=True, + text=True, + ) + assert result.returncode == 0 + output_keep = input_file.read_text() + assert output_keep.strip() == expected_keep.strip() + + +def test_cli_help_shows_option(): + """Test that --keep-footnote-orphans appears in help.""" + result = subprocess.run( + ["python", "-m", "mdformat", "--help"], + capture_output=True, + text=True, + ) + assert result.returncode == 0 + assert "--keep-footnote-orphans" in result.stdout diff --git a/tests/test_fixtures.py b/tests/test_fixtures.py index c68bb16..0d73092 100644 --- a/tests/test_fixtures.py +++ b/tests/test_fixtures.py @@ -1,17 +1,42 @@ +"""All fixture-based tests for mdformat-footnote.""" + from pathlib import Path -from markdown_it.utils import read_fixture_file +from fixture_helpers import load_fixtures import mdformat import pytest -FIXTURE_PATH = Path(__file__).parent / "fixtures.md" -fixtures = read_fixture_file(FIXTURE_PATH) + +def _get_options(filename: str, title: str) -> dict: + """Determine mdformat options based on fixture file and title.""" + if filename == "word_wrap.md": + return {"wrap": 40} + if "keep orphans" in title.lower(): + return {"keep_orphans": True} + return {} + + +# Load all fixture files +TEST_CASES: list[tuple[str, str, str, str, str, dict]] = [] +for pth in (Path(__file__).parent / "fixtures").glob("*.md"): + filename = pth.name + for line, title, text, expected in load_fixtures(filename): + options = _get_options(filename, title) + TEST_CASES.append((filename, line, title, text, expected, options)) @pytest.mark.parametrize( - "line,title,text,expected", fixtures, ids=[f[1] for f in fixtures] + "filename,line,title,text,expected,options", + TEST_CASES, + ids=[f"{tc[0].replace('.md', '')}::{tc[2]}" for tc in TEST_CASES], ) -def test_fixtures(line, title, text, expected): - output = mdformat.text(text, extensions={"footnote"}) - print(output) +def test_fixtures( + filename: str, + line: int, + title: str, + text: str, + expected: str, + options: dict, +): + output = mdformat.text(text, extensions={"footnote"}, options=options) assert output.rstrip() == expected.rstrip(), output From db2da89a1c2b8c033f8fc87d72e0aefaab449083 Mon Sep 17 00:00:00 2001 From: Kyle King Date: Tue, 27 Jan 2026 08:03:44 -0600 Subject: [PATCH 6/7] ci: fix merge with master --- tests/test_fixtures.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_fixtures.py b/tests/test_fixtures.py index 0d73092..9e9a26d 100644 --- a/tests/test_fixtures.py +++ b/tests/test_fixtures.py @@ -1,6 +1,7 @@ """All fixture-based tests for mdformat-footnote.""" from pathlib import Path +import re from fixture_helpers import load_fixtures import mdformat @@ -10,6 +11,8 @@ def _get_options(filename: str, title: str) -> dict: """Determine mdformat options based on fixture file and title.""" if filename == "word_wrap.md": + if match := re.search(r"wrap at (\d+)", title): + return {"wrap": int(match.group(1))} return {"wrap": 40} if "keep orphans" in title.lower(): return {"keep_orphans": True} From d323bdfb406422e7366f466213eb2e5bc602125f Mon Sep 17 00:00:00 2001 From: Kyle King Date: Tue, 27 Jan 2026 20:40:01 -0600 Subject: [PATCH 7/7] fix: don't orphan when in tables --- mdformat_footnote/_reorder.py | 318 ++++++++++++++++++---------------- tests/fixtures/footnote.md | 6 + tests/fixtures/regression.md | 6 + 3 files changed, 178 insertions(+), 152 deletions(-) diff --git a/mdformat_footnote/_reorder.py b/mdformat_footnote/_reorder.py index e1bcb2c..71c1bbf 100644 --- a/mdformat_footnote/_reorder.py +++ b/mdformat_footnote/_reorder.py @@ -3,9 +3,26 @@ from __future__ import annotations from dataclasses import dataclass, field +import re from markdown_it.rules_core import StateCore +_FOOTNOTE_REF_PATTERN = re.compile(r"\[\^([^\]]+)\]") + + +@dataclass +class _FootnoteCategories: + """Categorized footnotes for reordering.""" + + body_referenced: list[tuple[int, str, str]] + nested_only: set[str] + fence_only: list[str] + true_orphans: list[str] + + @property + def body_labels(self) -> set[str]: + return {label for _, _, label in self.body_referenced} + @dataclass class _ReorderState: @@ -18,15 +35,13 @@ class _ReorderState: processed: set[str] = field(default_factory=set) new_id: int = 0 - def get_or_create_def(self, label: str) -> dict: - """Get footnote definition by label, or create a default.""" + def _find_def_by_label(self, label: str) -> dict: for fn_data in self.old_list.values(): if fn_data.get("label") == label: return fn_data.copy() return {"label": label, "count": 0} - def find_old_id_by_label(self, label: str) -> int | None: - """Find the old ID for a label in old_list.""" + def _find_old_id_by_label(self, label: str) -> int | None: for old_id, fn_data in self.old_list.items(): if fn_data.get("label") == label: return old_id @@ -36,184 +51,149 @@ def add_footnote( self, label: str, label_key: str, old_id: int | None = None ) -> None: """Add a footnote to the new list and update mappings.""" - self.new_list[self.new_id] = self.get_or_create_def(label) + if label in self.processed: + return - if old_id is not None: - self.old_to_new_id[old_id] = self.new_id - elif (found_id := self.find_old_id_by_label(label)) is not None: - self.old_to_new_id[found_id] = self.new_id + self.new_list[self.new_id] = self._find_def_by_label(label) + + effective_old_id = old_id or self._find_old_id_by_label(label) + if effective_old_id is not None: + self.old_to_new_id[effective_old_id] = self.new_id self.refs[label_key] = self.new_id self.processed.add(label) self.new_id += 1 +def _collect_refs_in_fences(tokens: list) -> list[str]: + """Collect footnote labels referenced in fence tokens, preserving order.""" + refs: list[str] = [] + seen: set[str] = set() + for token in tokens: + if token.type != "fence" or not token.content: + continue + for match in _FOOTNOTE_REF_PATTERN.finditer(token.content): + label = match.group(1) + if label not in seen: + refs.append(label) + seen.add(label) + return refs + + +def _build_dependency_graph(tokens: list) -> dict[str, set[str]]: + """Build a graph of which footnotes reference which others.""" + graph: dict[str, set[str]] = {} + current_def_label: str | None = None + + for token in tokens: + match token.type: + case "footnote_reference_open": + current_def_label = token.meta.get("label") + if current_def_label: + graph.setdefault(current_def_label, set()) + case "footnote_reference_close": + current_def_label = None + case _ if current_def_label is not None: + _collect_nested_refs(token, graph[current_def_label]) + + return graph + + +def _collect_nested_refs(token, ref_set: set[str]) -> None: + """Collect footnote labels referenced from a token and its children.""" + if token.type == "footnote_ref" and token.meta: + ref_set.add(token.meta["label"]) + for child in token.children or []: + _collect_nested_refs(child, ref_set) + + def _categorize_footnotes( refs: dict, footnote_deps: dict[str, set[str]], -) -> tuple[list[tuple[int, str, str]], set[str], list[str]]: - """Categorize footnotes into body-referenced, nested-only, and orphans. - - Returns: - Tuple of (body_referenced, nested_only, true_orphans) where: - - body_referenced: list of (old_id, label_key, label) sorted by old_id - - nested_only: set of labels only referenced from other footnotes - - true_orphans: list of label_keys never referenced anywhere - """ + refs_in_fences: list[str], +) -> _FootnoteCategories: + """Categorize footnotes.""" referenced_by_footnotes: set[str] = set() for refs_set in footnote_deps.values(): referenced_by_footnotes.update(refs_set) + refs_in_fences_set = set(refs_in_fences) + body_referenced: list[tuple[int, str, str]] = [] nested_only: set[str] = set() + fence_only_set: set[str] = set() true_orphans: list[str] = [] for label_key, old_id in refs.items(): label = label_key[1:] - if old_id >= 0: - body_referenced.append((old_id, label_key, label)) - elif label in referenced_by_footnotes: - nested_only.add(label) - else: - true_orphans.append(label_key) + match ( + old_id >= 0, + label in referenced_by_footnotes, + label in refs_in_fences_set, + ): + case (True, _, _): + body_referenced.append((old_id, label_key, label)) + case (False, True, _): + nested_only.add(label) + case (False, False, True): + fence_only_set.add(label) + case _: + true_orphans.append(label_key) body_referenced.sort(key=lambda x: x[0]) - return body_referenced, nested_only, true_orphans + fence_only = [label for label in refs_in_fences if label in fence_only_set] + return _FootnoteCategories(body_referenced, nested_only, fence_only, true_orphans) -def _should_skip_nested( - label: str, - state: _ReorderState, - body_referenced_labels: set[str], - true_orphans: list[str], -) -> bool: - """Check if a nested footnote should be skipped.""" - if label in state.processed: - return True - if label in body_referenced_labels: - return True - if f":{label}" in true_orphans: - return True - return False - - -def _process_nested_footnotes( + +def _process_nested_for_parent( parent_label: str, footnote_deps: dict[str, set[str]], state: _ReorderState, - body_referenced_labels: set[str], - true_orphans: list[str], + skip_labels: set[str], ) -> None: """Process nested footnotes referenced by a parent footnote.""" - if parent_label not in footnote_deps: - return - - for nested_label in footnote_deps[parent_label]: - if _should_skip_nested( - nested_label, state, body_referenced_labels, true_orphans - ): - continue - state.add_footnote(nested_label, f":{nested_label}") + for nested_label in footnote_deps.get(parent_label, []): + if nested_label not in skip_labels: + state.add_footnote(nested_label, f":{nested_label}") -def reorder_footnotes_by_definition( - state: StateCore, keep_orphans: bool = False -) -> None: - """Reorder footnotes by reference order, fix IDs, and handle orphans. - - The mdit-py-plugins footnote plugin assigns IDs based on the order - references are encountered during inline parsing. This function: - 1. Sorts footnotes by reference order (order they appear in body text) - 2. Keeps nested footnotes (referenced from other footnotes) with their parents - 3. Removes true orphans (never referenced) unless keep_orphans=True - 4. Reassigns IDs to ensure consistent HTML output - - Args: - state: markdown-it state - keep_orphans: If True, preserve footnotes that are never referenced - """ - if "footnotes" not in state.env: - return - - footnote_data = state.env["footnotes"] - refs = footnote_data.get("refs", {}) - old_list = footnote_data.get("list", {}) - - if not refs: - return - - footnote_deps = _build_dependency_graph(state.tokens) - body_referenced, nested_only, true_orphans = _categorize_footnotes( - refs, footnote_deps - ) - - if not keep_orphans: - for orphan_key in true_orphans: - del refs[orphan_key] +def _build_reordered_list( + categories: _FootnoteCategories, + footnote_deps: dict[str, set[str]], + old_list: dict, + refs: dict, + keep_orphans: bool, +) -> _ReorderState: + """Build the reordered footnote list from categorized footnotes.""" + state = _ReorderState(old_list=old_list, refs=refs) + skip_labels = categories.body_labels | set(categories.true_orphans) - body_referenced_labels = {label for _, _, label in body_referenced} - reorder_state = _ReorderState(old_list=old_list, refs=refs) + for old_id, label_key, label in categories.body_referenced: + state.add_footnote(label, label_key, old_id) + _process_nested_for_parent(label, footnote_deps, state, skip_labels) - for old_id, label_key, label in body_referenced: - reorder_state.add_footnote(label, label_key, old_id) - _process_nested_footnotes( - label, footnote_deps, reorder_state, body_referenced_labels, true_orphans - ) + for nested_label in categories.nested_only: + state.add_footnote(nested_label, f":{nested_label}") - for nested_label in nested_only - reorder_state.processed: - reorder_state.add_footnote(nested_label, f":{nested_label}") + for fence_label in categories.fence_only: + state.add_footnote(fence_label, f":{fence_label}") if keep_orphans: - for orphan_key in true_orphans: - reorder_state.add_footnote(orphan_key[1:], orphan_key) + for orphan_key in categories.true_orphans: + state.add_footnote(orphan_key[1:], orphan_key) - footnote_data["list"] = reorder_state.new_list - - _update_token_ids(state.tokens, reorder_state.old_to_new_id) - _reassign_subids(state.tokens, refs, reorder_state.new_list) - - -def _build_dependency_graph(tokens: list) -> dict[str, set[str]]: - """Build a graph of which footnotes reference which others. - - Returns: - Dict mapping footnote label to set of labels it references - """ - graph: dict[str, set[str]] = {} - current_def_label: str | None = None - - for token in tokens: - if token.type == "footnote_reference_open": - current_def_label = token.meta.get("label") - if current_def_label: - graph.setdefault(current_def_label, set()) - elif token.type == "footnote_reference_close": - current_def_label = None - elif current_def_label is not None: - _collect_nested_refs(token, graph[current_def_label]) - - return graph - - -def _collect_nested_refs(token, ref_set: set[str]) -> None: - """Collect footnote labels referenced from a token and its children.""" - if token.type == "footnote_ref" and token.meta: - ref_set.add(token.meta["label"]) - if token.children: - for child in token.children: - _collect_nested_refs(child, ref_set) + return state def _update_token_ids(tokens: list, old_to_new_id: dict[int, int]) -> None: """Recursively update footnote IDs in tokens.""" for token in tokens: if token.type in ("footnote_ref", "footnote_anchor"): - if token.meta and "id" in token.meta: - old_id = token.meta["id"] - if old_id in old_to_new_id: - token.meta["id"] = old_to_new_id[old_id] - if token.children: - _update_token_ids(token.children, old_to_new_id) + if token.meta and (old_id := token.meta.get("id")) in old_to_new_id: + token.meta["id"] = old_to_new_id[old_id] + for child in token.children or []: + _update_token_ids([child], old_to_new_id) def _partition_refs_by_context(tokens: list) -> tuple[list, dict[str, list]]: @@ -223,16 +203,17 @@ def _partition_refs_by_context(tokens: list) -> tuple[list, dict[str, list]]: current_def_label: str | None = None for token in tokens: - if token.type == "footnote_reference_open": - current_def_label = token.meta.get("label") - if current_def_label: - def_refs.setdefault(current_def_label, []) - elif token.type == "footnote_reference_close": - current_def_label = None - elif current_def_label is None: - _collect_refs(token, body_refs) - else: - _collect_refs(token, def_refs.setdefault(current_def_label, [])) + match token.type: + case "footnote_reference_open": + current_def_label = token.meta.get("label") + if current_def_label: + def_refs.setdefault(current_def_label, []) + case "footnote_reference_close": + current_def_label = None + case _ if current_def_label is None: + _collect_refs(token, body_refs) + case _: + _collect_refs(token, def_refs.setdefault(current_def_label, [])) return body_refs, def_refs @@ -266,6 +247,39 @@ def _collect_refs(token, ref_list: list) -> None: """Collect footnote_ref tokens from a token and its children.""" if token.type == "footnote_ref" and token.meta: ref_list.append(token) - if token.children: - for child in token.children: - _collect_refs(child, ref_list) + for child in token.children or []: + _collect_refs(child, ref_list) + + +def _get_footnote_data(state: StateCore) -> tuple[dict, dict] | None: + """Extract footnote refs and list from state, or None if missing.""" + footnote_data = state.env.get("footnotes", {}) + refs = footnote_data.get("refs", {}) + if not refs: + return None + return refs, footnote_data.get("list", {}) + + +def reorder_footnotes_by_definition( + state: StateCore, keep_orphans: bool = False +) -> None: + """Reorder footnotes by reference order, fix IDs, and handle orphans.""" + if (data := _get_footnote_data(state)) is None: + return + + refs, old_list = data + footnote_deps = _build_dependency_graph(state.tokens) + refs_in_fences = _collect_refs_in_fences(state.tokens) + categories = _categorize_footnotes(refs, footnote_deps, refs_in_fences) + + if not keep_orphans: + for orphan_key in categories.true_orphans: + del refs[orphan_key] + + reorder_state = _build_reordered_list( + categories, footnote_deps, old_list, refs, keep_orphans + ) + + state.env["footnotes"]["list"] = reorder_state.new_list + _update_token_ids(state.tokens, reorder_state.old_to_new_id) + _reassign_subids(state.tokens, refs, reorder_state.new_list) diff --git a/tests/fixtures/footnote.md b/tests/fixtures/footnote.md index d2fdd9f..49fc307 100644 --- a/tests/fixtures/footnote.md +++ b/tests/fixtures/footnote.md @@ -192,6 +192,12 @@ Footnote in table nested in admonition (issue #22) [^2]: Green [^3]: Blue + +[^4]: Cyan + +[^5]: Magenta + +[^6]: Yellow . diff --git a/tests/fixtures/regression.md b/tests/fixtures/regression.md index 4b7fcf1..ea52cb8 100644 --- a/tests/fixtures/regression.md +++ b/tests/fixtures/regression.md @@ -87,6 +87,12 @@ Issue 22: nested in admonition [^2]: Green [^3]: Blue + +[^4]: Cyan + +[^5]: Magenta + +[^6]: Yellow .