From 0d18c59b10c37de029e6a7b637257778a7837e95 Mon Sep 17 00:00:00 2001 From: Jake LoRocco Date: Fri, 16 Jan 2026 09:18:38 -0500 Subject: [PATCH] fix: import times by not exporting RichDocument at module level --- docs/examples/mify/mify.py | 2 +- docs/examples/mify/rich_document_advanced.py | 2 +- .../examples/mify/rich_table_execute_basic.py | 2 +- .../examples/notebooks/document_mobject.ipynb | 15 +++---------- docs/examples/rag/mellea_pdf.py | 2 +- docs/examples/tutorial/document_mobject.py | 4 ++-- docs/tutorial.md | 4 ++-- mellea/stdlib/components/docs/__init__.py | 8 +++++-- mellea/stdlib/requirements/md.py | 21 ++++++++++++++++++- test/backends/test_tool_calls.py | 2 +- .../components/docs/test_richdocument.py | 2 +- test/stdlib/components/test_transform.py | 2 +- 12 files changed, 40 insertions(+), 26 deletions(-) diff --git a/docs/examples/mify/mify.py b/docs/examples/mify/mify.py index b84a29cc..3d6b81eb 100644 --- a/docs/examples/mify/mify.py +++ b/docs/examples/mify/mify.py @@ -1,4 +1,4 @@ -from mellea.stdlib.components.docs import TableQuery +from mellea.stdlib.components.docs.richdocument import TableQuery from mellea.stdlib.components.mify import MifiedProtocol, mify from mellea.stdlib.session import start_session diff --git a/docs/examples/mify/rich_document_advanced.py b/docs/examples/mify/rich_document_advanced.py index c3ba2b41..b186218f 100644 --- a/docs/examples/mify/rich_document_advanced.py +++ b/docs/examples/mify/rich_document_advanced.py @@ -35,7 +35,7 @@ # 4. `Mellea` also provides a basic wrapper around this functionality to make # basic processing of documents easier. -from mellea.stdlib.components.docs import RichDocument +from mellea.stdlib.components.docs.richdocument import RichDocument # This creates a new `Mellea` RichDocument component that encapsulates all # the logic above along with some convenient helpers. diff --git a/docs/examples/mify/rich_table_execute_basic.py b/docs/examples/mify/rich_table_execute_basic.py index eabb791e..a9a5c112 100644 --- a/docs/examples/mify/rich_table_execute_basic.py +++ b/docs/examples/mify/rich_table_execute_basic.py @@ -5,7 +5,7 @@ from mellea.backends import model_ids from mellea.backends import ModelOption from mellea.core import FancyLogger -from mellea.stdlib.components.docs import RichDocument, Table +from mellea.stdlib.components.docs.richdocument import RichDocument, Table FancyLogger.get_logger().setLevel("ERROR") diff --git a/docs/examples/notebooks/document_mobject.ipynb b/docs/examples/notebooks/document_mobject.ipynb index 55c7a2b7..090f6d58 100644 --- a/docs/examples/notebooks/document_mobject.ipynb +++ b/docs/examples/notebooks/document_mobject.ipynb @@ -80,11 +80,7 @@ "id": "3j-Se7PpfMqV" }, "outputs": [], - "source": [ - "from mellea.stdlib.components.docs import RichDocument\n", - "\n", - "rd = RichDocument.from_document_file(\"https://arxiv.org/pdf/1906.04043\")" - ] + "source": "from mellea.stdlib.components.docs.richdocument import RichDocument\n\nrd = RichDocument.from_document_file(\"https://arxiv.org/pdf/1906.04043\")" }, { "cell_type": "markdown", @@ -101,12 +97,7 @@ "id": "kcBb3g_BfMqV" }, "outputs": [], - "source": [ - "from mellea.stdlib.components.docs import Table\n", - "\n", - "table1: Table = rd.get_tables()[0]\n", - "print(table1.to_markdown())" - ] + "source": "from mellea.stdlib.components.docs.richdocument import Table\n\ntable1: Table = rd.get_tables()[0]\nprint(table1.to_markdown())" }, { "cell_type": "markdown", @@ -177,4 +168,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/docs/examples/rag/mellea_pdf.py b/docs/examples/rag/mellea_pdf.py index c0a64140..5f668e6c 100644 --- a/docs/examples/rag/mellea_pdf.py +++ b/docs/examples/rag/mellea_pdf.py @@ -1,5 +1,5 @@ import mellea -from mellea.stdlib.components.docs import RichDocument +from mellea.stdlib.components.docs.richdocument import RichDocument m = mellea.start_session() diff --git a/docs/examples/tutorial/document_mobject.py b/docs/examples/tutorial/document_mobject.py index 19d04bf7..42c18cb1 100644 --- a/docs/examples/tutorial/document_mobject.py +++ b/docs/examples/tutorial/document_mobject.py @@ -1,10 +1,10 @@ from mellea.backends import model_ids from mellea.backends.model_ids import IBM_GRANITE_3_3_8B -from mellea.stdlib.components.docs import RichDocument +from mellea.stdlib.components.docs.richdocument import RichDocument rd = RichDocument.from_document_file("https://arxiv.org/pdf/1906.04043") -from mellea.stdlib.components.docs import Table # noqa: E402 +from mellea.stdlib.components.docs.richdocument import Table # noqa: E402 table1: Table = rd.get_tables()[0] print(table1.to_markdown()) diff --git a/docs/tutorial.md b/docs/tutorial.md index 39144abc..5270ab2c 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -617,7 +617,7 @@ Let's create a RichDocument from an arxiv paper: ```python # file: https://github.com/generative-computing/mellea/blob/main/docs/examples/tutorial/document_mobject.py#L1-L3 -from mellea.stdlib.components.docs import RichDocument +from mellea.stdlib.components.docs.richdocument import RichDocument rd = RichDocument.from_document_file("https://arxiv.org/pdf/1906.04043") ``` this loads the PDF file and parses it using the Docling parser into an @@ -627,7 +627,7 @@ From the rich document we can extract some document content, e.g. the first table: ```python # file: https://github.com/generative-computing/mellea/blob/main/docs/examples/tutorial/document_mobject.py#L5-L8 -from mellea.stdlib.components.docs import Table +from mellea.stdlib.components.docs.richdocument import Table table1: Table = rd.get_tables()[0] print(table1.to_markdown()) ``` diff --git a/mellea/stdlib/components/docs/__init__.py b/mellea/stdlib/components/docs/__init__.py index 794b5201..2c2de3ee 100644 --- a/mellea/stdlib/components/docs/__init__.py +++ b/mellea/stdlib/components/docs/__init__.py @@ -1,5 +1,9 @@ """Classes and functions for working with document-like objects.""" -from .richdocument import RichDocument, Table, TableQuery, TableTransform +from .document import Document -__all__ = ["RichDocument", "Table", "TableQuery", "TableTransform"] +# Note: RichDocument, Table, TableQuery, TableTransform are not imported here +# by default to avoid heavy docling/torch/transformers imports at module load time. +# Import them explicitly from mellea.stdlib.components.docs.richdocument when needed. + +__all__ = ["Document"] diff --git a/mellea/stdlib/requirements/md.py b/mellea/stdlib/requirements/md.py index 8d3ed00d..5af44453 100644 --- a/mellea/stdlib/requirements/md.py +++ b/mellea/stdlib/requirements/md.py @@ -1,14 +1,32 @@ """This file contains various requirements for Markdown-formatted files.""" -import mistletoe +from __future__ import annotations + +from typing import TYPE_CHECKING from ...core import Context, Requirement +if TYPE_CHECKING: + import mistletoe + +_mistletoe = None + + +def _get_mistletoe(): + global _mistletoe + if _mistletoe is None: + import mistletoe as mt + + _mistletoe = mt + return _mistletoe + + # region lists def as_markdown_list(ctx: Context) -> list[str] | None: """Attempts to format the last_output of the given context as a markdown list.""" + mistletoe = _get_mistletoe() xs = list() raw_output = ctx.last_output() assert raw_output is not None @@ -44,6 +62,7 @@ def _md_list(ctx: Context): def _md_table(ctx: Context): + mistletoe = _get_mistletoe() raw_output = ctx.last_output() assert raw_output is not None try: diff --git a/test/backends/test_tool_calls.py b/test/backends/test_tool_calls.py index 7cdb292d..958dfe78 100644 --- a/test/backends/test_tool_calls.py +++ b/test/backends/test_tool_calls.py @@ -9,7 +9,7 @@ from mellea.core import ModelOutputThunk from mellea.stdlib.context import ChatContext -from mellea.stdlib.components.docs import Table +from mellea.stdlib.components.docs.richdocument import Table from mellea.stdlib.session import MelleaSession diff --git a/test/stdlib/components/docs/test_richdocument.py b/test/stdlib/components/docs/test_richdocument.py index ab713078..f96038c3 100644 --- a/test/stdlib/components/docs/test_richdocument.py +++ b/test/stdlib/components/docs/test_richdocument.py @@ -1,6 +1,6 @@ import os from mellea.core import TemplateRepresentation -from mellea.stdlib.components.docs import RichDocument, Table +from mellea.stdlib.components.docs.richdocument import RichDocument, Table import mellea from docling_core.types.doc.document import DoclingDocument import tempfile diff --git a/test/stdlib/components/test_transform.py b/test/stdlib/components/test_transform.py index d3a604e4..c99ac884 100644 --- a/test/stdlib/components/test_transform.py +++ b/test/stdlib/components/test_transform.py @@ -1,7 +1,7 @@ import pytest from mellea.core import TemplateRepresentation -from mellea.stdlib.components.docs import TableTransform +from mellea.stdlib.components.docs.richdocument import TableTransform from mellea.stdlib.components import MObject, Query, Transform custom_mobject_description = "custom mobject description"