From 65fcb5ed629f0ca1dfa1bd060a084786e52b47a2 Mon Sep 17 00:00:00 2001 From: Avinash Balakrishnan Date: Wed, 7 Jan 2026 10:22:53 -0800 Subject: [PATCH 01/13] Additional tests cleanup and skipping when running on github actions. --- test/backends/test_huggingface.py | 7 +++ test/backends/test_huggingface_tools.py | 7 +++ test/backends/test_vllm.py | 6 +++ test/backends/test_vllm_tools.py | 6 +++ test/backends/test_watsonx.py | 6 +++ test/conftest.py | 49 +++++++++++++++++++++ test/stdlib_intrinsics/test_rag/test_rag.py | 6 +++ 7 files changed, 87 insertions(+) diff --git a/test/backends/test_huggingface.py b/test/backends/test_huggingface.py index 328b6068..7be4e552 100644 --- a/test/backends/test_huggingface.py +++ b/test/backends/test_huggingface.py @@ -1,6 +1,7 @@ import asyncio from copy import copy import faulthandler +import os import random import time from typing import Any, Coroutine @@ -11,6 +12,12 @@ import torch from typing_extensions import Annotated +# Skip entire module in CI since 17/18 tests are qualitative +pytestmark = pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping HuggingFace tests in CI - mostly qualitative tests", +) + from mellea import MelleaSession from mellea.backends.adapters.adapter import GraniteCommonAdapter from mellea.backends.cache import SimpleLRUCache diff --git a/test/backends/test_huggingface_tools.py b/test/backends/test_huggingface_tools.py index 0df5f3dc..5fca6d10 100644 --- a/test/backends/test_huggingface_tools.py +++ b/test/backends/test_huggingface_tools.py @@ -1,7 +1,14 @@ +import os import pydantic import pytest from typing_extensions import Annotated +# Skip entire module in CI since the single test is qualitative +pytestmark = pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping HuggingFace tools tests in CI - qualitative test", +) + import mellea.backends.model_ids as model_ids from mellea import MelleaSession from mellea.backends.cache import SimpleLRUCache diff --git a/test/backends/test_vllm.py b/test/backends/test_vllm.py index cfcda8c2..c396b916 100644 --- a/test/backends/test_vllm.py +++ b/test/backends/test_vllm.py @@ -4,6 +4,12 @@ import pytest from typing_extensions import Annotated +# Skip entire module in CI since all 8 tests are qualitative +pytestmark = pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping vLLM tests in CI - all qualitative tests", +) + from mellea import MelleaSession from mellea.backends.vllm import LocalVLLMBackend from mellea.backends.types import ModelOption diff --git a/test/backends/test_vllm_tools.py b/test/backends/test_vllm_tools.py index 69c824b2..76101754 100644 --- a/test/backends/test_vllm_tools.py +++ b/test/backends/test_vllm_tools.py @@ -3,6 +3,12 @@ import pytest from typing_extensions import Annotated +# Skip entire module in CI since the single test is qualitative +pytestmark = pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping vLLM tools tests in CI - qualitative test", +) + from mellea import MelleaSession from mellea.backends.vllm import LocalVLLMBackend from mellea.backends.types import ModelOption diff --git a/test/backends/test_watsonx.py b/test/backends/test_watsonx.py index 08615973..9a17ea97 100644 --- a/test/backends/test_watsonx.py +++ b/test/backends/test_watsonx.py @@ -5,6 +5,12 @@ import pydantic import pytest +# Skip entire module in CI since 8/9 tests are qualitative +pytestmark = pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping Watsonx tests 
in CI - mostly qualitative tests", +) + from mellea import MelleaSession from mellea.backends.formatter import TemplateFormatter from mellea.backends.types import ModelOption diff --git a/test/conftest.py b/test/conftest.py index 4b799d50..19f713a3 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,3 +1,4 @@ +import gc import os import pytest @@ -9,6 +10,7 @@ def gh_run() -> int: def pytest_runtest_setup(item): + """Skip qualitative tests when running in CI environment.""" # Runs tests *not* marked with `@pytest.mark.qualitative` to run normally. if not item.get_closest_marker("qualitative"): return @@ -19,3 +21,50 @@ def pytest_runtest_setup(item): pytest.skip( reason="Skipping qualitative test: got env variable CICD == 1. Used only in gh workflows." ) + + +@pytest.fixture(autouse=True, scope="function") +def aggressive_cleanup(): + """Aggressive memory cleanup after each test to prevent OOM on CI runners.""" + yield + # Only run aggressive cleanup in CI where memory is constrained + if int(os.environ.get("CICD", 0)) != 1: + return + + # Cleanup after each test + gc.collect() + gc.collect() + + # If torch is available, clear CUDA cache + try: + import torch + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.synchronize() + except ImportError: + pass + + +@pytest.fixture(autouse=True, scope="module") +def cleanup_module_fixtures(): + """Cleanup module-scoped fixtures to free memory between test modules.""" + yield + # Only run aggressive cleanup in CI where memory is constrained + if int(os.environ.get("CICD", 0)) != 1: + return + + # Cleanup after module + gc.collect() + gc.collect() + gc.collect() + + # If torch is available, clear CUDA cache + try: + import torch + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.synchronize() + except ImportError: + pass diff --git a/test/stdlib_intrinsics/test_rag/test_rag.py b/test/stdlib_intrinsics/test_rag/test_rag.py index 47b13e02..66fb833f 100644 --- a/test/stdlib_intrinsics/test_rag/test_rag.py +++ b/test/stdlib_intrinsics/test_rag/test_rag.py @@ -13,6 +13,12 @@ from mellea.stdlib.chat import Message from mellea.stdlib.intrinsics import rag +# Skip entire module in CI since all 7 tests are qualitative +pytestmark = pytest.mark.skipif( + int(os.environ.get("CICD", 0)) == 1, + reason="Skipping RAG tests in CI - all qualitative tests", +) + DATA_ROOT = pathlib.Path(os.path.dirname(__file__)) / "testdata" """Location of data files for the tests in this file.""" From f22afe6f24a7c7b983cfe1002de0085938db9524 Mon Sep 17 00:00:00 2001 From: Avinash Balakrishnan Date: Thu, 8 Jan 2026 11:08:18 -0800 Subject: [PATCH 02/13] moving all tests to IBM_GRANITE_4_MICRO_3B --- .github/workflows/quality.yml | 17 +++------ test/stdlib_basics/test_functional.py | 15 +------- test/stdlib_basics/test_majority_voting.py | 17 +++------ test/stdlib_basics/test_model_output_thunk.py | 14 +------ test/stdlib_basics/test_sampling_ctx.py | 3 +- test/stdlib_basics/test_session.py | 37 +++---------------- test/stdlib_basics/test_vision_ollama.py | 10 ++--- test/stdlib_basics/test_vision_openai.py | 7 ++-- 8 files changed, 30 insertions(+), 90 deletions(-) diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index 75777260..776e09e7 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -3,10 +3,9 @@ name: Verify Code Quality on: workflow_call: - concurrency: - group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || 
github.ref_name }} - cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref_name }} + cancel-in-progress: true env: CICD: 1 @@ -15,10 +14,10 @@ env: jobs: quality: runs-on: ubuntu-latest - timeout-minutes: 90 # TODO: need to reduce this after we figure out our testing strategy. + timeout-minutes: 90 # TODO: need to reduce this after we figure out our testing strategy. strategy: matrix: - python-version: ['3.10', '3.11', '3.12'] # Need to add 3.13 once we resolve outlines issues. + python-version: ["3.10", "3.11", "3.12"] # Need to add 3.13 once we resolve outlines issues. steps: - uses: actions/checkout@v4 - name: Free disk space @@ -39,18 +38,14 @@ jobs: - name: Check style and run tests run: pre-commit run --all-files - name: Send failure message pre-commit - if: failure() # This step will only run if a previous step failed + if: failure() # This step will only run if a previous step failed run: echo "The quality verification failed. Please run precommit " - name: Install Ollama run: curl -fsSL https://ollama.com/install.sh | sh - name: Start serving ollama run: nohup ollama serve & - - name: Pull Llama 3.2:1b model - run: ollama pull llama3.2:1b - - name: Run Tests run: uv run -m pytest -v test - name: Send failure message tests - if: failure() # This step will only run if a previous step failed + if: failure() # This step will only run if a previous step failed run: echo "Tests failed. Please verify that tests are working locally." - diff --git a/test/stdlib_basics/test_functional.py b/test/stdlib_basics/test_functional.py index 4dbfb9e0..86bf9c81 100644 --- a/test/stdlib_basics/test_functional.py +++ b/test/stdlib_basics/test_functional.py @@ -3,25 +3,14 @@ from mellea.backends.types import ModelOption from mellea.stdlib.base import ModelOutputThunk from mellea.stdlib.chat import Message -from mellea.stdlib.functional import instruct, aact, avalidate, ainstruct +from mellea.stdlib.functional import aact, ainstruct, avalidate, instruct from mellea.stdlib.requirement import req from mellea.stdlib.session import start_session @pytest.fixture(scope="module") def m_session(gh_run): - if gh_run == 1: - m = start_session( - "ollama", - model_id="llama3.2:1b", - model_options={ModelOption.MAX_NEW_TOKENS: 5}, - ) - else: - m = start_session( - "ollama", - model_id="granite3.3:8b", - model_options={ModelOption.MAX_NEW_TOKENS: 5}, - ) + m = start_session(model_options={ModelOption.MAX_NEW_TOKENS: 5}) yield m del m diff --git a/test/stdlib_basics/test_majority_voting.py b/test/stdlib_basics/test_majority_voting.py index 56cc3389..eeec7b7c 100644 --- a/test/stdlib_basics/test_majority_voting.py +++ b/test/stdlib_basics/test_majority_voting.py @@ -1,25 +1,18 @@ +import pytest + +from mellea import MelleaSession, start_session from mellea.backends import ModelOption -from mellea import start_session, MelleaSession from mellea.stdlib.requirement import check, req, simple_validate from mellea.stdlib.sampling.majority_voting import ( - MBRDRougeLStrategy, MajorityVotingStrategyForMath, + MBRDRougeLStrategy, ) -import pytest - from mellea.stdlib.sampling.types import SamplingResult @pytest.fixture(scope="module") def m_session(gh_run): - if gh_run == 1: - m = start_session( - "ollama", - model_id="llama3.2:1b", - model_options={ModelOption.MAX_NEW_TOKENS: 5}, - ) - else: - m = start_session("ollama", model_id="llama3.2:1b") + m = start_session(model_options={ModelOption.MAX_NEW_TOKENS: 5}) yield m del m diff 
--git a/test/stdlib_basics/test_model_output_thunk.py b/test/stdlib_basics/test_model_output_thunk.py index 6f562812..8878c4b2 100644 --- a/test/stdlib_basics/test_model_output_thunk.py +++ b/test/stdlib_basics/test_model_output_thunk.py @@ -1,4 +1,5 @@ import copy + import pytest from mellea.backends.types import ModelOption @@ -10,18 +11,7 @@ # backend, but it simplifies test setup. @pytest.fixture(scope="module") def m_session(gh_run): - if gh_run == 1: - m = start_session( - "ollama", - model_id="llama3.2:1b", - model_options={ModelOption.MAX_NEW_TOKENS: 5}, - ) - else: - m = start_session( - "ollama", - model_id="granite3.3:8b", - model_options={ModelOption.MAX_NEW_TOKENS: 5}, - ) + m = start_session(model_options={ModelOption.MAX_NEW_TOKENS: 5}) yield m del m diff --git a/test/stdlib_basics/test_sampling_ctx.py b/test/stdlib_basics/test_sampling_ctx.py index 362730d6..496d3689 100644 --- a/test/stdlib_basics/test_sampling_ctx.py +++ b/test/stdlib_basics/test_sampling_ctx.py @@ -1,7 +1,8 @@ import pytest + from mellea import start_session from mellea.backends import ModelOption -from mellea.stdlib.base import ChatContext, ModelOutputThunk, Context +from mellea.stdlib.base import ChatContext, Context, ModelOutputThunk from mellea.stdlib.requirement import Requirement from mellea.stdlib.sampling import ( MultiTurnStrategy, diff --git a/test/stdlib_basics/test_session.py b/test/stdlib_basics/test_session.py index 6694246c..beb4fc78 100644 --- a/test/stdlib_basics/test_session.py +++ b/test/stdlib_basics/test_session.py @@ -7,24 +7,13 @@ from mellea.backends.types import ModelOption from mellea.stdlib.base import ChatContext, ModelOutputThunk from mellea.stdlib.chat import Message -from mellea.stdlib.session import start_session, MelleaSession +from mellea.stdlib.session import MelleaSession, start_session # We edit the context type in the async tests below. Don't change the scope here. 
-@pytest.fixture(scope="function") +@pytest.fixture(scope="module") def m_session(gh_run): - if gh_run == 1: - m = start_session( - "ollama", - model_id="llama3.2:1b", - model_options={ModelOption.MAX_NEW_TOKENS: 5}, - ) - else: - m = start_session( - "ollama", - model_id="granite3.3:8b", - model_options={ModelOption.MAX_NEW_TOKENS: 5}, - ) + m = start_session(model_options={ModelOption.MAX_NEW_TOKENS: 5}) yield m del m @@ -39,23 +28,9 @@ def test_start_session_watsonx(gh_run): assert response.value is not None -def test_start_session_openai_with_kwargs(gh_run): - if gh_run == 1: - m = start_session( - "openai", - model_id="llama3.2:1b", - base_url=f"http://{os.environ.get('OLLAMA_HOST', 'localhost:11434')}/v1", - api_key="ollama", - ) - else: - m = start_session( - "openai", - model_id="granite3.3:8b", - base_url=f"http://{os.environ.get('OLLAMA_HOST', 'localhost:11434')}/v1", - api_key="ollama", - ) - initial_ctx = m.ctx - response = m.instruct("testing") +def test_start_session_openai_with_kwargs(m_session): + initial_ctx = m_session.ctx + response = m_session.instruct("testing") assert isinstance(response, ModelOutputThunk) assert response.value is not None assert initial_ctx is not m.ctx diff --git a/test/stdlib_basics/test_vision_ollama.py b/test/stdlib_basics/test_vision_ollama.py index eae4e87b..1d7b5caf 100644 --- a/test/stdlib_basics/test_vision_ollama.py +++ b/test/stdlib_basics/test_vision_ollama.py @@ -3,10 +3,10 @@ from io import BytesIO import numpy as np -from PIL import Image import pytest +from PIL import Image -from mellea import start_session, MelleaSession +from mellea import MelleaSession, start_session from mellea.backends import ModelOption from mellea.stdlib.base import ImageBlock, ModelOutputThunk from mellea.stdlib.chat import Message @@ -16,11 +16,7 @@ @pytest.fixture(scope="module") def m_session(gh_run): if gh_run == 1: - m = start_session( - "ollama", - model_id="llama3.2:1b", - model_options={ModelOption.MAX_NEW_TOKENS: 5}, - ) + m = start_session(model_options={ModelOption.MAX_NEW_TOKENS: 5}) else: m = start_session( "ollama", diff --git a/test/stdlib_basics/test_vision_openai.py b/test/stdlib_basics/test_vision_openai.py index c922acd5..22f3e73b 100644 --- a/test/stdlib_basics/test_vision_openai.py +++ b/test/stdlib_basics/test_vision_openai.py @@ -3,11 +3,12 @@ from io import BytesIO import numpy as np -from PIL import Image import pytest +from PIL import Image -from mellea import start_session, MelleaSession +from mellea import MelleaSession, start_session from mellea.backends import ModelOption +from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B from mellea.stdlib.base import ImageBlock, ModelOutputThunk from mellea.stdlib.chat import Message from mellea.stdlib.instruction import Instruction @@ -18,7 +19,7 @@ def m_session(gh_run): if gh_run == 1: m = start_session( "openai", - model_id="llama3.2:1b", + model_id=IBM_GRANITE_4_MICRO_3B, base_url=f"http://{os.environ.get('OLLAMA_HOST', 'localhost:11434')}/v1", api_key="ollama", model_options={ModelOption.MAX_NEW_TOKENS: 5}, From 8b328a0c77123f9449bf6461754bad2a220f328e Mon Sep 17 00:00:00 2001 From: Avinash Balakrishnan Date: Fri, 9 Jan 2026 09:17:49 -0800 Subject: [PATCH 03/13] changing granite4 micro ollama name to latest --- mellea/backends/model_ids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mellea/backends/model_ids.py b/mellea/backends/model_ids.py index 3ffdb4b8..c7bb277e 100644 --- a/mellea/backends/model_ids.py +++ b/mellea/backends/model_ids.py @@ -28,7 +28,7 
@@ class ModelIdentifier: IBM_GRANITE_4_MICRO_3B = ModelIdentifier( hf_model_name="ibm-granite/granite-4.0-micro", - ollama_name="ibm/granite4:micro", + ollama_name="granite4:micro", watsonx_name="ibm/granite-4-h-small", ) # todo: watsonx model is different from ollama model - should be same. From 8431f28eae5d7e0c99b146afff0533cd64f1e108 Mon Sep 17 00:00:00 2001 From: Avinash Balakrishnan Date: Fri, 9 Jan 2026 09:42:10 -0800 Subject: [PATCH 04/13] Adding step to download granite micro --- .github/workflows/quality.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml index 776e09e7..ad82d084 100644 --- a/.github/workflows/quality.yml +++ b/.github/workflows/quality.yml @@ -44,6 +44,8 @@ jobs: run: curl -fsSL https://ollama.com/install.sh | sh - name: Start serving ollama run: nohup ollama serve & + - name: Pull model granite4:micro + run: ollama pull granite4:micro - name: Run Tests run: uv run -m pytest -v test - name: Send failure message tests From 2f995253f6cbe75b777d617e9e1fc1a4181189df Mon Sep 17 00:00:00 2001 From: Avinash Balakrishnan Date: Fri, 9 Jan 2026 09:42:58 -0800 Subject: [PATCH 05/13] Minor changes to make tests run --- mellea/backends/model_ids.py | 1 + test/stdlib_basics/test_session.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/mellea/backends/model_ids.py b/mellea/backends/model_ids.py index c7bb277e..7f68165f 100644 --- a/mellea/backends/model_ids.py +++ b/mellea/backends/model_ids.py @@ -29,6 +29,7 @@ class ModelIdentifier: IBM_GRANITE_4_MICRO_3B = ModelIdentifier( hf_model_name="ibm-granite/granite-4.0-micro", ollama_name="granite4:micro", + openai_name="granite4:micro", # setting this just for testing purposes. watsonx_name="ibm/granite-4-h-small", ) # todo: watsonx model is different from ollama model - should be same. 
diff --git a/test/stdlib_basics/test_session.py b/test/stdlib_basics/test_session.py index beb4fc78..ea493318 100644 --- a/test/stdlib_basics/test_session.py +++ b/test/stdlib_basics/test_session.py @@ -33,7 +33,7 @@ def test_start_session_openai_with_kwargs(m_session): response = m_session.instruct("testing") assert isinstance(response, ModelOutputThunk) assert response.value is not None - assert initial_ctx is not m.ctx + assert initial_ctx is not m_session.ctx async def test_aact(m_session): From 3823a22f40e09c40448d127152306f767131ea2f Mon Sep 17 00:00:00 2001 From: Avinash Balakrishnan Date: Fri, 9 Jan 2026 10:25:47 -0800 Subject: [PATCH 06/13] Fixing some wayward tests still in llama --- test/backends/test_openai_ollama.py | 22 ++++++------------- test/stdlib_basics/test_contextual_session.py | 7 ++---- test/stdlib_basics/test_genslot.py | 8 ++++--- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/test/backends/test_openai_ollama.py b/test/backends/test_openai_ollama.py index 57ca3281..8b3b7c06 100644 --- a/test/backends/test_openai_ollama.py +++ b/test/backends/test_openai_ollama.py @@ -10,7 +10,7 @@ from mellea import MelleaSession from mellea.backends.formatter import TemplateFormatter -from mellea.backends.model_ids import META_LLAMA_3_2_1B +from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B from mellea.backends.openai import OpenAIBackend from mellea.backends.types import ModelOption from mellea.stdlib.base import CBlock, ChatContext, ModelOutputThunk, SimpleContext @@ -19,20 +19,12 @@ @pytest.fixture(scope="module") def backend(gh_run: int): """Shared OpenAI backend configured for Ollama.""" - if gh_run == 1: - return OpenAIBackend( - model_id=META_LLAMA_3_2_1B.ollama_name, # type: ignore - formatter=TemplateFormatter(model_id=META_LLAMA_3_2_1B.hf_model_name), # type: ignore - base_url=f"http://{os.environ.get('OLLAMA_HOST', 'localhost:11434')}/v1", - api_key="ollama", - ) - else: - return OpenAIBackend( - model_id="granite3.3:8b", - formatter=TemplateFormatter(model_id="ibm-granite/granite-3.2-8b-instruct"), - base_url=f"http://{os.environ.get('OLLAMA_HOST', 'localhost:11434')}/v1", - api_key="ollama", - ) + return OpenAIBackend( + model_id=IBM_GRANITE_4_MICRO_3B.ollama_name, # type: ignore + formatter=TemplateFormatter(model_id=IBM_GRANITE_4_MICRO_3B.hf_model_name), # type: ignore + base_url=f"http://{os.environ.get('OLLAMA_HOST', 'localhost:11434')}/v1", + api_key="ollama", + ) @pytest.fixture(scope="function") diff --git a/test/stdlib_basics/test_contextual_session.py b/test/stdlib_basics/test_contextual_session.py index a401f117..98a8dc2e 100644 --- a/test/stdlib_basics/test_contextual_session.py +++ b/test/stdlib_basics/test_contextual_session.py @@ -4,7 +4,7 @@ # import pytest # # from mellea import chat, generative, instruct, query, start_session, transform, validate -# from mellea.backends.model_ids import IBM_GRANITE_3_3_8B, META_LLAMA_3_2_1B +# from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B # from mellea.stdlib.base import ModelOutputThunk # from mellea.stdlib.mify import MifiedProtocol, mify # from mellea.stdlib.requirement import req @@ -13,10 +13,7 @@ # # @pytest.fixture(scope="module") # def model_id(gh_run: int): -# if gh_run == 1: -# return META_LLAMA_3_2_1B -# else: -# return IBM_GRANITE_3_3_8B +# return IBM_GRANITE_4_MICRO_3B # # # @generative diff --git a/test/stdlib_basics/test_genslot.py b/test/stdlib_basics/test_genslot.py index e7e0bfb3..984a8140 100644 --- a/test/stdlib_basics/test_genslot.py +++ 
b/test/stdlib_basics/test_genslot.py
@@ -1,8 +1,10 @@
 import asyncio
-import pytest
 from typing import Literal
+
+import pytest
+
 from mellea import generative, start_session
-from mellea.backends.model_ids import META_LLAMA_3_2_1B
+from mellea.backends.model_ids import IBM_GRANITE_4_MICRO_3B
 from mellea.backends.ollama import OllamaModelBackend
 from mellea.stdlib.base import ChatContext, Context
 from mellea.stdlib.genslot import (
@@ -21,7 +23,7 @@ def backend(gh_run: int):
     """Shared backend."""
     if gh_run == 1:
         return OllamaModelBackend(
-            model_id=META_LLAMA_3_2_1B.ollama_name  # type: ignore
+            model_id=IBM_GRANITE_4_MICRO_3B.ollama_name  # type: ignore
         )
     else:
         return OllamaModelBackend(model_id="granite3.3:8b")

From fee236fd7d738c89085bd25e42b5712831d7d1f3 Mon Sep 17 00:00:00 2001
From: Avinash Balakrishnan
Date: Mon, 12 Jan 2026 08:39:48 -0800
Subject: [PATCH 07/13] DRYing conftest

---
 test/conftest.py | 34 +++++++++++-----------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/test/conftest.py b/test/conftest.py
index 19f713a3..10c96e74 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -23,48 +23,36 @@ def pytest_runtest_setup(item):
     )
 
 
-@pytest.fixture(autouse=True, scope="function")
-def aggressive_cleanup():
-    """Aggressive memory cleanup after each test to prevent OOM on CI runners."""
+def memory_cleaner():
+    """Yield once, then aggressively free memory; use via `yield from` in fixtures."""
     yield
     # Only run aggressive cleanup in CI where memory is constrained
    if int(os.environ.get("CICD", 0)) != 1:
         return
 
-    # Cleanup after each test
+    # Multiple GC passes to break up reference cycles
+    gc.collect()
     gc.collect()
     gc.collect()
 
     # If torch is available, clear CUDA cache
     try:
         import torch
 
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             torch.cuda.synchronize()
     except ImportError:
         pass
 
 
+@pytest.fixture(autouse=True, scope="function")
+def aggressive_cleanup():
+    """Aggressive memory cleanup after each test to prevent OOM on CI runners."""
+    yield from memory_cleaner()  # drive the generator so its teardown actually runs
+
+
 @pytest.fixture(autouse=True, scope="module")
 def cleanup_module_fixtures():
     """Cleanup module-scoped fixtures to free memory between test modules."""
-    yield
-    # Only run aggressive cleanup in CI where memory is constrained
-    if int(os.environ.get("CICD", 0)) != 1:
-        return
-
-    # Cleanup after module
-    gc.collect()
-    gc.collect()
-    gc.collect()
-
-    # If torch is available, clear CUDA cache
-    try:
-        import torch
-
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            torch.cuda.synchronize()
-    except ImportError:
-        pass
+    yield from memory_cleaner()

From 4d85f1711e9dd5acad02f1d61bc7566dc75c5810 Mon Sep 17 00:00:00 2001
From: Avinash Balakrishnan
Date: Thu, 15 Jan 2026 13:25:26 -0800
Subject: [PATCH 08/13] Adding metadata to notebooks to help with automated testing.
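
Each tagged setup cell now carries a `skip-execution` tag in its cell
metadata, e.g. (taken from the diffs below):

    "metadata": {
        "id": "VDaTfltQY3Fl",
        "tags": ["skip-execution"]
    }

nbclient-based runners such as nbmake honor this tag, so cells that set up
the environment can be skipped during automated notebook runs.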
--- .../compositionality_with_generative_slots.ipynb | 10 ++++++++-- docs/examples/notebooks/context_example.ipynb | 10 ++++++++-- docs/examples/notebooks/document_mobject.ipynb | 10 ++++++++-- docs/examples/notebooks/example.ipynb | 10 ++++++++-- docs/examples/notebooks/georgia_tech.ipynb | 15 ++++++++++----- .../notebooks/instruct_validate_repair.ipynb | 10 ++++++++-- docs/examples/notebooks/m_serve_example.ipynb | 10 ++++++++-- docs/examples/notebooks/mcp_example.ipynb | 10 ++++++++-- .../notebooks/model_options_example.ipynb | 10 ++++++++-- .../examples/notebooks/sentiment_classifier.ipynb | 10 ++++++++-- docs/examples/notebooks/simple_email.ipynb | 10 ++++++++-- docs/examples/notebooks/table_mobject.ipynb | 10 ++++++++-- 12 files changed, 98 insertions(+), 27 deletions(-) diff --git a/docs/examples/notebooks/compositionality_with_generative_slots.ipynb b/docs/examples/notebooks/compositionality_with_generative_slots.ipynb index 6f7f4ed4..478a5332 100644 --- a/docs/examples/notebooks/compositionality_with_generative_slots.ipynb +++ b/docs/examples/notebooks/compositionality_with_generative_slots.ipynb @@ -25,7 +25,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "VDaTfltQY3Fl" + "id": "VDaTfltQY3Fl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -56,7 +59,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "9EurAUSz_1yl" + "id": "9EurAUSz_1yl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ diff --git a/docs/examples/notebooks/context_example.ipynb b/docs/examples/notebooks/context_example.ipynb index ec5d03fa..1c0d3ef5 100644 --- a/docs/examples/notebooks/context_example.ipynb +++ b/docs/examples/notebooks/context_example.ipynb @@ -25,7 +25,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "VDaTfltQY3Fl" + "id": "VDaTfltQY3Fl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -56,7 +59,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "9EurAUSz_1yl" + "id": "9EurAUSz_1yl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ diff --git a/docs/examples/notebooks/document_mobject.ipynb b/docs/examples/notebooks/document_mobject.ipynb index 55c7a2b7..8846f841 100644 --- a/docs/examples/notebooks/document_mobject.ipynb +++ b/docs/examples/notebooks/document_mobject.ipynb @@ -25,7 +25,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "VDaTfltQY3Fl" + "id": "VDaTfltQY3Fl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -56,7 +59,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "9EurAUSz_1yl" + "id": "9EurAUSz_1yl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ diff --git a/docs/examples/notebooks/example.ipynb b/docs/examples/notebooks/example.ipynb index 21877e45..275de1ce 100644 --- a/docs/examples/notebooks/example.ipynb +++ b/docs/examples/notebooks/example.ipynb @@ -25,7 +25,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "VDaTfltQY3Fl" + "id": "VDaTfltQY3Fl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -56,7 +59,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "9EurAUSz_1yl" + "id": "9EurAUSz_1yl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ diff --git a/docs/examples/notebooks/georgia_tech.ipynb b/docs/examples/notebooks/georgia_tech.ipynb index 3b349881..08422fb4 100644 --- a/docs/examples/notebooks/georgia_tech.ipynb +++ 
b/docs/examples/notebooks/georgia_tech.ipynb @@ -28,7 +28,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "6fDEbLHL_hkK" + "id": "6fDEbLHL_hkK", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -134,14 +137,14 @@ " strategy=RejectionSamplingStrategy(loop_budget=5),\n", " user_variables={\"name\": name, \"notes\": notes},\n", " return_sampling_results=True,\n", - " )\n", + " ) # type: ignore\n", " if email_candidate.success:\n", " return str(email_candidate.result)\n", " else:\n", " return email_candidate.sample_generations[0].value\n", "\n", "\n", - "m = mellea_org.start_session()\n", + "m = mellea.start_session()\n", "print(\n", " write_email(\n", " m,\n", @@ -556,11 +559,13 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "mellea-public", + "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "name": "python", + "version": "3.12.10" } }, "nbformat": 4, diff --git a/docs/examples/notebooks/instruct_validate_repair.ipynb b/docs/examples/notebooks/instruct_validate_repair.ipynb index 14896c2b..7144d539 100644 --- a/docs/examples/notebooks/instruct_validate_repair.ipynb +++ b/docs/examples/notebooks/instruct_validate_repair.ipynb @@ -25,7 +25,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "VDaTfltQY3Fl" + "id": "VDaTfltQY3Fl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -56,7 +59,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "9EurAUSz_1yl" + "id": "9EurAUSz_1yl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ diff --git a/docs/examples/notebooks/m_serve_example.ipynb b/docs/examples/notebooks/m_serve_example.ipynb index 871349f7..729b75bf 100644 --- a/docs/examples/notebooks/m_serve_example.ipynb +++ b/docs/examples/notebooks/m_serve_example.ipynb @@ -25,7 +25,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "VDaTfltQY3Fl" + "id": "VDaTfltQY3Fl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -56,7 +59,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "9EurAUSz_1yl" + "id": "9EurAUSz_1yl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ diff --git a/docs/examples/notebooks/mcp_example.ipynb b/docs/examples/notebooks/mcp_example.ipynb index 50c6233b..565c128d 100644 --- a/docs/examples/notebooks/mcp_example.ipynb +++ b/docs/examples/notebooks/mcp_example.ipynb @@ -26,7 +26,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "VDaTfltQY3Fl" + "id": "VDaTfltQY3Fl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -58,7 +61,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "9EurAUSz_1yl" + "id": "9EurAUSz_1yl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ diff --git a/docs/examples/notebooks/model_options_example.ipynb b/docs/examples/notebooks/model_options_example.ipynb index a706c05a..0216010c 100644 --- a/docs/examples/notebooks/model_options_example.ipynb +++ b/docs/examples/notebooks/model_options_example.ipynb @@ -25,7 +25,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "VDaTfltQY3Fl" + "id": "VDaTfltQY3Fl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -56,7 +59,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "9EurAUSz_1yl" + "id": "9EurAUSz_1yl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ diff --git 
a/docs/examples/notebooks/sentiment_classifier.ipynb b/docs/examples/notebooks/sentiment_classifier.ipynb index e1cd70bd..dc2dec4d 100644 --- a/docs/examples/notebooks/sentiment_classifier.ipynb +++ b/docs/examples/notebooks/sentiment_classifier.ipynb @@ -25,7 +25,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "VDaTfltQY3Fl" + "id": "VDaTfltQY3Fl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -56,7 +59,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "9EurAUSz_1yl" + "id": "9EurAUSz_1yl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ diff --git a/docs/examples/notebooks/simple_email.ipynb b/docs/examples/notebooks/simple_email.ipynb index f80f1663..3662fcb5 100644 --- a/docs/examples/notebooks/simple_email.ipynb +++ b/docs/examples/notebooks/simple_email.ipynb @@ -25,7 +25,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "VDaTfltQY3Fl" + "id": "VDaTfltQY3Fl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -56,7 +59,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "9EurAUSz_1yl" + "id": "9EurAUSz_1yl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ diff --git a/docs/examples/notebooks/table_mobject.ipynb b/docs/examples/notebooks/table_mobject.ipynb index 94289994..bf963f46 100644 --- a/docs/examples/notebooks/table_mobject.ipynb +++ b/docs/examples/notebooks/table_mobject.ipynb @@ -25,7 +25,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "VDaTfltQY3Fl" + "id": "VDaTfltQY3Fl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ @@ -56,7 +59,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "9EurAUSz_1yl" + "id": "9EurAUSz_1yl", + "tags": [ + "skip-execution" + ] }, "outputs": [], "source": [ From 2a7c7cc35866ac33f892505f262963a3a6f56f82 Mon Sep 17 00:00:00 2001 From: Avinash Balakrishnan Date: Thu, 15 Jan 2026 13:45:36 -0800 Subject: [PATCH 09/13] enabling notebooks to run with pytest docs --- docs/examples/conftest.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/examples/conftest.py b/docs/examples/conftest.py index 2fde57e2..2b1b4ea5 100644 --- a/docs/examples/conftest.py +++ b/docs/examples/conftest.py @@ -6,6 +6,18 @@ import pytest +# Enable nbmake for notebook testing when running pytest in this directory. +# This allows `pytest docs/` to automatically run notebooks via nbmake. 
+pytest_plugins = ["nbmake"] + + +def pytest_configure(config): + """Configure nbmake to run notebooks in docs/examples/notebooks/.""" + # Only enable nbmake if we're collecting from docs directory + if hasattr(config.option, "nbmake"): + config.option.nbmake = True + + examples_to_skip = { "101_example.py", "__init__.py", @@ -43,14 +55,6 @@ def pytest_collect_file(parent: pytest.Dir, file_path: pathlib.PosixPath): return ExampleFile.from_parent(parent, path=file_path) - # TODO: Support running jupyter notebooks: - # - use nbmake or directly use nbclient as documented below - # - install the nbclient package - # - run either using python api or jupyter execute - # - must replace background processes - # if file_path.suffix == ".ipynb": - # return ExampleFile.from_parent(parent, path=file_path) - class ExampleFile(pytest.File): def collect(self): From 9a73fa3e7ca9c249cfbe7031b5730ef1298ab5ee Mon Sep 17 00:00:00 2001 From: Avinash Balakrishnan Date: Thu, 15 Jan 2026 13:48:35 -0800 Subject: [PATCH 10/13] reverting cause plugins can only be initiated at top level --- docs/examples/conftest.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/docs/examples/conftest.py b/docs/examples/conftest.py index 2b1b4ea5..bef7dce6 100644 --- a/docs/examples/conftest.py +++ b/docs/examples/conftest.py @@ -1,4 +1,7 @@ -"""Allows you to use `pytest docs` to run the examples.""" +"""Allows you to use `pytest docs` to run the examples. + +To run notebooks, use: uv run --with 'mcp' pytest --nbmake docs/examples/notebooks/ +""" import pathlib import subprocess @@ -6,18 +9,6 @@ import pytest -# Enable nbmake for notebook testing when running pytest in this directory. -# This allows `pytest docs/` to automatically run notebooks via nbmake. 
-pytest_plugins = ["nbmake"] - - -def pytest_configure(config): - """Configure nbmake to run notebooks in docs/examples/notebooks/.""" - # Only enable nbmake if we're collecting from docs directory - if hasattr(config.option, "nbmake"): - config.option.nbmake = True - - examples_to_skip = { "101_example.py", "__init__.py", From 419fcd0b359d457b9e933fddf093373f8639cd7a Mon Sep 17 00:00:00 2001 From: Avinash Balakrishnan Date: Fri, 16 Jan 2026 10:29:44 -0800 Subject: [PATCH 11/13] updating dev with nbmake --- pyproject.toml | 1 + uv.lock | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 913258b4..2431f6b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,6 +114,7 @@ dev = [ "pytest-asyncio", "mypy>=1.17.0", "python-semantic-release~=7.32", + "nbmake>=1.5.5", ] notebook = [ diff --git a/uv.lock b/uv.lock index b49e0bd1..8d1f179a 100644 --- a/uv.lock +++ b/uv.lock @@ -3284,6 +3284,7 @@ watsonx = [ dev = [ { name = "isort" }, { name = "mypy" }, + { name = "nbmake" }, { name = "pdm" }, { name = "pre-commit" }, { name = "pylint" }, @@ -3350,6 +3351,7 @@ provides-extras = ["hf", "vllm", "litellm", "watsonx", "docling", "all"] dev = [ { name = "isort", specifier = ">=6.0.0" }, { name = "mypy", specifier = ">=1.17.0" }, + { name = "nbmake", specifier = ">=1.5.5" }, { name = "pdm", specifier = ">=2.24.0" }, { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pylint", specifier = ">=3.3.4" }, @@ -3818,6 +3820,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b", size = 78454, upload-time = "2024-04-04T11:20:34.895Z" }, ] +[[package]] +name = "nbmake" +version = "1.5.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ipykernel" }, + { name = "nbclient" }, + { name = "nbformat" }, + { name = "pygments" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/9a/aae201cee5639e1d562b3843af8fd9f8d018bb323e776a2b973bdd5fc64b/nbmake-1.5.5.tar.gz", hash = "sha256:239dc868ea13a7c049746e2aba2c229bd0f6cdbc6bfa1d22f4c88638aa4c5f5c", size = 85929, upload-time = "2024-12-23T18:33:46.774Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/be/b257e12f9710819fde40adc972578bee6b72c5992da1bc8369bef2597756/nbmake-1.5.5-py3-none-any.whl", hash = "sha256:c6fbe6e48b60cacac14af40b38bf338a3b88f47f085c54ac5b8639ff0babaf4b", size = 12818, upload-time = "2024-12-23T18:33:44.566Z" }, +] + [[package]] name = "nest-asyncio" version = "1.6.0" From 8836f7ae8ededb7cd388bf0977d5b02e75377619 Mon Sep 17 00:00:00 2001 From: Avinash Balakrishnan Date: Fri, 16 Jan 2026 10:52:16 -0800 Subject: [PATCH 12/13] removing openai name from granite micro --- mellea/backends/model_ids.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mellea/backends/model_ids.py b/mellea/backends/model_ids.py index 9b585aea..6fd67fe2 100644 --- a/mellea/backends/model_ids.py +++ b/mellea/backends/model_ids.py @@ -28,7 +28,6 @@ class ModelIdentifier: IBM_GRANITE_4_MICRO_3B = ModelIdentifier( hf_model_name="ibm-granite/granite-4.0-micro", ollama_name="granite4:micro", - openai_name="granite4:micro", # setting this just for testing purposes. watsonx_name="ibm/granite-4-h-small", ) # todo: watsonx model is different from ollama model - should be same. 
From 956ba93442b889669389f78791dd04f436d2b398 Mon Sep 17 00:00:00 2001
From: Avinash Balakrishnan
Date: Fri, 16 Jan 2026 11:23:12 -0800
Subject: [PATCH 13/13] passing ollama_name string for granite micro in
 test_vision_openai

---
 test/backends/test_vision_openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/backends/test_vision_openai.py b/test/backends/test_vision_openai.py
index dbe17b9c..9c958efe 100644
--- a/test/backends/test_vision_openai.py
+++ b/test/backends/test_vision_openai.py
@@ -18,7 +18,7 @@ def m_session(gh_run):
     if gh_run == 1:
         m = start_session(
             "openai",
-            model_id=IBM_GRANITE_4_MICRO_3B,
+            model_id=IBM_GRANITE_4_MICRO_3B.ollama_name,  # type: ignore
             base_url=f"http://{os.environ.get('OLLAMA_HOST', 'localhost:11434')}/v1",
             api_key="ollama",
             model_options={ModelOption.MAX_NEW_TOKENS: 5},