Skip to content

Commit f505f23

Browse files
add tests for the notebook_utils module
1 parent 3a7e4b8 commit f505f23

File tree

3 files changed

+230
-4
lines changed

3 files changed

+230
-4
lines changed

tests/conftest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,12 @@ def temp_directory(tmp_path: Path) -> Path:
7878
@pytest.fixture
7979
def write_notebook(tmp_path: Path):
8080
"""
81-
A helper fixture that returns a function for writing arbitrary notebook content to a temporary .ipynb file.
81+
A fixture that returns a helper function to write a .ipynb notebook file at runtime with given content.
8282
"""
8383

84-
def _write_notebook(name: str, content: dict) -> Path:
84+
def _write_notebook(name: str, content: dict[str, Any]) -> Path:
8585
notebook_path = tmp_path / name
86-
with notebook_path.open("w", encoding="utf-8") as f:
86+
with notebook_path.open(mode="w", encoding="utf-8") as f:
8787
json.dump(content, f)
8888
return notebook_path
8989

tests/test_ingest.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22

33
from pathlib import Path
44
from typing import Any
5+
from unittest.mock import patch
56

6-
from gitingest.ingest_from_query import _extract_files_content, _scan_directory
7+
from gitingest.ingest_from_query import _extract_files_content, _read_file_content, _scan_directory
78

89

910
def test_scan_directory(temp_directory: Path, sample_query: dict[str, Any]) -> None:
@@ -37,6 +38,25 @@ def test_extract_files_content(temp_directory: Path, sample_query: dict[str, Any
3738
assert any("file_dir2.txt" in p for p in paths)
3839

3940

41+
def test_read_file_content_with_notebook(tmp_path: Path):
    """
    Verify that `_read_file_content` calls `process_notebook` exactly once, with the file path,
    when it is given a `.ipynb` file.
    """
    ipynb_file = tmp_path / "dummy_notebook.ipynb"
    ipynb_file.write_text("{}", encoding="utf-8")  # an empty JSON object is enough here

    # Patch the name where ingest_from_query looks it up, so the call made there hits the mock
    notebook_patch = patch("gitingest.ingest_from_query.process_notebook")
    with notebook_patch as process_spy:
        _read_file_content(ipynb_file)

    # The mock keeps its call history after the patch is undone
    process_spy.assert_called_once_with(ipynb_file)
49+
50+
51+
def test_read_file_content_with_non_notebook(tmp_path: Path):
    """
    Verify that `_read_file_content` never calls `process_notebook` for a file that is not a notebook.
    """
    script_file = tmp_path / "dummy_file.py"
    script_file.write_text("print('Hello')", encoding="utf-8")

    # Patch the name where ingest_from_query looks it up, so any call made there would be recorded
    notebook_patch = patch("gitingest.ingest_from_query.process_notebook")
    with notebook_patch as process_spy:
        _read_file_content(script_file)

    # The mock keeps its call history after the patch is undone
    process_spy.assert_not_called()
58+
59+
4060
# TODO: test with include patterns: ['*.txt']
4161
# TODO: test with wrong include patterns: ['*.qwerty']
4262

tests/test_notebook_utils.py

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
""" Tests for the notebook_utils module. """
2+
3+
import pytest
4+
5+
from gitingest.notebook_utils import process_notebook
6+
7+
8+
def test_process_notebook_all_cells(write_notebook):
    """
    Test a notebook that mixes one markdown, one code, and one raw cell.

    Expectations:
    - The output contains exactly 4 triple-quote markers (2 wrapped blocks: markdown + raw).
    - The text of every cell appears in the output.
    - The code cell is not wrapped in triple quotes.
    """
    triple_quote = '"""'
    code_line = 'print("Hello Code")'
    cells = [
        {"cell_type": "markdown", "source": ["# Markdown cell"]},
        {"cell_type": "code", "source": [code_line]},
        {"cell_type": "raw", "source": ["<raw content>"]},
    ]
    output = process_notebook(write_notebook("all_cells.ipynb", {"cells": cells}))

    assert output.count(triple_quote) == 4, "Expected 4 triple-quote occurrences for 2 blocks."

    # Markdown and raw text must both be present in the output
    for expected_text in ("# Markdown cell", "<raw content>"):
        assert expected_text in output

    # The code cell must be present, but not as a triple-quoted block
    assert code_line in output
    assert '"""\nprint("Hello Code")\n"""' not in output
35+
36+
37+
def test_process_notebook_with_worksheets(write_notebook):
    """
    Test a notebook containing the 'worksheets' key (deprecated as of IPEP-17).

    - Processing the notebook with 'worksheets' should emit a DeprecationWarning.
    - Processing the equivalent notebook with "cells" at top level should emit no warning at all.
    - Both notebooks should produce identical content.
    """
    import warnings  # local import: only needed to enforce the warning-free path below

    with_worksheets = {
        "worksheets": [
            {
                "cells": [
                    {"cell_type": "markdown", "source": ["# Markdown cell"]},
                    {"cell_type": "code", "source": ['print("Hello Code")']},
                    {"cell_type": "raw", "source": ["<raw content>"]},
                ]
            }
        ]
    }
    without_worksheets = with_worksheets["worksheets"][0]  # same cells, but no 'worksheets' key at top

    nb_with = write_notebook("with_worksheets.ipynb", with_worksheets)
    nb_without = write_notebook("without_worksheets.ipynb", without_worksheets)

    with pytest.warns(DeprecationWarning, match="Worksheets are deprecated as of IPEP-17."):
        result_with = process_notebook(nb_with)

    # Escalate every warning to an error so the "no warnings" expectation is actually verified
    # instead of only being stated in a comment.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        result_without = process_notebook(nb_without)

    assert result_with == result_without, "Both notebooks should produce identical content."
68+
69+
70+
def test_process_notebook_multiple_worksheets(write_notebook):
    """
    Test a notebook containing multiple 'worksheets'.

    If multiple worksheets are present:
    - Only the first sheet's cells are processed.
    - A DeprecationWarning is emitted for the use of worksheets.
    - A UserWarning is emitted for the ignored extra worksheets.
    """
    import re  # local import: applies the same regex search that `pytest.warns(match=...)` performs

    multi_worksheets = {
        "worksheets": [
            {"cells": [{"cell_type": "markdown", "source": ["# First Worksheet"]}]},
            {"cells": [{"cell_type": "code", "source": ['print("Ignored Worksheet")']}]},
        ]
    }

    # Single-worksheet version (only the first)
    single_worksheet = {
        "worksheets": [
            {"cells": [{"cell_type": "markdown", "source": ["# First Worksheet"]}]},
        ]
    }

    nb_multi = write_notebook("multiple_worksheets.ipynb", multi_worksheets)
    nb_single = write_notebook("single_worksheet.ipynb", single_worksheet)

    # Record both warnings with ONE recorder. Nesting two `pytest.warns` blocks only works on
    # pytest >= 8; on older versions the inner block consumes every warning raised inside it,
    # so the outer block never sees the DeprecationWarning and fails.
    with pytest.warns((DeprecationWarning, UserWarning)) as caught:
        result_multi = process_notebook(nb_multi)

    def _was_emitted(category, pattern):
        """Return True if a recorded warning of `category` has a message matching `pattern`."""
        return any(issubclass(w.category, category) and re.search(pattern, str(w.message)) for w in caught)

    assert _was_emitted(
        DeprecationWarning, "Worksheets are deprecated as of IPEP-17."
    ), "Expected a DeprecationWarning about worksheets."
    assert _was_emitted(
        UserWarning, "Multiple worksheets are not supported."
    ), "Expected a UserWarning about the ignored extra worksheets."

    with pytest.warns(DeprecationWarning, match="Worksheets are deprecated as of IPEP-17."):
        result_single = process_notebook(nb_single)

    # The second worksheet (with code) should have been ignored
    assert result_multi == result_single, "Second worksheet was ignored, results match."
105+
106+
107+
def test_process_notebook_code_only(write_notebook):
    """
    Test a notebook made up exclusively of code cells.

    The output is expected to contain every code line and no triple quotes.
    """
    code_sources = ["print('Code Cell 1')", "x = 42"]
    notebook_content = {"cells": [{"cell_type": "code", "source": [src]} for src in code_sources]}
    output = process_notebook(write_notebook("code_only.ipynb", notebook_content))

    # Nothing should have been wrapped in a triple-quoted block
    assert '"""' not in output

    for src in code_sources:
        assert src in output
126+
127+
128+
def test_process_notebook_markdown_only(write_notebook):
    """
    Test a notebook made up of 2 markdown cells.

    Each markdown cell is expected to become one triple-quoted block,
    so the output should contain 4 triple-quote markers in total.
    """
    markdown_sources = ["# Markdown Header", "Some more markdown."]
    notebook_content = {"cells": [{"cell_type": "markdown", "source": [text]} for text in markdown_sources]}
    output = process_notebook(write_notebook("markdown_only.ipynb", notebook_content))

    marker_count = output.count('"""')
    assert marker_count == 4, "Two markdown cells => two triple-quoted blocks => 4 triple quotes total."

    for text in markdown_sources:
        assert text in output
146+
147+
148+
def test_process_notebook_raw_only(write_notebook):
    """
    Test a notebook made up of 2 raw cells.

    Each raw cell is expected to become one triple-quoted block,
    so the output should contain 4 triple-quote markers in total.
    """
    raw_sources = ["Raw content line 1", "Raw content line 2"]
    notebook_content = {"cells": [{"cell_type": "raw", "source": [text]} for text in raw_sources]}
    output = process_notebook(write_notebook("raw_only.ipynb", notebook_content))

    # 2 blocks, each with an opening and a closing marker
    marker_count = output.count('"""')
    assert marker_count == 4

    for text in raw_sources:
        assert text in output
167+
168+
169+
def test_process_notebook_empty_cells(write_notebook):
    """
    Test that cells whose 'source' is empty leave no trace in the output.

    Of the 4 cells, 3 are empty; the single non-empty markdown cell is expected to
    yield 1 triple-quoted block, i.e. 2 triple quotes.
    """
    blank_cells = [{"cell_type": kind, "source": []} for kind in ("markdown", "code", "raw")]
    filled_cell = {"cell_type": "markdown", "source": ["# Non-empty markdown"]}
    nb_path = write_notebook("empty_cells.ipynb", {"cells": [*blank_cells, filled_cell]})

    output = process_notebook(nb_path)

    # One block only: an opening and a closing marker
    marker_count = output.count('"""')
    assert marker_count == 2
    assert "# Non-empty markdown" in output
189+
190+
191+
def test_process_notebook_invalid_cell_type(write_notebook):
    """
    Test a notebook that contains a cell with an unrecognized cell type.

    Processing it is expected to raise a ValueError naming the offending type.
    """
    cells = [
        {"cell_type": "markdown", "source": ["# Valid markdown"]},
        {"cell_type": "unknown", "source": ["Unrecognized cell type"]},
    ]
    nb_path = write_notebook("invalid_cell_type.ipynb", {"cells": cells})

    expected_error = pytest.raises(ValueError, match="Unknown cell type: unknown")
    with expected_error:
        process_notebook(nb_path)

0 commit comments

Comments
 (0)