Skip to content

Commit 1908391

Browse files
Refactor ingest_from_query to run_ingest_query, add process_notebook function, and add tests for notebook processing
1 parent 193c3c9 commit 1908391

File tree

6 files changed

+74
-7
lines changed

6 files changed

+74
-7
lines changed

src/gitingest/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from gitingest.clone import clone_repo
44
from gitingest.ingest import ingest
5-
from gitingest.ingest_from_query import ingest_from_query
5+
from gitingest.ingest_from_query import run_ingest_query
66
from gitingest.parse_query import parse_query
77

8-
__all__ = ["ingest_from_query", "clone_repo", "parse_query", "ingest"]
8+
__all__ = ["run_ingest_query", "clone_repo", "parse_query", "ingest"]

src/gitingest/ingest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import shutil
66

77
from gitingest.clone import CloneConfig, clone_repo
8-
from gitingest.ingest_from_query import ingest_from_query
8+
from gitingest.ingest_from_query import run_ingest_query
99
from gitingest.parse_query import parse_query
1010

1111

@@ -74,7 +74,7 @@ def ingest(
7474
else:
7575
raise TypeError("clone_repo did not return a coroutine as expected.")
7676

77-
summary, tree, content = ingest_from_query(query)
77+
summary, tree, content = run_ingest_query(query)
7878

7979
if output is not None:
8080
with open(output, "w", encoding="utf-8") as f:

src/gitingest/ingest_from_query.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import tiktoken
88

99
from gitingest.exceptions import AlreadyVisitedError, MaxFileSizeReachedError, MaxFilesReachedError
10+
from gitingest.notebook_utils import process_notebook
1011

1112
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
1213
MAX_DIRECTORY_DEPTH = 20 # Maximum depth of directory traversal
@@ -146,6 +147,9 @@ def _read_file_content(file_path: str) -> str:
146147
The content of the file, or an error message if the file could not be read.
147148
"""
148149
try:
150+
if file_path.endswith(".ipynb"):
151+
return process_notebook(file_path)
152+
149153
with open(file_path, encoding="utf-8", errors="ignore") as f:
150154
return f.read()
151155
except OSError as e:
@@ -814,7 +818,7 @@ def _ingest_directory(path: str, query: dict[str, Any]) -> tuple[str, str, str]:
814818
return summary, tree, files_content
815819

816820

817-
def ingest_from_query(query: dict[str, Any]) -> tuple[str, str, str]:
821+
def run_ingest_query(query: dict[str, Any]) -> tuple[str, str, str]:
818822
"""
819823
Main entry point for analyzing a codebase directory or single file.
820824

src/gitingest/notebook_utils.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
""" Utilities for processing Jupyter notebooks. """
2+
3+
import json
4+
5+
6+
def process_notebook(file: str) -> str:
    """
    Process a Jupyter notebook file and return an executable Python script as a string.

    Code cells are emitted verbatim. Markdown, raw and (legacy) heading cells are wrapped in
    triple-quoted strings so that they read as multi-line comments. Cells whose source is
    empty are skipped, and the remaining cells are separated by one blank line.

    Cells are taken from the top-level ``cells`` list. When that key is absent, the cells of
    every entry in the legacy top-level ``worksheets`` list are used instead; a notebook with
    neither yields an empty string.

    Parameters
    ----------
    file : str
        The path to the Jupyter notebook file.

    Returns
    -------
    str
        The executable Python script as a string.

    Raises
    ------
    ValueError
        If the top-level JSON value is not an object, or if an unexpected cell type is
        encountered. Malformed JSON raises ``json.JSONDecodeError``, a ``ValueError`` subclass.
    OSError
        If the file cannot be opened or read.
    """
    with open(file, encoding="utf-8") as f:
        notebook = json.load(f)

    # Anything other than a JSON object cannot be a notebook; fail with a clear message
    # instead of an incidental TypeError/AttributeError further down.
    if not isinstance(notebook, dict):
        raise ValueError(f"Invalid notebook {file}: top-level JSON value must be an object")

    cells = notebook.get("cells")
    if cells is None:
        # Legacy layout: cells are nested inside worksheets rather than stored at the top level.
        cells = []
        for worksheet in notebook.get("worksheets") or []:
            cells.extend(worksheet.get("cells") or [])

    result = []

    for cell in cells:
        cell_type = cell.get("cell_type")

        # Validate cell type and handle unexpected types
        if cell_type not in ("markdown", "code", "raw", "heading"):
            raise ValueError(f"Unknown cell type: {cell_type}")

        # Legacy code cells keep their text under "input" instead of "source".
        source = cell.get("source")
        if source is None:
            source = cell.get("input")
        if not source:
            continue

        # The source may be a single string or a list of line fragments.
        text = source if isinstance(source, str) else "".join(source)
        if not text:
            continue

        # Convert every non-code cell to a multi-line comment
        if cell_type != "code":
            text = f'"""\n{text}\n"""'

        result.append(text)

    return "\n\n".join(result)

src/process_query.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from config import EXAMPLE_REPOS, MAX_DISPLAY_SIZE
1010
from gitingest.clone import CloneConfig, clone_repo
11-
from gitingest.ingest_from_query import ingest_from_query
11+
from gitingest.ingest_from_query import run_ingest_query
1212
from gitingest.parse_query import parse_query
1313
from server_utils import Colors, log_slider_to_size
1414

@@ -91,7 +91,7 @@ async def process_query(
9191
branch=query.get("branch"),
9292
)
9393
await clone_repo(clone_config)
94-
summary, tree, content = ingest_from_query(query)
94+
summary, tree, content = run_ingest_query(query)
9595
with open(f"{clone_config.local_path}.txt", "w", encoding="utf-8") as f:
9696
f.write(tree + "\n" + content)
9797
except Exception as e:

tests/conftest.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
""" This module contains fixtures for the tests. """
22

3+
import json
34
from pathlib import Path
45
from typing import Any
56

@@ -70,3 +71,18 @@ def temp_directory(tmp_path: Path) -> Path:
7071
(dir2 / "file_dir2.txt").write_text("Hello from dir2")
7172

7273
return test_dir
74+
75+
76+
@pytest.fixture
def write_notebook(tmp_path: Path):
    """
    Provide a factory that serialises notebook content to a temporary ``.ipynb`` file.

    The returned callable takes a file name and a notebook dictionary, writes the dictionary
    as UTF-8 encoded JSON to ``tmp_path / name`` and returns the path of the written file.
    """

    def _write_notebook(name: str, content: dict) -> Path:
        target = tmp_path / name
        target.write_text(json.dumps(content), encoding="utf-8")
        return target

    return _write_notebook

0 commit comments

Comments
 (0)