Skip to content

Commit 254ff37

Browse files
committed
Merge branch 'main' of https://github.com/RyanL2004/gitingest into flow-integration-tests
2 parents d943081 + 361147a commit 254ff37

28 files changed

+178
-108
lines changed

CONTRIBUTING.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,18 +48,18 @@ Thanks for your interest in contributing to Gitingest! 🚀 Gitingest aims to be
4848
pytest
4949
```
5050

51-
8. Run the app locally using Docker to test your changes (optional):
51+
8. Navigate to src folder
5252

5353
1. Change into the `src` directory:
5454

5555
``` bash
56-
docker build -t gitingest .
56+
cd src
5757
```
5858

59-
2. Run the Docker container:
59+
2. Run the local web server:
6060

6161
``` bash
62-
docker run -d --name gitingest -p 8000:8000 gitingest
62+
uvicorn server.main:app
6363
```
6464

6565
3. Open your browser and navigate to `http://localhost:8000` to see the app running.

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,4 @@ USER appuser
4141

4242
EXPOSE 8000
4343

44-
CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
44+
CMD ["python", "-m", "uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000"]

src/gitingest/cli.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import click
88

9-
from config import MAX_FILE_SIZE, OUTPUT_FILE_PATH
9+
from gitingest.config import MAX_FILE_SIZE, OUTPUT_FILE_PATH
1010
from gitingest.repository_ingest import ingest
1111

1212

@@ -16,12 +16,14 @@
1616
@click.option("--max-size", "-s", default=MAX_FILE_SIZE, help="Maximum file size to process in bytes")
1717
@click.option("--exclude-pattern", "-e", multiple=True, help="Patterns to exclude")
1818
@click.option("--include-pattern", "-i", multiple=True, help="Patterns to include")
19+
@click.option("--branch", "-b", default=None, help="Branch to clone and ingest")
1920
def main(
2021
source: str,
2122
output: str | None,
2223
max_size: int,
2324
exclude_pattern: tuple[str, ...],
2425
include_pattern: tuple[str, ...],
26+
branch: str | None,
2527
):
2628
"""
2729
Main entry point for the CLI. This function is called when the CLI is run as a script.
@@ -41,9 +43,11 @@ def main(
4143
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
4244
include_pattern : tuple[str, ...]
4345
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
46+
branch : str | None
47+
The branch to clone (optional).
4448
"""
4549
# Main entry point for the CLI. This function is called when the CLI is run as a script.
46-
asyncio.run(_async_main(source, output, max_size, exclude_pattern, include_pattern))
50+
asyncio.run(_async_main(source, output, max_size, exclude_pattern, include_pattern, branch))
4751

4852

4953
async def _async_main(
@@ -52,6 +56,7 @@ async def _async_main(
5256
max_size: int,
5357
exclude_pattern: tuple[str, ...],
5458
include_pattern: tuple[str, ...],
59+
branch: str | None,
5560
) -> None:
5661
"""
5762
Analyze a directory or repository and create a text dump of its contents.
@@ -72,6 +77,8 @@ async def _async_main(
7277
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
7378
include_pattern : tuple[str, ...]
7479
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
80+
branch : str | None
81+
The branch to clone (optional).
7582
7683
Raises
7784
------
@@ -85,7 +92,7 @@ async def _async_main(
8592

8693
if not output:
8794
output = OUTPUT_FILE_PATH
88-
summary, _, _ = await ingest(source, max_size, include_patterns, exclude_patterns, output=output)
95+
summary, _, _ = await ingest(source, max_size, include_patterns, exclude_patterns, branch, output=output)
8996

9097
click.echo(f"Analysis complete! Output written to: {output}")
9198
click.echo("\nSummary:")

src/gitingest/config.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
""" Configuration file for the project. """
2+
3+
from pathlib import Path
4+
5+
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
6+
MAX_DIRECTORY_DEPTH = 20 # Maximum depth of directory traversal
7+
MAX_FILES = 10_000 # Maximum number of files to process
8+
MAX_TOTAL_SIZE_BYTES = 500 * 1024 * 1024 # 500 MB
9+
10+
OUTPUT_FILE_PATH = "digest.txt"
11+
TMP_BASE_PATH = Path("/tmp/gitingest")

src/gitingest/query_ingestion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import tiktoken
88

9-
from config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
9+
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
1010
from gitingest.exceptions import (
1111
AlreadyVisitedError,
1212
InvalidNotebookError,

src/gitingest/query_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pathlib import Path
1010
from urllib.parse import unquote, urlparse
1111

12-
from config import MAX_FILE_SIZE, TMP_BASE_PATH
12+
from gitingest.config import MAX_FILE_SIZE, TMP_BASE_PATH
1313
from gitingest.exceptions import InvalidPatternError
1414
from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS
1515
from gitingest.repository_clone import _check_repo_exists, fetch_remote_branch_list
@@ -163,7 +163,7 @@ async def _parse_repo_source(source: str) -> ParsedQuery:
163163

164164
_id = str(uuid.uuid4())
165165
slug = f"{user_name}-{repo_name}"
166-
local_path = Path(TMP_BASE_PATH) / _id / slug
166+
local_path = TMP_BASE_PATH / _id / slug
167167
url = f"https://{host}/{user_name}/{repo_name}"
168168

169169
parsed = ParsedQuery(

src/gitingest/repository_ingest.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import inspect
55
import shutil
66

7-
from config import TMP_BASE_PATH
7+
from gitingest.config import TMP_BASE_PATH
88
from gitingest.query_ingestion import run_ingest_query
99
from gitingest.query_parser import ParsedQuery, parse_query
1010
from gitingest.repository_clone import CloneConfig, clone_repo
@@ -15,6 +15,7 @@ async def ingest(
1515
max_file_size: int = 10 * 1024 * 1024, # 10 MB
1616
include_patterns: set[str] | str | None = None,
1717
exclude_patterns: set[str] | str | None = None,
18+
branch: str | None = None,
1819
output: str | None = None,
1920
) -> tuple[str, str, str]:
2021
"""
@@ -35,6 +36,8 @@ async def ingest(
3536
Pattern or set of patterns specifying which files to include. If `None`, all files are included.
3637
exclude_patterns : set[str] | str | None, optional
3738
Pattern or set of patterns specifying which files to exclude. If `None`, no files are excluded.
39+
branch : str | None, optional
40+
The branch to clone and ingest. If `None`, the default branch is used.
3841
output : str | None, optional
3942
File path where the summary and content should be written. If `None`, the results are not written to a file.
4043
@@ -61,17 +64,23 @@ async def ingest(
6164
)
6265

6366
if parsed_query.url:
67+
selected_branch = branch if branch else parsed_query.branch # prioritize branch argument
68+
parsed_query.branch = selected_branch
69+
6470
# Extract relevant fields for CloneConfig
6571
clone_config = CloneConfig(
6672
url=parsed_query.url,
6773
local_path=str(parsed_query.local_path),
6874
commit=parsed_query.commit,
69-
branch=parsed_query.branch,
75+
branch=selected_branch,
7076
)
7177
clone_result = clone_repo(clone_config)
7278

7379
if inspect.iscoroutine(clone_result):
74-
asyncio.run(clone_result)
80+
if asyncio.get_event_loop().is_running():
81+
await clone_result
82+
else:
83+
asyncio.run(clone_result)
7584
else:
7685
raise TypeError("clone_repo did not return a coroutine as expected.")
7786

src/routers/__init__.py

Lines changed: 0 additions & 7 deletions
This file was deleted.

src/server/__init__.py

Whitespace-only changes.

src/main.py renamed to src/server/main.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,9 @@
1111
from slowapi.errors import RateLimitExceeded
1212
from starlette.middleware.trustedhost import TrustedHostMiddleware
1313

14-
from config import templates
15-
from routers import download, dynamic, index
16-
from server_utils import limiter
17-
from utils import lifespan, rate_limit_exception_handler
14+
from server.routers import download, dynamic, index
15+
from server.server_config import templates
16+
from server.server_utils import lifespan, limiter, rate_limit_exception_handler
1817

1918
# Load environment variables from .env file
2019
load_dotenv()

0 commit comments

Comments
 (0)