Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,10 @@ repos:
rev: v1.36.4
hooks:
- id: djlint-reformat-jinja

- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.43.0
hooks:
- id: markdownlint
description: "Lint markdown files."
args: ["--disable=line-length"]
71 changes: 25 additions & 46 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,32 +1,13 @@
[![Image](./docs/frontpage.png "GitIngest main page")](https://gitingest.com)
# GitIngest

<!-- License -->
<a href="https://github.com/cyclotruc/gitingest/blob/main/LICENSE">
<img alt="License" src="https://img.shields.io/badge/license-MIT-blue.svg" />
</a>
<!-- PyPI version -->
<a href="https://badge.fury.io/py/gitingest">
<img src="https://badge.fury.io/py/gitingest.svg" alt="PyPI version" />
</a>
<!-- Downloads -->
<a href="https://pepy.tech/project/gitingest">
<img src="https://pepy.tech/badge/gitingest" alt="Downloads" />
</a>
<!-- GitHub issues -->
<a href="https://github.com/cyclotruc/gitingest/issues">
<img src="https://img.shields.io/github/issues/cyclotruc/gitingest" alt="GitHub issues" />
</a>
<!-- Black code style -->
<a href="https://github.com/psf/black">
<img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg" />
</a>

<!-- Discord -->
<a href="https://discord.com/invite/zerRaGK9EC">
<img src="https://dcbadge.limes.pink/api/server/https://discord.com/invite/zerRaGK9EC" alt="Discord" />
</a>
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/cyclotruc/gitingest/blob/main/LICENSE)
[![PyPI version](https://badge.fury.io/py/gitingest.svg)](https://badge.fury.io/py/gitingest)
[![Downloads](https://pepy.tech/badge/gitingest)](https://pepy.tech/project/gitingest)
[![GitHub issues](https://img.shields.io/github/issues/cyclotruc/gitingest)](https://github.com/cyclotruc/gitingest/issues)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![Discord](https://dcbadge.limes.pink/api/server/https://discord.com/invite/zerRaGK9EC)](https://discord.com/invite/zerRaGK9EC)

# GitIngest
[![Image](./docs/frontpage.png "GitIngest main page")](https://gitingest.com)

Turn any Git repository into a prompt-friendly text ingest for LLMs.

Expand Down Expand Up @@ -92,15 +73,15 @@ By default, this won't write a file but can be enabled with the `output` argumen

1. Build the image:

``` bash
docker build -t gitingest .
```
``` bash
docker build -t gitingest .
```

2. Run the container:

``` bash
docker run -d --name gitingest -p 8000:8000 gitingest
```
``` bash
docker run -d --name gitingest -p 8000:8000 gitingest
```

The application will be available at `http://localhost:8000`.
Ensure environment variables are set before running the application or deploying it via Docker.
Expand Down Expand Up @@ -135,22 +116,20 @@ ALLOWED_HOSTS="gitingest.local,localhost"

1. Clone the repository

```bash
git clone https://github.com/cyclotruc/gitingest.git
cd gitingest
```
```bash
git clone https://github.com/cyclotruc/gitingest.git
cd gitingest
```

2. Install dependencies

```bash
pip install -r requirements.txt
```
```bash
pip install -r requirements.txt
```

3. Run the application:

```bash
cd src
uvicorn main:app --reload
```

The frontend will be available at `localhost:8000`.
```bash
cd src
uvicorn main:app --reload
```
6 changes: 3 additions & 3 deletions src/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
MAX_DISPLAY_SIZE = 300_000
TMP_BASE_PATH = "../tmp"
MAX_DISPLAY_SIZE: int = 300_000
TMP_BASE_PATH: str = "../tmp"

EXAMPLE_REPOS = [
EXAMPLE_REPOS: list[dict[str, str]] = [
{"name": "Gitingest", "url": "https://github.com/cyclotruc/gitingest"},
{"name": "FastAPI", "url": "https://github.com/tiangolo/fastapi"},
{"name": "Flask", "url": "https://github.com/pallets/flask"},
Expand Down
10 changes: 0 additions & 10 deletions src/gitingest/cli.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,9 @@
import os

import click

from gitingest.ingest import ingest
from gitingest.ingest_from_query import MAX_FILE_SIZE


def normalize_pattern(pattern: str) -> str:
    """
    Normalize a path pattern.

    Surrounding whitespace is stripped, leading path separators are removed,
    and a pattern that ends with a path separator gets ``*`` appended.

    Parameters
    ----------
    pattern : str
        The raw pattern to normalize.

    Returns
    -------
    str
        The normalized pattern.
    """
    # Accept the alternate separator as well (``/`` on Windows, where
    # ``os.sep`` is a backslash). On POSIX ``os.altsep`` is None, so only
    # ``os.sep`` is considered.
    separators = os.sep + (os.altsep or "")

    pattern = pattern.strip().lstrip(separators)
    if pattern.endswith(tuple(separators)):
        pattern += "*"
    return pattern


@click.command()
@click.argument("source", type=str, required=True)
@click.option("--output", "-o", default=None, help="Output file path (default: <repo_name>.txt in current directory)")
Expand Down
134 changes: 67 additions & 67 deletions src/gitingest/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from gitingest.utils import AsyncTimeoutError, async_timeout

CLONE_TIMEOUT = 20
CLONE_TIMEOUT: int = 20


@dataclass
Expand All @@ -14,67 +14,6 @@ class CloneConfig:
branch: str | None = None


async def check_repo_exists(url: str) -> bool:
    """
    Check if a repository exists at the given URL using an HTTP HEAD request.

    The request is made by running ``curl -I`` in a subprocess and inspecting
    the response headers it prints.

    Parameters
    ----------
    url : str
        The URL of the repository.

    Returns
    -------
    bool
        False if curl exits with a non-zero status or any response status
        line reports 404; True otherwise.
    """
    proc = await asyncio.create_subprocess_exec(
        "curl",
        "-I",
        url,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, _ = await proc.communicate()
    if proc.returncode != 0:
        return False

    # Header bytes are not guaranteed to be valid UTF-8; decode leniently so a
    # stray byte cannot raise UnicodeDecodeError here.
    headers = stdout.decode(errors="replace")

    # Inspect each status line ("HTTP/<version> <code> ...") rather than
    # matching fixed substrings, so every HTTP version is covered.
    for line in headers.splitlines():
        parts = line.split(maxsplit=2)
        if len(parts) >= 2 and parts[0].startswith("HTTP/") and parts[1] == "404":
            return False
    return True


async def run_git_command(*args: str) -> tuple[bytes, bytes]:
    """
    Execute a git command asynchronously and capture its output.

    Parameters
    ----------
    *args : str
        The git command and its arguments to execute. The first element is
        the program to run.

    Returns
    -------
    tuple[bytes, bytes]
        A tuple containing the stdout and stderr of the git command.

    Raises
    ------
    RuntimeError
        If the git command exits with a non-zero status.
    """
    proc = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        # Decode leniently: a strict decode of non-UTF-8 stderr would raise
        # UnicodeDecodeError and hide the documented RuntimeError.
        error_message = stderr.decode(errors="replace").strip()
        raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}")

    return stdout, stderr


@async_timeout(CLONE_TIMEOUT)
async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
"""
Expand Down Expand Up @@ -116,29 +55,90 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
raise ValueError("The 'local_path' parameter is required.")

# Check if the repository exists
if not await check_repo_exists(url):
if not await _check_repo_exists(url):
raise ValueError("Repository not found, make sure it is public")

try:
if commit:
# Scenario 1: Clone and checkout a specific commit
# Clone the repository without depth to ensure full history for checkout
clone_cmd = ["git", "clone", "--single-branch", url, local_path]
await run_git_command(*clone_cmd)
await _run_git_command(*clone_cmd)

# Checkout the specific commit
checkout_cmd = ["git", "-C", local_path, "checkout", commit]
return await run_git_command(*checkout_cmd)
return await _run_git_command(*checkout_cmd)

if branch and branch.lower() not in ("main", "master"):

# Scenario 2: Clone a specific branch with shallow depth
clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch, url, local_path]
return await run_git_command(*clone_cmd)
return await _run_git_command(*clone_cmd)

# Scenario 3: Clone the default branch with shallow depth
clone_cmd = ["git", "clone", "--depth=1", "--single-branch", url, local_path]
return await run_git_command(*clone_cmd)
return await _run_git_command(*clone_cmd)

except (RuntimeError, asyncio.TimeoutError, AsyncTimeoutError):
raise # Re-raise the exception


async def _check_repo_exists(url: str) -> bool:
    """
    Check if a repository exists at the given URL using an HTTP HEAD request.

    The request is made by running ``curl -I`` in a subprocess and inspecting
    the response headers it prints.

    Parameters
    ----------
    url : str
        The URL of the repository.

    Returns
    -------
    bool
        False if curl exits with a non-zero status or any response status
        line reports 404; True otherwise.
    """
    proc = await asyncio.create_subprocess_exec(
        "curl",
        "-I",
        url,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, _ = await proc.communicate()
    if proc.returncode != 0:
        return False

    # Header bytes are not guaranteed to be valid UTF-8; decode leniently so a
    # stray byte cannot raise UnicodeDecodeError here.
    response_headers = stdout.decode(errors="replace")

    # Inspect each status line ("HTTP/<version> <code> ...") rather than
    # matching fixed substrings, so every HTTP version is covered.
    for header_line in response_headers.splitlines():
        fields = header_line.split(maxsplit=2)
        if len(fields) >= 2 and fields[0].startswith("HTTP/") and fields[1] == "404":
            return False
    return True


async def _run_git_command(*args: str) -> tuple[bytes, bytes]:
    """
    Execute a git command asynchronously and capture its output.

    Parameters
    ----------
    *args : str
        The git command and its arguments to execute. The first element is
        the program to run.

    Returns
    -------
    tuple[bytes, bytes]
        A tuple containing the stdout and stderr of the git command.

    Raises
    ------
    RuntimeError
        If the git command exits with a non-zero status.
    """
    proc = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        # Decode leniently: a strict decode of non-UTF-8 stderr would raise
        # UnicodeDecodeError and hide the documented RuntimeError.
        error_message = stderr.decode(errors="replace").strip()
        raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}")

    return stdout, stderr
Loading
Loading