Skip to content

Commit d2d5eda

Browse files
authored
Merge branch 'main' into feature/pr-221-fix
2 parents 7fb28a7 + ba701a8 commit d2d5eda

File tree

10 files changed

+175
-17
lines changed

10 files changed

+175
-17
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ repos:
105105
starlette>=0.40.0,
106106
tiktoken,
107107
tomli,
108+
pathspec,
108109
uvicorn>=0.11.7,
109110
]
110111
- id: pylint
@@ -124,6 +125,7 @@ repos:
124125
starlette>=0.40.0,
125126
tiktoken,
126127
tomli,
128+
pathspec,
127129
uvicorn>=0.11.7,
128130
]
129131

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@ You can also replace `hub` with `ingest` in any GitHub URL to access the corresp
1515

1616
[gitingest.com](https://gitingest.com) · [Chrome Extension](https://chromewebstore.google.com/detail/adfjahbijlkjfoicpjkhjicpjpjfaood) · [Firefox Add-on](https://addons.mozilla.org/firefox/addon/gitingest)
1717

18+
<!-- Keep these links. Translations will automatically update with the README. -->
19+
[Deutsch](https://www.readme-i18n.com/cyclotruc/gitingest?lang=de) |
20+
[Español](https://www.readme-i18n.com/cyclotruc/gitingest?lang=es) |
21+
[Français](https://www.readme-i18n.com/cyclotruc/gitingest?lang=fr) |
22+
[日本語](https://www.readme-i18n.com/cyclotruc/gitingest?lang=ja) |
23+
[한국어](https://www.readme-i18n.com/cyclotruc/gitingest?lang=ko) |
24+
[Português](https://www.readme-i18n.com/cyclotruc/gitingest?lang=pt) |
25+
[Русский](https://www.readme-i18n.com/cyclotruc/gitingest?lang=ru) |
26+
[中文](https://www.readme-i18n.com/cyclotruc/gitingest?lang=zh)
27+
1828
## 🚀 Features
1929

2030
- **Easy code context**: Get a text digest from a Git repository URL or a directory
@@ -99,6 +109,9 @@ export GITHUB_TOKEN=github_pat_...
99109
gitingest https://github.com/username/private-repo
100110
```
101111

112+
By default, files matching the patterns in `.gitignore` are skipped. Use `--include-gitignored` if you
113+
need those files in the digest.
114+
102115
By default, the digest is written to a text file (`digest.txt`) in your current working directory. You can customize the output in two ways:
103116

104117
- Use `--output/-o <filename>` to write to a specific file.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ dependencies = [
1313
"starlette>=0.40.0", # Vulnerable to https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw
1414
"tiktoken>=0.7.0", # Support for o200k_base encoding
1515
"tomli",
16+
"pathspec>=0.12.1",
1617
"typing_extensions; python_version < '3.10'",
1718
"uvicorn>=0.11.7", # Vulnerable to https://osv.dev/vulnerability/PYSEC-2020-150
1819
]

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
click>=8.0.0
22
fastapi[standard]>=0.109.1 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2024-38
3+
pathspec>=0.12.1
34
pydantic
45
python-dotenv
56
slowapi

src/gitingest/cli.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@
4545
)
4646
@click.option("--branch", "-b", default=None, help="Branch to clone and ingest")
4747
@click.option("--include-submodules", is_flag=True, help="Include repository's submodules in the analysis")
48+
@click.option(
49+
"--include-gitignored",
50+
is_flag=True,
51+
default=False,
52+
help="Include files matched by .gitignore",
53+
)
4854
@click.option(
4955
"--token",
5056
"-t",
@@ -63,6 +69,7 @@ def main(
6369
include_pattern: Tuple[str, ...],
6470
branch: Optional[str],
6571
include_submodules: bool,
72+
include_gitignored: bool,
6673
token: Optional[str],
6774
):
6875
"""
@@ -88,11 +95,12 @@ def main(
8895
include_submodules : bool
8996
If True, recursively include and analyze all Git submodules within the repository.
9097
Set to False to ignore submodules during analysis (default is False).
98+
include_gitignored : bool
99+
If provided, include files normally ignored by .gitignore.
91100
token: str, optional
92101
GitHub personal-access token (PAT). Needed when *source* refers to a
93102
**private** repository. Can also be set via the ``GITHUB_TOKEN`` env var.
94103
"""
95-
96104
asyncio.run(
97105
_async_main(
98106
source=source,
@@ -102,6 +110,7 @@ def main(
102110
include_pattern=include_pattern,
103111
branch=branch,
104112
include_submodules=include_submodules,
113+
include_gitignored=include_gitignored,
105114
token=token,
106115
)
107116
)
@@ -115,6 +124,7 @@ async def _async_main(
115124
include_pattern: Tuple[str, ...],
116125
branch: Optional[str],
117126
include_submodules: bool,
127+
include_gitignored: bool,
118128
token: Optional[str],
119129
) -> None:
120130
"""
@@ -139,6 +149,8 @@ async def _async_main(
139149
Glob patterns for including files in the output.
140150
branch : str, optional
141151
Specific branch to ingest (defaults to the repository's default).
152+
include_gitignored : bool
153+
If provided, include files normally ignored by .gitignore.
142154
token: str, optional
143155
GitHub personal-access token (PAT). Needed when *source* refers to a
144156
**private** repository. Can also be set via the ``GITHUB_TOKEN`` env var.
@@ -171,6 +183,7 @@ async def _async_main(
171183
branch=branch,
172184
include_submodules=include_submodules,
173185
output=output_target,
186+
include_gitignored=include_gitignored,
174187
token=token,
175188
)
176189

src/gitingest/entrypoint.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from gitingest.config import TMP_BASE_PATH
1212
from gitingest.ingestion import ingest_query
1313
from gitingest.query_parsing import IngestionQuery, parse_query
14+
from gitingest.utils.ignore_patterns import load_gitignore_patterns
1415

1516

1617
async def ingest_async(
@@ -19,6 +20,7 @@ async def ingest_async(
1920
include_patterns: Optional[Union[str, Set[str]]] = None,
2021
exclude_patterns: Optional[Union[str, Set[str]]] = None,
2122
branch: Optional[str] = None,
23+
include_gitignored: bool = False,
2224
token: Optional[str] = None,
2325
include_submodules: bool = False,
2426
output: Optional[str] = None,
@@ -46,6 +48,8 @@ async def ingest_async(
4648
include_submodules : bool
4749
If True, recursively include and analyze all Git submodules within the repository.
4850
Set to False to ignore submodules during analysis (default is False).
51+
include_gitignored : bool
52+
If ``True``, include files ignored by ``.gitignore``. Defaults to ``False``.
4953
token : str, optional
5054
GitHub personal-access token (PAT). Needed when *source* refers to a
5155
**private** repository. Can also be set via the ``GITHUB_TOKEN`` env var.
@@ -81,6 +85,10 @@ async def ingest_async(
8185
token=token,
8286
)
8387

88+
if not include_gitignored:
89+
gitignore_patterns = load_gitignore_patterns(query.local_path)
90+
query.ignore_patterns.update(gitignore_patterns)
91+
8492
if query.url:
8593
selected_branch = branch if branch else query.branch # prioritize branch argument
8694
query.branch = selected_branch
@@ -123,6 +131,7 @@ def ingest(
123131
exclude_patterns: Optional[Union[str, Set[str]]] = None,
124132
branch: Optional[str] = None,
125133
include_submodules: bool = False,
134+
include_gitignored: bool = False,
126135
token: Optional[str] = None,
127136
output: Optional[str] = None,
128137
) -> Tuple[str, str, str]:
@@ -149,6 +158,8 @@ def ingest(
149158
include_submodules : bool
150159
If True, recursively include and analyze all Git submodules within the repository.
151160
Set to False to ignore submodules during analysis (default is False).
161+
include_gitignored : bool
162+
If ``True``, include files ignored by ``.gitignore``. Defaults to ``False``.
152163
token : str, optional
153164
GitHub personal-access token (PAT). Needed when *source* refers to a
154165
**private** repository. Can also be set via the ``GITHUB_TOKEN`` env var.
@@ -175,6 +186,7 @@ def ingest(
175186
exclude_patterns=exclude_patterns,
176187
branch=branch,
177188
include_submodules=include_submodules,
189+
include_gitignored=include_gitignored,
178190
token=token,
179191
output=output,
180192
)

src/gitingest/utils/ignore_patterns.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Default ignore patterns for Gitingest."""
22

3+
import os
4+
from pathlib import Path
35
from typing import Set
46

57
DEFAULT_IGNORE_PATTERNS: Set[str] = {
@@ -160,3 +162,47 @@
160162
# Gitingest
161163
"digest.txt",
162164
}
165+
166+
167+
def load_gitignore_patterns(root: Path) -> Set[str]:
    """
    Recursively load ignore patterns from all .gitignore files under the given root directory.

    Every pattern is rewritten so that it is expressed relative to ``root`` (using ``/`` as the
    separator) while keeping the scope it had in its own ``.gitignore``:

    - patterns from the root ``.gitignore`` are kept as written (a leading ``/`` stays in place so the
      pattern remains anchored to the root);
    - anchored patterns from a nested ``.gitignore`` (leading ``/`` or a ``/`` in the middle) become
      ``<dir>/<pattern>``;
    - un-anchored patterns from a nested ``.gitignore`` become ``<dir>/**/<pattern>`` so they still
      apply at any depth below that directory.

    Negated patterns keep their leading ``!``. Blank lines, comment lines and patterns that are empty
    once ``!`` and ``/`` are removed are skipped.

    Parameters
    ----------
    root : Path
        The root directory to search for .gitignore files.

    Returns
    -------
    Set[str]
        A set of ignore patterns extracted from all .gitignore files found under the root directory.
        Because the result is a set, the relative order of the patterns in the files is not retained.
    """
    patterns: Set[str] = set()
    for dirpath, dirnames, filenames in os.walk(root):
        # Git's own metadata directory is not part of the work tree, so don't descend into it.
        if ".git" in dirnames:
            dirnames.remove(".git")

        if ".gitignore" not in filenames:
            continue

        # Computed once per directory; only the directory part gets its separators normalised so that
        # backslash escapes inside the patterns themselves (e.g. ``\#name``) are left intact.
        rel_dir = os.path.relpath(dirpath, root).replace(os.sep, "/")

        gitignore_path = Path(dirpath) / ".gitignore"
        # errors="replace": one undecodable byte in a .gitignore should not abort the whole run.
        with gitignore_path.open("r", encoding="utf-8", errors="replace") as f:
            for line in f:
                stripped = line.strip()

                if not stripped or stripped.startswith("#"):
                    continue

                negated = stripped.startswith("!")
                if negated:
                    stripped = stripped[1:]

                # Nothing left to match (e.g. "!" or "/"); prefixing it with a directory would
                # otherwise turn it into a pattern for that whole directory.
                if not stripped.strip("/"):
                    continue

                pattern_body = _scope_gitignore_pattern(stripped, rel_dir)
                patterns.add(f"!{pattern_body}" if negated else pattern_body)

    return patterns


def _scope_gitignore_pattern(body: str, rel_dir: str) -> str:
    """Rewrite one pattern (without its ``!``) found in ``rel_dir``'s .gitignore to be relative to the root."""
    leading_slash = body.startswith("/")
    name = body.lstrip("/")

    if rel_dir == ".":
        # Root-level file: the pattern already has the right scope; just collapse repeated slashes.
        return f"/{name}" if leading_slash else body

    # A slash at the start or in the middle (a trailing one does not count) ties the pattern
    # to the directory that holds the .gitignore.
    anchored = leading_slash or "/" in name.rstrip("/")
    if anchored:
        return f"{rel_dir}/{name}"
    return f"{rel_dir}/**/{name}"

src/gitingest/utils/ingestion_utils.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
"""Utility functions for the ingestion process."""
22

3-
from fnmatch import fnmatch
43
from pathlib import Path
54
from typing import Set
65

6+
from pathspec import PathSpec
7+
78

89
def _should_include(path: Path, base_path: Path, include_patterns: Set[str]) -> bool:
910
"""
@@ -38,10 +39,8 @@ def _should_include(path: Path, base_path: Path, include_patterns: Set[str]) ->
3839
if path.is_dir():
3940
return True
4041

41-
for pattern in include_patterns:
42-
if fnmatch(rel_str, pattern):
43-
return True
44-
return False
42+
spec = PathSpec.from_lines("gitwildmatch", include_patterns)
43+
return spec.match_file(rel_str)
4544

4645

4746
def _should_exclude(path: Path, base_path: Path, ignore_patterns: Set[str]) -> bool:
@@ -73,7 +72,5 @@ def _should_exclude(path: Path, base_path: Path, ignore_patterns: Set[str]) -> b
7372
return True
7473

7574
rel_str = str(rel_path)
76-
for pattern in ignore_patterns:
77-
if pattern and fnmatch(rel_str, pattern):
78-
return True
79-
return False
75+
spec = PathSpec.from_lines("gitwildmatch", ignore_patterns)
76+
return spec.match_file(rel_str)

tests/test_gitignore_feature.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""
2+
Tests for the gitignore functionality in Gitingest.
3+
"""
4+
5+
from pathlib import Path
6+
7+
import pytest
8+
9+
from gitingest.entrypoint import ingest_async
10+
from gitingest.utils.ignore_patterns import load_gitignore_patterns
11+
12+
13+
@pytest.fixture(name="repo_path")
def repo_fixture(tmp_path: Path) -> Path:
    """
    Populate ``tmp_path`` with a minimal repository layout and return it.

    The layout consists of:
    - '.gitignore', whose single rule excludes 'exclude.txt'
    - 'include.txt', which is expected to be processed
    - 'exclude.txt', which is expected to be skipped when gitignore rules are respected
    """
    # File name -> content, written in this order.
    layout = {
        ".gitignore": "exclude.txt\n",
        "include.txt": "This file should be included.",
        "exclude.txt": "This file should be excluded.",
    }
    for file_name, text in layout.items():
        (tmp_path / file_name).write_text(text)

    return tmp_path
34+
35+
36+
def test_load_gitignore_patterns(tmp_path: Path):
    """
    Check that load_gitignore_patterns() picks up the patterns of a root-level .gitignore
    and leaves comment lines out of the result.
    """
    # Two real patterns followed by a comment line.
    (tmp_path / ".gitignore").write_text("exclude.txt\n*.log\n# a comment\n")

    loaded = load_gitignore_patterns(tmp_path)

    # Both patterns must have been collected.
    assert "exclude.txt" in loaded
    assert "*.log" in loaded
    # No collected entry may be a comment.
    assert not any(entry.startswith("#") for entry in loaded)
52+
53+
54+
@pytest.mark.asyncio
async def test_ingest_with_gitignore(repo_path: Path):
    """
    Integration test: ingest_async() honours .gitignore rules unless told otherwise.

    With ``include_gitignored`` left at its default (``False``) the content of 'exclude.txt' must be
    absent from the digest; with ``include_gitignored=True`` the content of both files must be present.
    """
    kept_text = "This file should be included."
    ignored_text = "This file should be excluded."
    source = str(repo_path)

    # Default behaviour: .gitignore rules are applied.
    _, _, digest_default = await ingest_async(source=source)
    assert ignored_text not in digest_default
    assert kept_text in digest_default

    # Opt-out: ignored files are ingested as well.
    _, _, digest_everything = await ingest_async(source=source, include_gitignored=True)
    assert ignored_text in digest_everything
    assert kept_text in digest_everything

tests/test_ingestion.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,10 @@ class PatternScenario(TypedDict):
8484
"*/file_dir2.txt",
8585
},
8686
"ignore_patterns": {*()},
87-
"expected_num_files": 3,
88-
"expected_content": {"file1.txt", "file2.py", "dir2/file_dir2.txt"},
89-
"expected_structure": {"test_repo/", "dir2/"},
90-
"expected_not_structure": {"src/", "subdir/", "dir1/"},
87+
"expected_num_files": 4,
88+
"expected_content": {"file1.txt", "file2.py", "dir1/file_dir1.txt", "dir2/file_dir2.txt"},
89+
"expected_structure": {"test_repo/", "dir1/", "dir2/"},
90+
"expected_not_structure": {"src/", "subdir/"},
9191
}
9292
),
9393
id="include-wildcard-directory",
@@ -114,9 +114,10 @@ class PatternScenario(TypedDict):
114114
{
115115
"include_patterns": {"**/file_dir2.txt", "src/**/*.py"},
116116
"ignore_patterns": {*()},
117-
"expected_num_files": 2,
117+
"expected_num_files": 3,
118118
"expected_content": {
119119
"dir2/file_dir2.txt",
120+
"src/subfile2.py",
120121
"src/subdir/file_subdir.py",
121122
},
122123
"expected_structure": {"test_repo/", "dir2/", "src/", "subdir/"},
@@ -169,12 +170,11 @@ class PatternScenario(TypedDict):
169170
{
170171
"include_patterns": {*()},
171172
"ignore_patterns": {"src/**/*.py"},
172-
"expected_num_files": 7,
173+
"expected_num_files": 6,
173174
"expected_content": {
174175
"file1.txt",
175176
"file2.py",
176177
"src/subfile1.txt",
177-
"src/subfile2.py",
178178
"src/subdir/file_subdir.txt",
179179
"dir1/file_dir1.txt",
180180
"dir2/file_dir2.txt",

0 commit comments

Comments
 (0)