Skip to content

Commit ce2a19c

Browse files
authored
Merge branch 'main' into docs/add-usage-examples
2 parents 555cb75 + 3869aa3 commit ce2a19c

23 files changed

+853
-207
lines changed

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ repos:
4444
- id: black
4545

4646
- repo: https://github.com/asottile/pyupgrade
47-
rev: v3.19.1
47+
rev: v3.20.0
4848
hooks:
4949
- id: pyupgrade
5050
description: "Automatically upgrade syntax for newer versions."
@@ -73,7 +73,7 @@ repos:
7373
- id: djlint-reformat-jinja
7474

7575
- repo: https://github.com/igorshubovych/markdownlint-cli
76-
rev: v0.44.0
76+
rev: v0.45.0
7777
hooks:
7878
- id: markdownlint
7979
description: "Lint markdown files."
@@ -88,7 +88,7 @@ repos:
8888
files: ^src/
8989

9090
- repo: https://github.com/pycqa/pylint
91-
rev: v3.3.6
91+
rev: v3.3.7
9292
hooks:
9393
- id: pylint
9494
name: pylint for source

README.md

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ You can also replace `hub` with `ingest` in any GitHub URL to access the corresp
2828

2929
## 📚 Requirements
3030

31-
- Python 3.7+
31+
- Python 3.8+
32+
- For private repositories: A GitHub Personal Access Token (PAT). You can generate one at [https://github.com/settings/personal-access-tokens](https://github.com/settings/personal-access-tokens) (Profile → Settings → Developer Settings → Personal Access Tokens → Fine-grained Tokens)
3233

3334
### 📦 Installation
3435

@@ -77,17 +78,34 @@ Issues and feature requests are welcome to the repo.
7778
The `gitingest` command line tool allows you to analyze codebases and create a text dump of their contents.
7879

7980
```bash
80-
# Basic usage
81+
# Basic usage (writes to digest.txt by default)
8182
gitingest /path/to/directory
8283

8384
# From URL
8485
gitingest https://github.com/cyclotruc/gitingest
86+
```
8587

86-
# See more options
87-
gitingest --help
88+
For private repositories, use the `--token/-t` option.
89+
90+
```bash
91+
# Get your token from https://github.com/settings/personal-access-tokens
92+
gitingest https://github.com/username/private-repo --token github_pat_...
93+
94+
# Or set it as an environment variable
95+
export GITHUB_TOKEN=github_pat_...
96+
gitingest https://github.com/username/private-repo
8897
```
8998

90-
This will write the digest in a text file (default `digest.txt`) in your current working directory.
99+
By default, the digest is written to a text file (`digest.txt`) in your current working directory. You can customize the output in two ways:
100+
101+
- Use `--output/-o <filename>` to write to a specific file.
102+
- Use `--output/-o -` to output directly to `STDOUT` (useful for piping to other tools).
103+
104+
See more options and usage details with:
105+
106+
```bash
107+
gitingest --help
108+
```
91109

92110
### Using Multiple Patterns (CLI)
93111

@@ -118,6 +136,18 @@ summary, tree, content = ingest("path/to/directory")
118136
summary, tree, content = ingest("https://github.com/cyclotruc/gitingest")
119137
```
120138

139+
For private repositories, you can pass a token:
140+
141+
```python
142+
# Using token parameter
143+
summary, tree, content = ingest("https://github.com/username/private-repo", token="github_pat_...")
144+
145+
# Or set it as an environment variable
146+
import os
147+
os.environ["GITHUB_TOKEN"] = "github_pat_..."
148+
summary, tree, content = ingest("https://github.com/username/private-repo")
149+
```
150+
121151
By default, this won't write a file but can be enabled with the `output` argument.
122152

123153
```python

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dependencies = [
1111
"python-dotenv",
1212
"slowapi",
1313
"starlette>=0.40.0", # Vulnerable to https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw
14-
"tiktoken",
14+
"tiktoken>=0.7.0", # Support for o200k_base encoding
1515
"tomli",
1616
"typing_extensions; python_version < '3.10'",
1717
"uvicorn>=0.11.7", # Vulnerable to https://osv.dev/vulnerability/PYSEC-2020-150
@@ -23,7 +23,6 @@ classifiers=[
2323
"Development Status :: 3 - Alpha",
2424
"Intended Audience :: Developers",
2525
"License :: OSI Approved :: MIT License",
26-
"Programming Language :: Python :: 3.7",
2726
"Programming Language :: Python :: 3.8",
2827
"Programming Language :: Python :: 3.9",
2928
"Programming Language :: Python :: 3.10",

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@ pydantic
44
python-dotenv
55
slowapi
66
starlette>=0.40.0 # Vulnerable to https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw
7-
tiktoken
7+
tiktoken>=0.7.0 # Support for o200k_base encoding
88
tomli
99
uvicorn>=0.11.7 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2020-150

src/gitingest/cli.py

Lines changed: 104 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,42 +13,92 @@
1313

1414
@click.command()
1515
@click.argument("source", type=str, default=".")
16-
@click.option("--output", "-o", default=None, help="Output file path (default: <repo_name>.txt in current directory)")
17-
@click.option("--max-size", "-s", default=MAX_FILE_SIZE, help="Maximum file size to process in bytes")
18-
@click.option("--exclude-pattern", "-e", multiple=True, help="Patterns to exclude")
19-
@click.option("--include-pattern", "-i", multiple=True, help="Patterns to include")
16+
@click.option(
17+
"--output",
18+
"-o",
19+
default=None,
20+
help="Output file path (default: <repo_name>.txt in current directory)",
21+
)
22+
@click.option(
23+
"--max-size",
24+
"-s",
25+
default=MAX_FILE_SIZE,
26+
help="Maximum file size to process in bytes",
27+
)
28+
@click.option(
29+
"--exclude-pattern",
30+
"-e",
31+
multiple=True,
32+
help=(
33+
"Patterns to exclude. Handles Python's arbitrary subset of Unix shell-style "
34+
"wildcards. See: https://docs.python.org/3/library/fnmatch.html"
35+
),
36+
)
37+
@click.option(
38+
"--include-pattern",
39+
"-i",
40+
multiple=True,
41+
help=(
42+
"Patterns to include. Handles Python's arbitrary subset of Unix shell-style "
43+
"wildcards. See: https://docs.python.org/3/library/fnmatch.html"
44+
),
45+
)
2046
@click.option("--branch", "-b", default=None, help="Branch to clone and ingest")
47+
@click.option(
48+
"--token",
49+
"-t",
50+
envvar="GITHUB_TOKEN",
51+
default=None,
52+
help=(
53+
"GitHub personal access token for accessing private repositories. "
54+
"If omitted, the CLI will look for the GITHUB_TOKEN environment variable."
55+
),
56+
)
2157
def main(
2258
source: str,
2359
output: Optional[str],
2460
max_size: int,
2561
exclude_pattern: Tuple[str, ...],
2662
include_pattern: Tuple[str, ...],
2763
branch: Optional[str],
64+
token: Optional[str],
2865
):
2966
"""
30-
Main entry point for the CLI. This function is called when the CLI is run as a script.
67+
Main entry point for the CLI. This function is called when the CLI is run as a script.
3168
3269
It calls the async main function to run the command.
3370
3471
Parameters
3572
----------
3673
source : str
37-
The source directory or repository to analyze.
74+
A directory path or a Git repository URL.
3875
output : str, optional
3976
The path where the output file will be written. If not specified, the output will be written
40-
to a file named `<repo_name>.txt` in the current directory.
77+
to a file named `digest.txt` in the current directory. Use '-' to output to stdout.
4178
max_size : int
42-
The maximum file size to process, in bytes. Files larger than this size will be ignored.
79+
Maximum file size (in bytes) to consider.
4380
exclude_pattern : Tuple[str, ...]
44-
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
81+
Glob patterns for pruning the file set.
4582
include_pattern : Tuple[str, ...]
46-
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
83+
Glob patterns for including files in the output.
4784
branch : str, optional
48-
The branch to clone (optional).
85+
Specific branch to ingest (defaults to the repository's default).
86+
token : str, optional
87+
GitHub personal-access token (PAT). Needed when *source* refers to a
88+
**private** repository. Can also be set via the ``GITHUB_TOKEN`` env var.
4989
"""
50-
# Main entry point for the CLI. This function is called when the CLI is run as a script.
51-
asyncio.run(_async_main(source, output, max_size, exclude_pattern, include_pattern, branch))
90+
91+
asyncio.run(
92+
_async_main(
93+
source=source,
94+
output=output,
95+
max_size=max_size,
96+
exclude_pattern=exclude_pattern,
97+
include_pattern=include_pattern,
98+
branch=branch,
99+
token=token,
100+
)
101+
)
52102

53103

54104
async def _async_main(
@@ -58,50 +108,75 @@ async def _async_main(
58108
exclude_pattern: Tuple[str, ...],
59109
include_pattern: Tuple[str, ...],
60110
branch: Optional[str],
111+
token: Optional[str],
61112
) -> None:
62113
"""
63114
Analyze a directory or repository and create a text dump of its contents.
64115
65116
This command analyzes the contents of a specified source directory or repository, applies custom include and
66-
exclude patterns, and generates a text summary of the analysis which is then written to an output file.
117+
exclude patterns, and generates a text summary of the analysis which is then written to an output file
118+
or printed to stdout.
67119
68120
Parameters
69121
----------
70122
source : str
71-
The source directory or repository to analyze.
123+
A directory path or a Git repository URL.
72124
output : str, optional
73125
The path where the output file will be written. If not specified, the output will be written
74-
to a file named `<repo_name>.txt` in the current directory.
126+
to a file named `digest.txt` in the current directory. Use '-' to output to stdout.
75127
max_size : int
76-
The maximum file size to process, in bytes. Files larger than this size will be ignored.
128+
Maximum file size (in bytes) to consider.
77129
exclude_pattern : Tuple[str, ...]
78-
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
130+
Glob patterns for pruning the file set.
79131
include_pattern : Tuple[str, ...]
80-
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
132+
Glob patterns for including files in the output.
81133
branch : str, optional
82-
The branch to clone (optional).
134+
Specific branch to ingest (defaults to the repository's default).
135+
token : str, optional
136+
GitHub personal-access token (PAT). Needed when *source* refers to a
137+
**private** repository. Can also be set via the ``GITHUB_TOKEN`` env var.
83138
84139
Raises
85140
------
86141
Abort
87142
If there is an error during the execution of the command, this exception is raised to abort the process.
88143
"""
89144
try:
90-
# Combine default and custom ignore patterns
145+
# Normalise pattern containers (the ingest layer expects sets)
91146
exclude_patterns = set(exclude_pattern)
92147
include_patterns = set(include_pattern)
93148

94-
if not output:
95-
output = OUTPUT_FILE_NAME
96-
summary, _, _ = await ingest_async(source, max_size, include_patterns, exclude_patterns, branch, output=output)
97-
98-
click.echo(f"Analysis complete! Output written to: {output}")
99-
click.echo("\nSummary:")
100-
click.echo(summary)
149+
output_target = output if output is not None else OUTPUT_FILE_NAME
150+
151+
if output_target == "-":
152+
click.echo("Analyzing source, preparing output for stdout...", err=True)
153+
else:
154+
click.echo(f"Analyzing source, output will be written to '{output_target}'...", err=True)
155+
156+
summary, _, _ = await ingest_async(
157+
source=source,
158+
max_file_size=max_size,
159+
include_patterns=include_patterns,
160+
exclude_patterns=exclude_patterns,
161+
branch=branch,
162+
output=output_target,
163+
token=token,
164+
)
165+
166+
if output_target == "-": # stdout
167+
click.echo("\n--- Summary ---", err=True)
168+
click.echo(summary, err=True)
169+
click.echo("--- End Summary ---", err=True)
170+
click.echo("Analysis complete! Output sent to stdout.", err=True)
171+
else: # file
172+
click.echo(f"Analysis complete! Output written to: {output_target}")
173+
click.echo("\nSummary:")
174+
click.echo(summary)
101175

102176
except Exception as exc:
177+
# Convert any exception into click.Abort so that the exit status is non-zero
103178
click.echo(f"Error: {exc}", err=True)
104-
raise click.Abort()
179+
raise click.Abort() from exc
105180

106181

107182
if __name__ == "__main__":

0 commit comments

Comments
 (0)