Skip to content

Commit 38d1bd6

Browse files
committed
feat(cli): Enhance file exclusion capabilities with multiple patterns and .gitingestignore support
- Added the ability to specify multiple file exclusion patterns using the `-e` option.
- Introduced support for a `.gitingestignore` file to define files and directories to be excluded.
- Improved command line pattern parsing to handle space-separated patterns.
- Combined exclusion patterns from both command line arguments and the `.gitingestignore` file for comprehensive exclusion.
- Removed the unused import 'os' to satisfy pylint checks.
- Corrected docstring issues to comply with darglint requirements.
- Updated documentation with clear examples illustrating the new features.

These enhancements provide users with a more flexible and user-friendly way to exclude files and directories, either through command line options or by using a `.gitingestignore` file.
1 parent 71b1167 commit 38d1bd6

File tree

1 file changed

+96
-23
lines changed

1 file changed

+96
-23
lines changed

src/gitingest/cli.py

Lines changed: 96 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,99 +1,172 @@
1-
""" Command-line interface for the Gitingest package. """
1+
"""Command-line interface for the Gitingest package."""
22

33
# pylint: disable=no-value-for-parameter
4-
54
import asyncio
5+
from pathlib import Path
66

77
import click
88

99
from config import MAX_FILE_SIZE
1010
from gitingest.repository_ingest import ingest
1111

1212

13+
def parse_ignore_file(ignore_file_path: Path) -> set[str]:
    """
    Parse the .gitingestignore file and return a set of patterns to ignore.

    Every line is stripped of surrounding whitespace. Blank lines and comment
    lines (lines whose first non-blank character is ``#``) are skipped.

    Parameters
    ----------
    ignore_file_path : Path
        Path to the .gitingestignore file

    Returns
    -------
    set[str]
        Set of patterns to ignore. Empty when the path does not exist or is
        not a regular file.
    """
    # A missing ignore file is the normal case; a directory at that path is
    # treated the same way instead of failing inside open().
    if not ignore_file_path.is_file():
        return set()

    patterns: set[str] = set()
    with open(ignore_file_path, encoding="utf-8") as f:
        for line in f:
            entry = line.strip()
            # Test the stripped text so that indented comments are skipped too
            if entry and not entry.startswith("#"):
                patterns.add(entry)

    return patterns
35+
36+
37+
def parse_patterns(patterns: tuple[str, ...]) -> set[str]:
    """
    Parse patterns from command line arguments.

    Handles both space-separated patterns in a single string
    and multiple -e/-i arguments.

    Parameters
    ----------
    patterns : tuple[str, ...]
        Tuple of patterns from command line

    Returns
    -------
    set[str]
        Set of parsed patterns
    """
    result: set[str] = set()
    for pattern_str in patterns:
        # str.split() without a separator splits on any run of whitespace and
        # never yields empty or padded tokens, so no extra strip/filter is needed.
        result.update(pattern_str.split())
    return result
58+
59+
1360
@click.command()
@click.argument("source", type=str, default=".")
@click.option("--output", "-o", default=None, help="Output file path (default: <repo_name>.txt in current directory)")
@click.option("--max-size", "-s", default=MAX_FILE_SIZE, help="Maximum file size to process in bytes")
@click.option("--exclude-pattern", "-e", multiple=True, help="Patterns to exclude (space-separated patterns allowed)")
@click.option("--include-pattern", "-i", multiple=True, help="Patterns to include (space-separated patterns allowed)")
@click.option("--ignore-file", default=".gitingestignore", help="Path to ignore file (default: .gitingestignore)")
def main(
    source: str,
    output: str | None,
    max_size: int,
    exclude_pattern: tuple[str, ...],
    include_pattern: tuple[str, ...],
    ignore_file: str,
):
    """
    Run the Gitingest command-line interface.

    The parsed command-line values are handed unchanged to the asynchronous
    implementation, which is driven to completion with ``asyncio.run``.

    Parameters
    ----------
    source : str
        The source directory or repository to analyze.
    output : str | None
        Destination of the output file. When omitted, the output is written
        to `<repo_name>.txt` in the current directory.
    max_size : int
        Upper bound, in bytes, on the size of files that are processed.
    exclude_pattern : tuple[str, ...]
        Patterns to exclude; each entry may hold several space-separated patterns.
    include_pattern : tuple[str, ...]
        Patterns to include; each entry may hold several space-separated patterns.
    ignore_file : str
        Path to the ignore file containing additional patterns to exclude.
    """
    # Build the coroutine first, then let asyncio own the event loop lifecycle
    job = async_main(source, output, max_size, exclude_pattern, include_pattern, ignore_file)
    asyncio.run(job)
4795

4896

49-
async def _async_main(
97+
async def async_main(
    source: str,
    output: str | None,
    max_size: int,
    exclude_pattern: tuple[str, ...],
    include_pattern: tuple[str, ...],
    ignore_file: str,
) -> None:
    """
    Analyze a directory or repository and create a text dump of its contents.

    This command analyzes the contents of a specified source directory or repository,
    applies custom include and exclude patterns, and generates a text summary of the
    analysis which is then written to an output file.

    Parameters
    ----------
    source : str
        The source directory or repository to analyze.
    output : str | None
        The path where the output file will be written. If not specified, the output
        will be written to a file named `<repo_name>.txt` in the current directory.
    max_size : int
        The maximum file size to process, in bytes. Files larger than this size will be ignored.
    exclude_pattern : tuple[str, ...]
        A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
    include_pattern : tuple[str, ...]
        A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
    ignore_file : str
        Path to the ignore file containing additional patterns to exclude.

    Raises
    ------
    Abort
        If there is an error during the execution of the command, this exception is raised to abort the process.
    """
    try:
        # Last path component of the source; falls back to a fixed name when it is empty
        repo_name = Path(source).name or "repository"

        # Set default output filename if not provided
        if not output:
            output = f"{repo_name}.txt"

        # Parse command line patterns
        exclude_patterns = parse_patterns(exclude_pattern)
        include_patterns = parse_patterns(include_pattern)

        # Merge patterns from the ignore file (resolved relative to the source) into the exclusions
        ignore_file_path = Path(source) / ignore_file
        exclude_patterns.update(parse_ignore_file(ignore_file_path))

        # Perform the ingest operation; only the summary is reported here
        summary, *_ = await ingest(source, max_size, include_patterns, exclude_patterns, output=output)

        # Display results
        click.echo(f"Analysis complete! Output written to: {output}")
        click.echo("\nSummary:")
        click.echo(summary)

    except FileNotFoundError as e:
        click.echo(f"Error: Source directory not found - {e}", err=True)
        raise click.Abort() from e
    except PermissionError as e:
        click.echo(f"Error: Permission denied - {e}", err=True)
        raise click.Abort() from e
    except Exception as e:
        # Every failure aborts: returning normally here would make the CLI
        # report success after a failed ingest.
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e
97170

98171

99172
if __name__ == "__main__":

0 commit comments

Comments
 (0)