|
1 | | -""" Command-line interface for the Gitingest package. """ |
| 1 | +"""Command-line interface for the Gitingest package.""" |
2 | 2 |
|
3 | 3 | # pylint: disable=no-value-for-parameter |
4 | | - |
5 | 4 | import asyncio |
| 5 | +from pathlib import Path |
6 | 6 |
|
7 | 7 | import click |
8 | 8 |
|
9 | 9 | from config import MAX_FILE_SIZE |
10 | 10 | from gitingest.repository_ingest import ingest |
11 | 11 |
|
12 | 12 |
|
def parse_ignore_file(ignore_file_path: Path) -> set[str]:
    """
    Parse the .gitingestignore file and return a set of patterns to ignore.

    Blank lines and comment lines (lines whose first non-whitespace character
    is ``#``) are skipped. Surrounding whitespace is removed from every
    remaining line before it is added to the result.

    Parameters
    ----------
    ignore_file_path : Path
        Path to the .gitingestignore file

    Returns
    -------
    set[str]
        Set of patterns to ignore. Empty when the file does not exist.
    """
    if not ignore_file_path.exists():
        return set()

    with open(ignore_file_path, encoding="utf-8") as f:
        # Strip *before* testing for "#": checking the raw line would let an
        # indented comment such as "   # note" slip through as a pattern.
        stripped_lines = (line.strip() for line in f)
        return {entry for entry in stripped_lines if entry and not entry.startswith("#")}
| 35 | + |
| 36 | + |
def parse_patterns(patterns: tuple[str, ...]) -> set[str]:
    """
    Flatten command-line pattern arguments into a set of individual patterns.

    Each argument may hold several whitespace-separated patterns, and the
    option may be repeated (multiple -e/-i arguments); both forms end up in
    the same set.

    Parameters
    ----------
    patterns : tuple[str, ...]
        Tuple of patterns from command line

    Returns
    -------
    set[str]
        Set of parsed patterns
    """
    # str.split() without a separator splits on runs of whitespace and never
    # yields empty or padded tokens, so no further trimming is required.
    return {token for argument in patterns for token in argument.split()}
| 58 | + |
| 59 | + |
@click.command()
@click.argument("source", type=str, default=".")
@click.option(
    "--output",
    "-o",
    default=None,
    help="Output file path (default: <repo_name>.txt in current directory)",
)
@click.option(
    "--max-size",
    "-s",
    default=MAX_FILE_SIZE,
    help="Maximum file size to process in bytes",
)
@click.option(
    "--exclude-pattern",
    "-e",
    multiple=True,
    help="Patterns to exclude (space-separated patterns allowed)",
)
@click.option(
    "--include-pattern",
    "-i",
    multiple=True,
    help="Patterns to include (space-separated patterns allowed)",
)
@click.option(
    "--ignore-file",
    default=".gitingestignore",
    help="Path to ignore file (default: .gitingestignore)",
)
def main(
    source: str,
    output: str | None,
    max_size: int,
    exclude_pattern: tuple[str, ...],
    include_pattern: tuple[str, ...],
    ignore_file: str,
):
    """
    Main entry point for the CLI.

    Parameters
    ----------
    source : str
        The source directory or repository to analyze.
    output : str | None
        The path where the output file will be written. If not specified, the output
        will be written to a file named `<repo_name>.txt` in the current directory.
    max_size : int
        The maximum file size to process, in bytes. Files larger than this size will be ignored.
    exclude_pattern : tuple[str, ...]
        A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
    include_pattern : tuple[str, ...]
        A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
    ignore_file : str
        Path to the ignore file containing additional patterns to exclude.
    """
    # NOTE: click renders the docstring above as this command's --help text,
    # so it is kept verbatim rather than reworded.
    # The click callback is synchronous; build the coroutine that does the
    # real work and drive it to completion on a fresh event loop.
    ingest_job = async_main(
        source=source,
        output=output,
        max_size=max_size,
        exclude_pattern=exclude_pattern,
        include_pattern=include_pattern,
        ignore_file=ignore_file,
    )
    asyncio.run(ingest_job)
47 | 95 |
|
48 | 96 |
|
async def async_main(
    source: str,
    output: str | None,
    max_size: int,
    exclude_pattern: tuple[str, ...],
    include_pattern: tuple[str, ...],
    ignore_file: str,
) -> None:
    """
    Analyze a directory or repository and create a text dump of its contents.

    This command analyzes the contents of a specified source directory or repository,
    applies custom include and exclude patterns, and generates a text summary of the
    analysis which is then written to an output file.

    Parameters
    ----------
    source : str
        The source directory or repository to analyze.
    output : str | None
        The path where the output file will be written. If not specified, the output
        will be written to a file named `<repo_name>.txt` in the current directory,
        where `<repo_name>` is the last component of `source` (or "repository" when
        that component is empty, e.g. for ".").
    max_size : int
        The maximum file size to process, in bytes. Files larger than this size will be ignored.
    exclude_pattern : tuple[str, ...]
        A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
    include_pattern : tuple[str, ...]
        A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
    ignore_file : str
        Path to the ignore file containing additional patterns to exclude. It is
        joined onto `source`, so a relative path is looked up inside the source directory.

    Raises
    ------
    Abort
        If there is an error during the execution of the command, this exception is raised to abort the process.
    """
    try:
        # Get repository name from source path
        repo_name = Path(source).name or "repository"

        # Set default output filename if not provided
        if not output:
            output = f"{repo_name}.txt"

        # Parse command line patterns
        exclude_patterns = parse_patterns(exclude_pattern)
        include_patterns = parse_patterns(include_pattern)

        # Patterns from the ignore file are added to the exclusions only
        ignore_file_path = Path(source) / ignore_file
        exclude_patterns.update(parse_ignore_file(ignore_file_path))

        # Perform the ingest operation; only the first returned item (the summary) is used here
        summary, *_ = await ingest(source, max_size, include_patterns, exclude_patterns, output=output)

        # Display results
        click.echo(f"Analysis complete! Output written to: {output}")
        click.echo("\nSummary:")
        click.echo(summary)

    except FileNotFoundError as e:
        click.echo(f"Error: Source directory not found - {e}", err=True)
        raise click.Abort() from e
    except PermissionError as e:
        click.echo(f"Error: Permission denied - {e}", err=True)
        raise click.Abort() from e
    except Exception as e:
        # Nothing can be continued after a failed ingest, so every failure
        # aborts. Returning normally here would make the CLI exit with status 0
        # and hide the error from scripts and CI.
        click.echo(f"Error: {e}", err=True)
        raise click.Abort() from e
97 | 170 |
|
98 | 171 |
|
99 | 172 | if __name__ == "__main__": |
|
0 commit comments