Skip to content

Commit fab90a6

Browse files
Add docstrings, refactor process_query, and move AsyncTimeoutError to gitingest.exceptions (#77)
* refactor: prefix helper functions with an underscore
* Add docstrings to functions and move AsyncTimeoutError to gitingest.exceptions
* Refactor: Move process_query to top and prefix helper functions with an underscore
1 parent d77741b commit fab90a6

File tree

11 files changed

+763
-151
lines changed

11 files changed

+763
-151
lines changed

src/gitingest/cli.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,32 @@ def main(
1717
exclude_pattern: tuple[str, ...],
1818
include_pattern: tuple[str, ...],
1919
) -> None:
20-
"""Analyze a directory and create a text dump of its contents."""
20+
"""
21+
Analyze a directory or repository and create a text dump of its contents.
22+
23+
This command analyzes the contents of a specified source directory or repository,
24+
applies custom include and exclude patterns, and generates a text summary of the analysis
25+
which is then written to an output file.
26+
27+
Parameters
28+
----------
29+
source : str
30+
The source directory or repository to analyze.
31+
output : str | None
32+
The path where the output file will be written. If not specified, the output will be written
33+
to a file named `<repo_name>.txt` in the current directory.
34+
max_size : int
35+
The maximum file size to process, in bytes. Files larger than this size will be ignored.
36+
exclude_pattern : tuple[str, ...]
37+
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
38+
include_pattern : tuple[str, ...]
39+
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
40+
41+
Raises
42+
------
43+
click.Abort
44+
If there is an error during the execution of the command, this exception is raised to abort the process.
45+
"""
2146
try:
2247
# Combine default and custom ignore patterns
2348
exclude_patterns = list(exclude_pattern)

src/gitingest/clone.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,32 @@
11
import asyncio
22
from dataclasses import dataclass
33

4-
from gitingest.utils import AsyncTimeoutError, async_timeout
4+
from gitingest.exceptions import AsyncTimeoutError
5+
from gitingest.utils import async_timeout
56

67
CLONE_TIMEOUT: int = 20
78

89

910
@dataclass
1011
class CloneConfig:
12+
"""
13+
Configuration for cloning a Git repository.
14+
15+
This class holds the necessary parameters for cloning a repository to a local path, including
16+
the repository's URL, the target local path, and optional parameters for a specific commit or branch.
17+
18+
Attributes
19+
----------
20+
url : str
21+
The URL of the Git repository to clone.
22+
local_path : str
23+
The local directory where the repository will be cloned.
24+
commit : str | None, optional
25+
The specific commit hash to check out after cloning (default is None).
26+
branch : str | None, optional
27+
The branch to clone (default is None).
28+
"""
29+
1130
url: str
1231
local_path: str
1332
commit: str | None = None
@@ -17,7 +36,11 @@ class CloneConfig:
1736
@async_timeout(CLONE_TIMEOUT)
1837
async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
1938
"""
20-
Clones a repository to a local path based on the provided query parameters.
39+
Clones a repository to a local path based on the provided configuration.
40+
41+
This function handles the process of cloning a Git repository to the local file system.
42+
It can clone a specific branch or commit if provided, and it raises exceptions if
43+
any errors occur during the cloning process.
2144
2245
Parameters
2346
----------
@@ -30,7 +53,7 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
3053
3154
Returns
3255
-------
33-
Tuple[bytes, bytes]
56+
tuple[bytes, bytes]
3457
A tuple containing the stdout and stderr of the git commands executed.
3558
3659
Raises
@@ -123,7 +146,7 @@ async def _run_git_command(*args: str) -> tuple[bytes, bytes]:
123146
124147
Returns
125148
-------
126-
Tuple[bytes, bytes]
149+
tuple[bytes, bytes]
127150
A tuple containing the stdout and stderr of the git command.
128151
129152
Raises

src/gitingest/exceptions.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
class InvalidPatternError(ValueError):
2+
"""
3+
Exception raised when a pattern contains invalid characters.
4+
5+
This exception is used to signal that a pattern provided for some operation
6+
contains characters that are not allowed. The valid characters for the pattern
7+
include alphanumeric characters, dash (-), underscore (_), dot (.), forward slash (/),
8+
plus (+), and asterisk (*).
9+
10+
Parameters
11+
----------
12+
pattern : str
13+
The invalid pattern that caused the error.
14+
"""
15+
16+
def __init__(self, pattern: str) -> None:
17+
super().__init__(
18+
f"Pattern '{pattern}' contains invalid characters. Only alphanumeric characters, dash (-), "
19+
"underscore (_), dot (.), forward slash (/), plus (+), and asterisk (*) are allowed."
20+
)
21+
22+
23+
class AsyncTimeoutError(Exception):
24+
"""
25+
Raised when an async operation exceeds its timeout limit.
26+
27+
This exception is used by the `async_timeout` decorator to signal that the wrapped
28+
asynchronous function has exceeded the specified time limit for execution.
29+
"""

src/gitingest/ingest.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,39 @@ def ingest(
1515
exclude_patterns: list[str] | str | None = None,
1616
output: str | None = None,
1717
) -> tuple[str, str, str]:
18+
"""
19+
Main entry point for ingesting a source and processing its contents.
1820
21+
This function analyzes a source (URL or local path), clones the corresponding repository (if applicable),
22+
and processes its files according to the specified query parameters. It returns a summary, a tree-like
23+
structure of the files, and the content of the files. The results can optionally be written to an output file.
24+
25+
Parameters
26+
----------
27+
source : str
28+
The source to analyze, which can be a URL (for a GitHub repository) or a local directory path.
29+
max_file_size : int, optional
30+
The maximum allowed file size for file ingestion. Files larger than this size are ignored, by default 10*1024*1024 (10 MB).
31+
include_patterns : list[str] | str | None, optional
32+
A pattern or list of patterns specifying which files to include in the analysis. If `None`, all files are included.
33+
exclude_patterns : list[str] | str | None, optional
34+
A pattern or list of patterns specifying which files to exclude from the analysis. If `None`, no files are excluded.
35+
output : str | None, optional
36+
The file path where the summary and content should be written. If `None`, the results are not written to a file.
37+
38+
Returns
39+
-------
40+
tuple[str, str, str]
41+
A tuple containing:
42+
- A summary string of the analyzed repository or directory.
43+
- A tree-like string representation of the file structure.
44+
- The content of the files in the repository or directory.
45+
46+
Raises
47+
------
48+
TypeError
49+
If `clone_repo` does not return a coroutine, or if the `source` is of an unsupported type.
50+
"""
1951
try:
2052
query = parse_query(
2153
source=source,
@@ -42,8 +74,8 @@ def ingest(
4274

4375
summary, tree, content = ingest_from_query(query)
4476

45-
if output:
46-
with open(f"{output}", "w") as f:
77+
if output is not None:
78+
with open(output, "w") as f:
4779
f.write(tree + "\n" + content)
4880

4981
return summary, tree, content

0 commit comments

Comments
 (0)