Skip to content

Commit 82acb0a

Browse files
standardize docstrings
1 parent d36a975 commit 82acb0a

26 files changed

+107
-106
lines changed

src/gitingest/cli.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class _CLIArgs(TypedDict):
5353
envvar="GITHUB_TOKEN",
5454
default=None,
5555
help=(
56-
"GitHub personal access token for accessing private repositories. "
56+
"GitHub personal access token (PAT) for accessing private repositories. "
5757
"If omitted, the CLI will look for the GITHUB_TOKEN environment variable."
5858
),
5959
)
@@ -81,9 +81,9 @@ async def _async_main(
8181
) -> None:
8282
"""Analyze a directory or repository and create a text dump of its contents.
8383
84-
This command analyzes the contents of a specified source directory or repository, applies custom include and
85-
exclude patterns, and generates a text summary of the analysis which is then written to an output file
86-
or printed to stdout.
84+
This command scans the specified ``source`` (a local directory or Git repo),
85+
applies custom include and exclude patterns, and generates a text summary of
86+
the analysis. The summary is written to an output file or printed to ``stdout``.
8787
8888
Parameters
8989
----------
@@ -96,20 +96,20 @@ async def _async_main(
9696
include_pattern : tuple[str, ...] | None
9797
Glob patterns for including files in the output.
9898
branch : str | None
99-
Git branch to ingest. If *None*, the repository's default branch is used.
99+
Git branch to ingest. If ``None``, the repository's default branch is used.
100100
include_gitignored : bool
101-
If *True*, also ingest files matched by ``.gitignore`` or ``.gitingestignore`` (default: ``False``).
101+
If ``True``, also ingest files matched by ``.gitignore`` or ``.gitingestignore`` (default: ``False``).
102102
token : str | None
103-
GitHub personal-access token (PAT). Needed when the repository is private.
104-
Can also be set via the ``GITHUB_TOKEN`` env var.
103+
GitHub personal access token (PAT) for accessing private repositories.
104+
Can also be set via the ``GITHUB_TOKEN`` environment variable.
105105
output : str | None
106-
Destination file path. If *None*, the output is written to ``<repo_name>.txt`` in the current directory.
107-
Use ``"-"`` to write to *stdout*.
106+
Destination file path. If ``None``, the output is written to ``<repo_name>.txt`` in the current directory.
107+
Use ``"-"`` to write to ``stdout``.
108108
109109
Raises
110110
------
111111
click.Abort
112-
If there is an error during the execution of the command, this exception is raised to abort the process.
112+
Raised if an error occurs during execution and the command must be aborted.
113113
114114
"""
115115
try:

src/gitingest/clone.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ async def clone_repo(config: CloneConfig, token: str | None = None) -> None:
3535
config : CloneConfig
3636
The configuration for cloning the repository.
3737
token : str | None
38-
GitHub personal-access token (PAT). Needed when the repository is private.
39-
Can also be set via the `GITHUB_TOKEN` env var.
38+
GitHub personal access token (PAT) for accessing private repositories.
39+
Can also be set via the ``GITHUB_TOKEN`` environment variable.
4040
4141
Raises
4242
------

src/gitingest/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
import tempfile
44
from pathlib import Path
55

6-
MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
6+
MAX_FILE_SIZE = 10 * 1024 * 1024 # Maximum size of a single file to process (10 MB)
77
MAX_DIRECTORY_DEPTH = 20 # Maximum depth of directory traversal
88
MAX_FILES = 10_000 # Maximum number of files to process
9-
MAX_TOTAL_SIZE_BYTES = 500 * 1024 * 1024 # 500 MB
9+
MAX_TOTAL_SIZE_BYTES = 500 * 1024 * 1024 # Maximum size of output file (500 MB)
1010
DEFAULT_TIMEOUT = 60 # seconds
1111

1212
OUTPUT_FILE_NAME = "digest.txt"

src/gitingest/entrypoint.py

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pathlib import Path
1010

1111
from gitingest.clone import clone_repo
12-
from gitingest.config import TMP_BASE_PATH
12+
from gitingest.config import MAX_FILE_SIZE, TMP_BASE_PATH
1313
from gitingest.ingestion import ingest_query
1414
from gitingest.query_parser import IngestionQuery, parse_query
1515
from gitingest.utils.async_compat import to_thread
@@ -20,7 +20,7 @@
2020
async def ingest_async(
2121
source: str,
2222
*,
23-
max_file_size: int = 10 * 1024 * 1024, # 10 MB
23+
max_file_size: int = MAX_FILE_SIZE, # 10 MB
2424
include_patterns: str | set[str] | None = None,
2525
exclude_patterns: str | set[str] | None = None,
2626
branch: str | None = None,
@@ -41,19 +41,20 @@ async def ingest_async(
4141
max_file_size : int
4242
Maximum allowed file size for file ingestion. Files larger than this size are ignored (default: 10 MB).
4343
include_patterns : str | set[str] | None
44-
Pattern or set of patterns specifying which files to include. If *None*, all files are included.
44+
Pattern or set of patterns specifying which files to include. If ``None``, all files are included.
4545
exclude_patterns : str | set[str] | None
46-
Pattern or set of patterns specifying which files to exclude. If *None*, no files are excluded.
46+
Pattern or set of patterns specifying which files to exclude. If ``None``, no files are excluded.
4747
branch : str | None
48-
The branch to clone and ingest. If *None*, the default branch is used.
48+
The branch to clone and ingest. If ``None``, the repository's default branch is used.
4949
include_gitignored : bool
50-
If *True*, include files ignored by ``.gitignore`` and ``.gitingestignore`` (default: ``False``).
50+
If ``True``, include files ignored by ``.gitignore`` and ``.gitingestignore`` (default: ``False``).
5151
token : str | None
52-
GitHub personal-access token (PAT). Needed when the repository is private.
53-
Can also be set via the ``GITHUB_TOKEN`` env var.
52+
GitHub personal access token (PAT) for accessing private repositories.
53+
Can also be set via the ``GITHUB_TOKEN`` environment variable.
5454
output : str | None
55-
File path where the summary and content should be written. If *"-"* (dash), the results are written to stdout.
56-
If *None*, the results are not written to a file.
55+
File path where the summary and content should be written.
56+
If ``"-"`` (dash), the results are written to ``stdout``.
57+
If ``None``, the results are not written to a file.
5758
5859
Returns
5960
-------
@@ -99,7 +100,7 @@ async def ingest_async(
99100
def ingest(
100101
source: str,
101102
*,
102-
max_file_size: int = 10 * 1024 * 1024, # 10 MB
103+
max_file_size: int = MAX_FILE_SIZE,
103104
include_patterns: str | set[str] | None = None,
104105
exclude_patterns: str | set[str] | None = None,
105106
branch: str | None = None,
@@ -120,19 +121,20 @@ def ingest(
120121
max_file_size : int
121122
Maximum allowed file size for file ingestion. Files larger than this size are ignored (default: 10 MB).
122123
include_patterns : str | set[str] | None
123-
Pattern or set of patterns specifying which files to include. If *None*, all files are included.
124+
Pattern or set of patterns specifying which files to include. If ``None``, all files are included.
124125
exclude_patterns : str | set[str] | None
125-
Pattern or set of patterns specifying which files to exclude. If *None*, no files are excluded.
126+
Pattern or set of patterns specifying which files to exclude. If ``None``, no files are excluded.
126127
branch : str | None
127128
The branch to clone and ingest. If ``None``, the repository's default branch is used.
128129
include_gitignored : bool
129-
If *True*, include files ignored by ``.gitignore`` and ``.gitingestignore`` (default: ``False``).
130+
If ``True``, include files ignored by ``.gitignore`` and ``.gitingestignore`` (default: ``False``).
130131
token : str | None
131-
GitHub personal-access token (PAT). Needed when the repository is private.
132-
Can also be set via the ``GITHUB_TOKEN`` env var.
132+
GitHub personal access token (PAT) for accessing private repositories.
133+
Can also be set via the ``GITHUB_TOKEN`` environment variable.
133134
output : str | None
134-
File path where the summary and content should be written. If *"-"* (dash), the results are written to stdout.
135-
If *None*, the results are not written to a file.
135+
File path where the summary and content should be written.
136+
If ``"-"`` (dash), the results are written to ``stdout``.
137+
If ``None``, the results are not written to a file.
136138
137139
Returns
138140
-------
@@ -162,7 +164,7 @@ def ingest(
162164

163165

164166
def _apply_gitignores(query: IngestionQuery) -> None:
165-
"""Update `query.ignore_patterns` in-place.
167+
"""Update ``query.ignore_patterns`` in-place.
166168
167169
Parameters
168170
----------
@@ -187,7 +189,7 @@ async def _clone_if_remote(query: IngestionQuery, token: str | None) -> None:
187189
Raises
188190
------
189191
TypeError
190-
If `clone_repo` does not return a coroutine.
192+
If ``clone_repo`` does not return a coroutine.
191193
192194
"""
193195
if not query.url: # local path ingestion
@@ -207,7 +209,7 @@ async def _clone_if_remote(query: IngestionQuery, token: str | None) -> None:
207209

208210

209211
async def _write_output(tree: str, content: str, target: str | None) -> None:
210-
"""Write combined output to *target* (`'-'` ⇒ stdout).
212+
"""Write combined output to ``target`` (``"-"`` ⇒ ``stdout``).
211213
212214
Parameters
213215
----------
@@ -216,7 +218,7 @@ async def _write_output(tree: str, content: str, target: str | None) -> None:
216218
content : str
217219
The content of the files in the repository or directory.
218220
target : str | None
219-
The path to the output file. If *None*, the results are not written to a file.
221+
The path to the output file. If ``None``, the results are not written to a file.
220222
221223
"""
222224
if target == "-":

src/gitingest/output_formatter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def _format_token_count(text: str) -> str | None:
179179
Returns
180180
-------
181181
str | None
182-
The formatted number of tokens as a string (e.g., ``1.2k``, ``1.2M``), or ``None`` if an error occurs.
182+
The formatted number of tokens as a string (e.g., ``"1.2k"``, ``"1.2M"``), or ``None`` if an error occurs.
183183
184184
"""
185185
try:

src/gitingest/query_parser.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ async def parse_query(
4848
ignore_patterns : set[str] | str | None
4949
Patterns to ignore. Can be a set of strings or a single string.
5050
token : str | None
51-
GitHub personal-access token (PAT). Needed when the repository is private.
52-
Can also be set via the `GITHUB_TOKEN` env var.
51+
GitHub personal access token (PAT) for accessing private repositories.
52+
Can also be set via the ``GITHUB_TOKEN`` environment variable.
5353
5454
Returns
5555
-------
@@ -99,17 +99,17 @@ async def _parse_remote_repo(source: str, token: str | None = None) -> Ingestion
9999
"""Parse a repository URL into a structured query dictionary.
100100
101101
If source is:
102-
- A fully qualified URL (`https://gitlab.com/...`), parse & verify that domain
103-
- A URL missing `https://` (`gitlab.com/...`), add `https://` and parse
104-
- A `slug` (`pandas-dev/pandas`), attempt known domains until we find one that exists.
102+
- A fully qualified URL ('https://gitlab.com/...'), parse & verify that domain
103+
- A URL missing 'https://' ('gitlab.com/...'), add 'https://' and parse
104+
- A *slug* ('pandas-dev/pandas'), attempt known domains until we find one that exists.
105105
106106
Parameters
107107
----------
108108
source : str
109109
The URL or domain-less slug to parse.
110110
token : str | None
111-
GitHub personal-access token (PAT). Needed when the repository is private.
112-
Can also be set via the ``GITHUB_TOKEN`` env var.
111+
GitHub personal access token (PAT) for accessing private repositories.
112+
Can also be set via the ``GITHUB_TOKEN`` environment variable.
113113
114114
Returns
115115
-------
@@ -300,8 +300,8 @@ async def try_domains_for_user_and_repo(user_name: str, repo_name: str, token: s
300300
repo_name : str
301301
The name of the repository.
302302
token : str | None
303-
GitHub personal-access token (PAT). Needed when the repository is private.
304-
Can also be set via the ``GITHUB_TOKEN`` env var.
303+
GitHub personal access token (PAT) for accessing private repositories.
304+
Can also be set via the ``GITHUB_TOKEN`` environment variable.
305305
306306
Returns
307307
-------

src/gitingest/schemas/ingestion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def extract_clone_config(self) -> CloneConfig:
101101
Raises
102102
------
103103
ValueError
104-
If the `url` parameter is not provided.
104+
If the ``url`` parameter is not provided.
105105
106106
"""
107107
if not self.url:

src/gitingest/utils/async_compat.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,14 @@
2121
async def to_thread(func: Callable[P, R], /, *args: P.args, **kwargs: P.kwargs) -> R:
2222
"""Back-port :func:`asyncio.to_thread` for Python < 3.9.
2323
24-
Run `func` in the default thread-pool executor and return the result.
24+
Run ``func`` in the default thread-pool executor and return the result.
2525
"""
2626
loop = asyncio.get_running_loop()
2727
ctx = contextvars.copy_context()
2828
func_call = functools.partial(ctx.run, func, *args, **kwargs)
2929
return await loop.run_in_executor(None, func_call)
3030

31-
# Patch stdlib so that *existing* imports of ``asyncio`` see the shim.
31+
# Patch stdlib so that *existing* imports of asyncio see the shim.
3232
if not hasattr(asyncio, "to_thread"):
3333
asyncio.to_thread = to_thread
3434

src/gitingest/utils/exceptions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def __init__(self, pattern: str) -> None:
2626
class AsyncTimeoutError(Exception):
2727
"""Exception raised when an async operation exceeds its timeout limit.
2828
29-
This exception is used by the `async_timeout` decorator to signal that the wrapped
29+
This exception is used by the ``async_timeout`` decorator to signal that the wrapped
3030
asynchronous function has exceeded the specified time limit for execution.
3131
"""
3232

src/gitingest/utils/file_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def _read_chunk(path: Path) -> bytes | None:
4444
Returns
4545
-------
4646
bytes | None
47-
The first *_CHUNK_SIZE* bytes of *path*, or None on any `OSError`.
47+
The first ``_CHUNK_SIZE`` bytes of ``path``, or ``None`` on any ``OSError``.
4848
4949
"""
5050
try:
@@ -55,7 +55,7 @@ def _read_chunk(path: Path) -> bytes | None:
5555

5656

5757
def _decodes(chunk: bytes, encoding: str) -> bool:
58-
"""Return *True* if *chunk* decodes cleanly with *encoding*.
58+
"""Return ``True`` if ``chunk`` decodes cleanly with ``encoding``.
5959
6060
Parameters
6161
----------
@@ -67,7 +67,7 @@ def _decodes(chunk: bytes, encoding: str) -> bool:
6767
Returns
6868
-------
6969
bool
70-
True if the chunk decodes cleanly with the encoding, False otherwise.
70+
``True`` if the chunk decodes cleanly with the encoding, ``False`` otherwise.
7171
7272
"""
7373
try:

0 commit comments

Comments
 (0)