Skip to content

Commit 6981246

Browse files
committed
Rename ParsedQuery to IngestionQuery and move dataclasses into a separate file (ingestion_schema.py)
1 parent 48123bd commit 6981246

File tree

11 files changed

+228
-221
lines changed

11 files changed

+228
-221
lines changed

src/gitingest/cloning.py

Lines changed: 1 addition & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,45 +2,15 @@
22

33
import asyncio
44
import os
5-
from dataclasses import dataclass
65
from pathlib import Path
76
from typing import List, Optional, Tuple
87

8+
from gitingest.ingestion_schema import CloneConfig
99
from gitingest.utils.timeout_wrapper import async_timeout
1010

1111
TIMEOUT: int = 60
1212

1313

14-
@dataclass
15-
class CloneConfig:
16-
"""
17-
Configuration for cloning a Git repository.
18-
19-
This class holds the necessary parameters for cloning a repository to a local path, including
20-
the repository's URL, the target local path, and optional parameters for a specific commit or branch.
21-
22-
Attributes
23-
----------
24-
url : str
25-
The URL of the Git repository to clone.
26-
local_path : str
27-
The local directory where the repository will be cloned.
28-
commit : str, optional
29-
The specific commit hash to check out after cloning (default is None).
30-
branch : str, optional
31-
The branch to clone (default is None).
32-
subpath : str
33-
The subpath to clone from the repository (default is "/").
34-
"""
35-
36-
url: str
37-
local_path: str
38-
commit: Optional[str] = None
39-
branch: Optional[str] = None
40-
subpath: str = "/"
41-
blob: bool = False
42-
43-
4414
@async_timeout(TIMEOUT)
4515
async def clone(config: CloneConfig) -> None:
4616
"""

src/gitingest/entrypoint.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from gitingest.cloning import clone
99
from gitingest.config import TMP_BASE_PATH
1010
from gitingest.ingestion import ingest_query
11-
from gitingest.query_parsing import ParsedQuery, parse_query
11+
from gitingest.query_parsing import IngestionQuery, parse_query
1212

1313

1414
async def ingest_async(
@@ -58,19 +58,19 @@ async def ingest_async(
5858
repo_cloned = False
5959

6060
try:
61-
parsed_query: ParsedQuery = await parse_query(
61+
query: IngestionQuery = await parse_query(
6262
source=source,
6363
max_file_size=max_file_size,
6464
from_web=False,
6565
include_patterns=include_patterns,
6666
ignore_patterns=exclude_patterns,
6767
)
6868

69-
if parsed_query.url:
70-
selected_branch = branch if branch else parsed_query.branch # prioritize branch argument
71-
parsed_query.branch = selected_branch
69+
if query.url:
70+
selected_branch = branch if branch else query.branch # prioritize branch argument
71+
query.branch = selected_branch
7272

73-
clone_config = parsed_query.extact_clone_config()
73+
clone_config = query.extact_clone_config()
7474
clone_coroutine = clone(clone_config)
7575

7676
if inspect.iscoroutine(clone_coroutine):
@@ -83,7 +83,7 @@ async def ingest_async(
8383

8484
repo_cloned = True
8585

86-
summary, tree, content = ingest_query(parsed_query)
86+
summary, tree, content = ingest_query(query)
8787

8888
if output is not None:
8989
with open(output, "w", encoding="utf-8") as f:

src/gitingest/ingestion.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES
88
from gitingest.filesystem_schema import FileSystemNode, FileSystemNodeType, FileSystemStats
99
from gitingest.output_formatters import format_node
10-
from gitingest.query_parsing import ParsedQuery
10+
from gitingest.query_parsing import IngestionQuery
1111
from gitingest.utils.ingestion_utils import _should_exclude, _should_include
1212
from gitingest.utils.path_utils import _is_safe_symlink
1313

@@ -17,7 +17,7 @@
1717
import tomli as tomllib
1818

1919

20-
def ingest_query(query: ParsedQuery) -> Tuple[str, str, str]:
20+
def ingest_query(query: IngestionQuery) -> Tuple[str, str, str]:
2121
"""
2222
Run the ingestion process for a parsed query.
2323
@@ -27,7 +27,7 @@ def ingest_query(query: ParsedQuery) -> Tuple[str, str, str]:
2727
2828
Parameters
2929
----------
30-
query : ParsedQuery
30+
query : IngestionQuery
3131
The parsed query object containing information about the repository and query parameters.
3232
3333
Returns
@@ -87,7 +87,7 @@ def ingest_query(query: ParsedQuery) -> Tuple[str, str, str]:
8787
return format_node(root_node, query)
8888

8989

90-
def apply_gitingest_file(path: Path, query: ParsedQuery) -> None:
90+
def apply_gitingest_file(path: Path, query: IngestionQuery) -> None:
9191
"""
9292
Apply the .gitingest file to the query object.
9393
@@ -98,7 +98,7 @@ def apply_gitingest_file(path: Path, query: ParsedQuery) -> None:
9898
----------
9999
path : Path
100100
The path of the directory to ingest.
101-
query : ParsedQuery
101+
query : IngestionQuery
102102
The parsed query object containing information about the repository and query parameters.
103103
It should have an attribute `ignore_patterns` which is either None or a set of strings.
104104
"""
@@ -154,7 +154,7 @@ def apply_gitingest_file(path: Path, query: ParsedQuery) -> None:
154154

155155
def _process_node(
156156
node: FileSystemNode,
157-
query: ParsedQuery,
157+
query: IngestionQuery,
158158
stats: FileSystemStats,
159159
) -> None:
160160
"""
@@ -167,7 +167,7 @@ def _process_node(
167167
----------
168168
node : FileSystemNode
169169
The current directory or file node being processed.
170-
query : ParsedQuery
170+
query : IngestionQuery
171171
The parsed query object containing information about the repository and query parameters.
172172
stats : FileSystemStats
173173
Statistics tracking object for the total file count and size.

src/gitingest/ingestion_schema.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
""" This module contains the dataclasses for the ingestion process. """
2+
3+
from dataclasses import dataclass
4+
from pathlib import Path
5+
from typing import Optional, Set
6+
7+
from gitingest.config import MAX_FILE_SIZE
8+
9+
10+
@dataclass
11+
class CloneConfig:
12+
"""
13+
Configuration for cloning a Git repository.
14+
15+
This class holds the necessary parameters for cloning a repository to a local path, including
16+
the repository's URL, the target local path, and optional parameters for a specific commit or branch.
17+
18+
Attributes
19+
----------
20+
url : str
21+
The URL of the Git repository to clone.
22+
local_path : str
23+
The local directory where the repository will be cloned.
24+
commit : str, optional
25+
The specific commit hash to check out after cloning (default is None).
26+
branch : str, optional
27+
The branch to clone (default is None).
28+
subpath : str
29+
The subpath to clone from the repository (default is "/").
30+
"""
31+
32+
url: str
33+
local_path: str
34+
commit: Optional[str] = None
35+
branch: Optional[str] = None
36+
subpath: str = "/"
37+
blob: bool = False
38+
39+
40+
@dataclass
41+
class IngestionQuery: # pylint: disable=too-many-instance-attributes
42+
"""
43+
Dataclass to store the parsed details of the repository or file path.
44+
"""
45+
46+
user_name: Optional[str]
47+
repo_name: Optional[str]
48+
local_path: Path
49+
url: Optional[str]
50+
slug: str
51+
id: str
52+
subpath: str = "/"
53+
type: Optional[str] = None
54+
branch: Optional[str] = None
55+
commit: Optional[str] = None
56+
max_file_size: int = MAX_FILE_SIZE
57+
ignore_patterns: Optional[Set[str]] = None
58+
include_patterns: Optional[Set[str]] = None
59+
pattern_type: Optional[str] = None
60+
61+
def extact_clone_config(self) -> CloneConfig:
62+
"""
63+
Extract the relevant fields for the CloneConfig object.
64+
65+
Returns
66+
-------
67+
CloneConfig
68+
A CloneConfig object containing the relevant fields.
69+
70+
Raises
71+
------
72+
ValueError
73+
If the 'url' parameter is not provided.
74+
"""
75+
if not self.url:
76+
raise ValueError("The 'url' parameter is required.")
77+
78+
return CloneConfig(
79+
url=self.url,
80+
local_path=str(self.local_path),
81+
commit=self.commit,
82+
branch=self.branch,
83+
subpath=self.subpath,
84+
blob=self.type == "blob",
85+
)

src/gitingest/output_formatters.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
import tiktoken
66

77
from gitingest.filesystem_schema import FileSystemNode, FileSystemNodeType
8-
from gitingest.query_parsing import ParsedQuery
8+
from gitingest.query_parsing import IngestionQuery
99

1010

11-
def format_node(node: FileSystemNode, query: ParsedQuery) -> Tuple[str, str, str]:
11+
def format_node(node: FileSystemNode, query: IngestionQuery) -> Tuple[str, str, str]:
1212
"""
1313
Generate a summary, directory structure, and file contents for a given file system node.
1414
@@ -18,7 +18,7 @@ def format_node(node: FileSystemNode, query: ParsedQuery) -> Tuple[str, str, str
1818
----------
1919
node : FileSystemNode
2020
The file system node to be summarized.
21-
query : ParsedQuery
21+
query : IngestionQuery
2222
The parsed query object containing information about the repository and query parameters.
2323
2424
Returns
@@ -47,15 +47,15 @@ def format_node(node: FileSystemNode, query: ParsedQuery) -> Tuple[str, str, str
4747
return summary, tree, content
4848

4949

50-
def _create_summary_prefix(query: ParsedQuery, single_file: bool = False) -> str:
50+
def _create_summary_prefix(query: IngestionQuery, single_file: bool = False) -> str:
5151
"""
5252
Create a prefix string for summarizing a repository or local directory.
5353
5454
Includes repository name (if provided), commit/branch details, and subpath if relevant.
5555
5656
Parameters
5757
----------
58-
query : ParsedQuery
58+
query : IngestionQuery
5959
The parsed query object containing information about the repository and query parameters.
6060
single_file : bool
6161
A flag indicating whether the summary is for a single file, by default False.
@@ -108,7 +108,7 @@ def _gather_file_contents(node: FileSystemNode) -> str:
108108
return "\n".join(_gather_file_contents(child) for child in node.children)
109109

110110

111-
def _create_tree_structure(query: ParsedQuery, node: FileSystemNode, prefix: str = "", is_last: bool = True) -> str:
111+
def _create_tree_structure(query: IngestionQuery, node: FileSystemNode, prefix: str = "", is_last: bool = True) -> str:
112112
"""
113113
Generate a tree-like string representation of the file structure.
114114
@@ -117,7 +117,7 @@ def _create_tree_structure(query: ParsedQuery, node: FileSystemNode, prefix: str
117117
118118
Parameters
119119
----------
120-
query : ParsedQuery
120+
query : IngestionQuery
121121
The parsed query object containing information about the repository and query parameters.
122122
node : FileSystemNode
123123
The current directory or file node being processed.

0 commit comments

Comments
 (0)