Skip to content

Commit 8f93ee6

Browse files
refactor: convert key functions and tests to asynchronous
- Made `parse_query` in query_parser.py asynchronous (call sites in query_processor.py and repository_ingest.py now `await` it)
- Made `main` in cli.py asynchronous
- Made `ingest` in repository_ingest.py asynchronous
- Updated test functions in test_query_parser.py to support async
1 parent 1ab39dd commit 8f93ee6

File tree

5 files changed

+46
-46
lines changed

5 files changed

+46
-46
lines changed

src/gitingest/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
@click.option("--max-size", "-s", default=MAX_FILE_SIZE, help="Maximum file size to process in bytes")
1515
@click.option("--exclude-pattern", "-e", multiple=True, help="Patterns to exclude")
1616
@click.option("--include-pattern", "-i", multiple=True, help="Patterns to include")
17-
def main(
17+
async def main(
1818
source: str,
1919
output: str | None,
2020
max_size: int,
@@ -54,7 +54,7 @@ def main(
5454

5555
if not output:
5656
output = "digest.txt"
57-
summary, _, _ = ingest(source, max_size, include_patterns, exclude_patterns, output=output)
57+
summary, _, _ = await ingest(source, max_size, include_patterns, exclude_patterns, output=output)
5858

5959
click.echo(f"Analysis complete! Output written to: {output}")
6060
click.echo("\nSummary:")

src/gitingest/query_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
HEX_DIGITS = set(string.hexdigits)
1616

1717

18-
def parse_query(
18+
async def parse_query(
1919
source: str,
2020
max_file_size: int,
2121
from_web: bool,

src/gitingest/repository_ingest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from gitingest.repository_clone import CloneConfig, clone_repo
1111

1212

13-
def ingest(
13+
async def ingest(
1414
source: str,
1515
max_file_size: int = 10 * 1024 * 1024, # 10 MB
1616
include_patterns: list[str] | str | None = None,
@@ -52,7 +52,7 @@ def ingest(
5252
If `clone_repo` does not return a coroutine, or if the `source` is of an unsupported type.
5353
"""
5454
try:
55-
query = parse_query(
55+
query = await parse_query(
5656
source=source,
5757
max_file_size=max_file_size,
5858
from_web=False,

src/query_processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ async def process_query(
7777
}
7878

7979
try:
80-
query = parse_query(
80+
query = await parse_query(
8181
source=input_text,
8282
max_file_size=max_file_size,
8383
from_web=True,

tests/test_query_parser.py

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
from gitingest.query_parser import _parse_patterns, _parse_url, parse_query
99

1010

11-
def test_parse_url_valid_https() -> None:
11+
async def test_parse_url_valid_https() -> None:
1212
"""
13-
Test `_parse_url` with valid HTTPS URLs from supported platforms (GitHub, GitLab, Bitbucket).
13+
Test `_parse_url` with valid HTTPS URLs from supported platforms (GitHub, GitLab, Bitbucket, Gitea).
1414
Verifies that user and repository names are correctly extracted.
1515
"""
1616
test_cases = [
@@ -19,13 +19,13 @@ def test_parse_url_valid_https() -> None:
1919
"https://bitbucket.org/user/repo",
2020
]
2121
for url in test_cases:
22-
result = _parse_url(url)
22+
result = await _parse_url(url)
2323
assert result["user_name"] == "user"
2424
assert result["repo_name"] == "repo"
2525
assert result["url"] == url
2626

2727

28-
def test_parse_url_valid_http() -> None:
28+
async def test_parse_url_valid_http() -> None:
2929
"""
3030
Test `_parse_url` with valid HTTP URLs from supported platforms.
3131
Verifies that user and repository names, as well as the slug, are correctly extracted.
@@ -36,88 +36,88 @@ def test_parse_url_valid_http() -> None:
3636
"http://bitbucket.org/user/repo",
3737
]
3838
for url in test_cases:
39-
result = _parse_url(url)
39+
result = await _parse_url(url)
4040
assert result["user_name"] == "user"
4141
assert result["repo_name"] == "repo"
4242
assert result["slug"] == "user-repo"
4343

4444

45-
def test_parse_url_invalid() -> None:
45+
async def test_parse_url_invalid() -> None:
4646
"""
4747
Test `_parse_url` with an invalid URL that does not include a repository structure.
4848
Verifies that a ValueError is raised with an appropriate error message.
4949
"""
50-
url = "https://only-domain.com"
50+
url = "https://github.com"
5151
with pytest.raises(ValueError, match="Invalid repository URL"):
52-
_parse_url(url)
52+
await _parse_url(url)
5353

5454

55-
def test_parse_query_basic() -> None:
55+
async def test_parse_query_basic() -> None:
5656
"""
5757
Test `parse_query` with basic inputs including valid repository URLs.
5858
Verifies that user and repository names, URL, and ignore patterns are correctly parsed.
5959
"""
6060
test_cases = ["https://github.com/user/repo", "https://gitlab.com/user/repo"]
6161
for url in test_cases:
62-
result = parse_query(url, max_file_size=50, from_web=True, ignore_patterns="*.txt")
62+
result = await parse_query(url, max_file_size=50, from_web=True, ignore_patterns="*.txt")
6363
assert result["user_name"] == "user"
6464
assert result["repo_name"] == "repo"
6565
assert result["url"] == url
6666
assert "*.txt" in result["ignore_patterns"]
6767

6868

69-
def test_parse_query_mixed_case() -> None:
69+
async def test_parse_query_mixed_case() -> None:
7070
"""
7171
Test `parse_query` with mixed case URLs.
7272
"""
7373
url = "Https://GitHub.COM/UsEr/rEpO"
74-
result = parse_query(url, max_file_size=50, from_web=True)
74+
result = await parse_query(url, max_file_size=50, from_web=True)
7575
assert result["user_name"] == "user"
7676
assert result["repo_name"] == "repo"
7777

7878

79-
def test_parse_query_include_pattern() -> None:
79+
async def test_parse_query_include_pattern() -> None:
8080
"""
8181
Test `parse_query` with an include pattern.
8282
Verifies that the include pattern is set correctly and default ignore patterns are applied.
8383
"""
8484
url = "https://github.com/user/repo"
85-
result = parse_query(url, max_file_size=50, from_web=True, include_patterns="*.py")
85+
result = await parse_query(url, max_file_size=50, from_web=True, include_patterns="*.py")
8686
assert result["include_patterns"] == ["*.py"]
8787
assert set(result["ignore_patterns"]) == set(DEFAULT_IGNORE_PATTERNS)
8888

8989

90-
def test_parse_query_invalid_pattern() -> None:
90+
async def test_parse_query_invalid_pattern() -> None:
9191
"""
9292
Test `parse_query` with an invalid pattern containing special characters.
9393
Verifies that a ValueError is raised with an appropriate error message.
9494
"""
9595
url = "https://github.com/user/repo"
9696
with pytest.raises(ValueError, match="Pattern.*contains invalid characters"):
97-
parse_query(url, max_file_size=50, from_web=True, include_patterns="*.py;rm -rf")
97+
await parse_query(url, max_file_size=50, from_web=True, include_patterns="*.py;rm -rf")
9898

9999

100-
def test_parse_url_with_subpaths() -> None:
100+
async def test_parse_url_with_subpaths() -> None:
101101
"""
102102
Test `_parse_url` with a URL containing a branch and subpath.
103103
Verifies that user name, repository name, branch, and subpath are correctly extracted.
104104
"""
105105
url = "https://github.com/user/repo/tree/main/subdir/file"
106-
result = _parse_url(url)
106+
result = await _parse_url(url)
107107
assert result["user_name"] == "user"
108108
assert result["repo_name"] == "repo"
109109
assert result["branch"] == "main"
110110
assert result["subpath"] == "/subdir/file"
111111

112112

113-
def test_parse_url_invalid_repo_structure() -> None:
113+
async def test_parse_url_invalid_repo_structure() -> None:
114114
"""
115115
Test `_parse_url` with an invalid repository structure in the URL.
116116
Verifies that a ValueError is raised with an appropriate error message.
117117
"""
118118
url = "https://github.com/user"
119119
with pytest.raises(ValueError, match="Invalid repository URL"):
120-
_parse_url(url)
120+
await _parse_url(url)
121121

122122

123123
def test_parse_patterns_valid() -> None:
@@ -140,35 +140,35 @@ def test_parse_patterns_invalid_characters() -> None:
140140
_parse_patterns(patterns)
141141

142142

143-
def test_parse_query_with_large_file_size() -> None:
143+
async def test_parse_query_with_large_file_size() -> None:
144144
"""
145145
Test `parse_query` with a very large file size limit.
146146
Verifies that the file size limit and default ignore patterns are set correctly.
147147
"""
148148
url = "https://github.com/user/repo"
149-
result = parse_query(url, max_file_size=10**9, from_web=True)
149+
result = await parse_query(url, max_file_size=10**9, from_web=True)
150150
assert result["max_file_size"] == 10**9
151151
assert result["ignore_patterns"] == DEFAULT_IGNORE_PATTERNS
152152

153153

154-
def test_parse_query_empty_patterns() -> None:
154+
async def test_parse_query_empty_patterns() -> None:
155155
"""
156156
Test `parse_query` with empty include and ignore patterns.
157157
Verifies that the include patterns are set to None and default ignore patterns are applied.
158158
"""
159159
url = "https://github.com/user/repo"
160-
result = parse_query(url, max_file_size=50, from_web=True, include_patterns="", ignore_patterns="")
160+
result = await parse_query(url, max_file_size=50, from_web=True, include_patterns="", ignore_patterns="")
161161
assert result["include_patterns"] is None
162162
assert result["ignore_patterns"] == DEFAULT_IGNORE_PATTERNS
163163

164164

165-
def test_parse_query_include_and_ignore_overlap() -> None:
165+
async def test_parse_query_include_and_ignore_overlap() -> None:
166166
"""
167167
Test `parse_query` with overlapping include and ignore patterns.
168168
Verifies that overlapping patterns are removed from the ignore patterns.
169169
"""
170170
url = "https://github.com/user/repo"
171-
result = parse_query(
171+
result = await parse_query(
172172
url,
173173
max_file_size=50,
174174
from_web=True,
@@ -180,50 +180,50 @@ def test_parse_query_include_and_ignore_overlap() -> None:
180180
assert "*.txt" in result["ignore_patterns"]
181181

182182

183-
def test_parse_query_local_path() -> None:
183+
async def test_parse_query_local_path() -> None:
184184
"""
185185
Test `parse_query` with a local file path.
186186
Verifies that the local path is set, a unique ID is generated, and the slug is correctly created.
187187
"""
188188
path = "/home/user/project"
189-
result = parse_query(path, max_file_size=100, from_web=False)
189+
result = await parse_query(path, max_file_size=100, from_web=False)
190190
tail = Path("home/user/project")
191191
assert result["local_path"].parts[-len(tail.parts) :] == tail.parts
192192
assert result["id"] is not None
193193
assert result["slug"] == "user/project"
194194

195195

196-
def test_parse_query_relative_path() -> None:
196+
async def test_parse_query_relative_path() -> None:
197197
"""
198198
Test `parse_query` with a relative file path.
199199
Verifies that the local path and slug are correctly resolved.
200200
"""
201201
path = "./project"
202-
result = parse_query(path, max_file_size=100, from_web=False)
202+
result = await parse_query(path, max_file_size=100, from_web=False)
203203
tail = Path("project")
204204
assert result["local_path"].parts[-len(tail.parts) :] == tail.parts
205205
assert result["slug"].endswith("project")
206206

207207

208-
def test_parse_query_empty_source() -> None:
208+
async def test_parse_query_empty_source() -> None:
209209
"""
210210
Test `parse_query` with an empty source input.
211211
Verifies that a ValueError is raised with an appropriate error message.
212212
"""
213213
with pytest.raises(ValueError, match="Invalid repository URL"):
214-
parse_query("", max_file_size=100, from_web=True)
214+
await parse_query("", max_file_size=100, from_web=True)
215215

216216

217-
def test_parse_url_branch_and_commit_distinction() -> None:
217+
async def test_parse_url_branch_and_commit_distinction() -> None:
218218
"""
219219
Test `_parse_url` with URLs containing either a branch name or a commit hash.
220220
Verifies that the branch and commit are correctly distinguished.
221221
"""
222222
url_branch = "https://github.com/user/repo/tree/main"
223223
url_commit = "https://github.com/user/repo/tree/abcd1234abcd1234abcd1234abcd1234abcd1234"
224224

225-
result_branch = _parse_url(url_branch)
226-
result_commit = _parse_url(url_commit)
225+
result_branch = await _parse_url(url_branch)
226+
result_commit = await _parse_url(url_commit)
227227

228228
assert result_branch["branch"] == "main"
229229
assert result_branch["commit"] is None
@@ -232,23 +232,23 @@ def test_parse_url_branch_and_commit_distinction() -> None:
232232
assert result_commit["commit"] == "abcd1234abcd1234abcd1234abcd1234abcd1234"
233233

234234

235-
def test_parse_query_uuid_uniqueness() -> None:
235+
async def test_parse_query_uuid_uniqueness() -> None:
236236
"""
237237
Test `parse_query` to ensure that each call generates a unique UUID for the query result.
238238
"""
239239
path = "/home/user/project"
240-
result1 = parse_query(path, max_file_size=100, from_web=False)
241-
result2 = parse_query(path, max_file_size=100, from_web=False)
240+
result1 = await parse_query(path, max_file_size=100, from_web=False)
241+
result2 = await parse_query(path, max_file_size=100, from_web=False)
242242
assert result1["id"] != result2["id"]
243243

244244

245-
def test_parse_url_with_query_and_fragment() -> None:
245+
async def test_parse_url_with_query_and_fragment() -> None:
246246
"""
247247
Test `_parse_url` with a URL containing query parameters and a fragment.
248248
Verifies that the URL is cleaned and other fields are correctly extracted.
249249
"""
250250
url = "https://github.com/user/repo?arg=value#fragment"
251-
result = _parse_url(url)
251+
result = await _parse_url(url)
252252
assert result["user_name"] == "user"
253253
assert result["repo_name"] == "repo"
254254
assert result["url"] == "https://github.com/user/repo" # URL should be cleaned

0 commit comments

Comments
 (0)