Skip to content

Commit 234e1ac

Browse files
Fixing the branch name with nested /
1 parent 6039114 commit 234e1ac

File tree

3 files changed

+123
-22
lines changed

3 files changed

+123
-22
lines changed

src/gitingest/query_parser.py

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from config import TMP_BASE_PATH
1212
from gitingest.exceptions import InvalidPatternError
1313
from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS
14-
from gitingest.repository_clone import _check_repo_exists
14+
from gitingest.repository_clone import _check_repo_exists, fetch_remote_branch_list
1515

1616
HEX_DIGITS: set[str] = set(string.hexdigits)
1717

@@ -168,18 +168,47 @@ async def _parse_repo_source(source: str) -> dict[str, Any]:
168168
parsed["type"] = possible_type
169169

170170
# Commit or branch
171-
commit_or_branch = remaining_parts.pop(0)
171+
commit_or_branch = remaining_parts[0]
172172
if _is_valid_git_commit_hash(commit_or_branch):
173173
parsed["commit"] = commit_or_branch
174-
else:
175-
parsed["branch"] = commit_or_branch
174+
parsed["subpath"] += "/".join(remaining_parts[1:])
176175

177-
# Subpath if anything left
178-
if remaining_parts:
176+
else:
177+
parsed["branch"] = await _configure_branch_and_subpath(remaining_parts, url)
179178
parsed["subpath"] += "/".join(remaining_parts)
180-
181179
return parsed
182180

181+
async def _configure_branch_and_subpath(remaining_parts: list[str], url: str) -> str | None:
    """
    Find the branch name from the remaining parts of the URL path.

    Branch names may themselves contain ``/`` (e.g. ``feature/fix1``), so the leading path parts are joined
    one at a time until the candidate matches a branch reported by the remote. Every part consumed during
    the search is removed from ``remaining_parts``; whatever is left afterwards is the subpath.

    Parameters
    ----------
    remaining_parts : list[str]
        List of path parts extracted from the URL. Mutated in place: consumed parts are popped from the front.
    url : str
        The repository URL to determine branches.

    Returns
    -------
    str | None
        The matched branch name. If the branch list cannot be fetched, the first path part is popped and
        returned as a best-effort guess. ``None`` if ``remaining_parts`` is empty or if no prefix of the
        path matches a remote branch (in the latter case every part has been consumed).
    """
    import logging  # local import keeps this block self-contained

    # Nothing to resolve: skip the remote call and avoid `pop(0)` on an empty list in the fallback below.
    if not remaining_parts:
        return None

    try:
        # Fetch the list of branches from the remote repository
        fetched_branches = await fetch_remote_branch_list(url)
    except Exception as exc:
        # Best effort: the failure is outside the caller's control, so record it and fall back to
        # treating the first path part as the branch name.
        logging.getLogger(__name__).warning("Failed to fetch branch list: %s", exc)
        return remaining_parts.pop(0)

    known_branches = set(fetched_branches)
    candidate_parts: list[str] = []

    while remaining_parts:
        candidate_parts.append(remaining_parts.pop(0))
        candidate = "/".join(candidate_parts)
        if candidate in known_branches:
            return candidate

    return None
183212

184213
def _is_valid_git_commit_hash(commit: str) -> bool:
185214
"""

src/gitingest/repository_clone.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from gitingest.utils import async_timeout
77

8-
CLONE_TIMEOUT: int = 20
8+
TIMEOUT: int = 20
99

1010

1111
@dataclass
@@ -34,7 +34,7 @@ class CloneConfig:
3434
branch: str | None = None
3535

3636

37-
@async_timeout(CLONE_TIMEOUT)
37+
@async_timeout(TIMEOUT)
3838
async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
3939
"""
4040
Clone a repository to a local path based on the provided configuration.
@@ -141,6 +141,32 @@ async def _check_repo_exists(url: str) -> bool:
141141
raise RuntimeError(f"Unexpected status code: {status_code}")
142142

143143

144+
@async_timeout(TIMEOUT)
async def fetch_remote_branch_list(url: str) -> list[str]:
    """
    Get the list of branches from the remote repo.

    Runs ``git ls-remote --heads`` against ``url`` and keeps, for every output line that contains
    ``refs/heads/``, the text following the first occurrence of that marker.

    Parameters
    ----------
    url : str
        The URL of the repository.

    Returns
    -------
    list[str]
        Branch names in the order they appear in the command output. Lines without the
        ``refs/heads/`` marker are skipped.
    """
    heads_marker = "refs/heads/"

    # Only stdout is parsed; any exception raised by `_run_git_command` propagates to the caller.
    stdout, _ = await _run_git_command("git", "ls-remote", "--heads", url)

    return [
        line.split(heads_marker, 1)[1]
        for line in stdout.decode().splitlines()
        if heads_marker in line
    ]
168+
169+
144170
async def _run_git_command(*args: str) -> tuple[bytes, bytes]:
145171
"""
146172
Execute a Git command asynchronously and captures its output.

tests/query_parser/test_query_parser.py

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
from pathlib import Path
44

55
import pytest
6+
from unittest.mock import patch, AsyncMock
7+
from gitingest.repository_clone import _check_repo_exists, fetch_remote_branch_list
68

79
from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS
810
from gitingest.query_parser import _parse_patterns, _parse_repo_source, parse_query
@@ -96,18 +98,21 @@ async def test_parse_query_invalid_pattern() -> None:
9698
with pytest.raises(ValueError, match="Pattern.*contains invalid characters"):
9799
await parse_query(url, max_file_size=50, from_web=True, include_patterns="*.py;rm -rf")
98100

99-
100101
async def test_parse_url_with_subpaths() -> None:
    """
    Test `_parse_repo_source` with a URL containing a branch and subpath.

    Verifies that user name, repository name, branch, and subpath are correctly extracted.
    """
    url = "https://github.com/user/repo/tree/main/subdir/file"

    # `query_parser` binds `fetch_remote_branch_list` with `from ... import`, so the mock has to replace
    # the name in `gitingest.query_parser`; patching it in `repository_clone` leaves the real one in use.
    # `_run_git_command` stays patched as a safety net so no git subprocess is ever spawned.
    with patch("gitingest.repository_clone._run_git_command", new_callable=AsyncMock) as mock_run_git_command, \
            patch("gitingest.query_parser.fetch_remote_branch_list", new_callable=AsyncMock) as mock_fetch_branches:
        mock_run_git_command.return_value = (b"refs/heads/main\nrefs/heads/dev\nrefs/heads/feature-branch\n", b"")
        mock_fetch_branches.return_value = ["main", "dev", "feature-branch"]

        result = await _parse_repo_source(url)

    assert result["user_name"] == "user"
    assert result["repo_name"] == "repo"
    assert result["branch"] == "main"
    assert result["subpath"] == "/subdir/file"
111116

112117

113118
async def test_parse_url_invalid_repo_structure() -> None:
@@ -222,15 +227,18 @@ async def test_parse_url_branch_and_commit_distinction() -> None:
222227
url_branch = "https://github.com/user/repo/tree/main"
223228
url_commit = "https://github.com/user/repo/tree/abcd1234abcd1234abcd1234abcd1234abcd1234"
224229

225-
result_branch = await _parse_repo_source(url_branch)
226-
result_commit = await _parse_repo_source(url_commit)
227-
228-
assert result_branch["branch"] == "main"
229-
assert result_branch["commit"] is None
230+
with patch('gitingest.repository_clone._run_git_command', new_callable=AsyncMock) as mock_run_git_command:
231+
mock_run_git_command.return_value = (b"refs/heads/main\nrefs/heads/dev\nrefs/heads/feature-branch\n", b"")
232+
with patch('gitingest.repository_clone.fetch_remote_branch_list', new_callable=AsyncMock) as mock_fetch_branches:
233+
mock_fetch_branches.return_value = ["main", "dev", "feature-branch"]
230234

231-
assert result_commit["branch"] is None
232-
assert result_commit["commit"] == "abcd1234abcd1234abcd1234abcd1234abcd1234"
235+
result_branch = await _parse_repo_source(url_branch)
236+
result_commit = await _parse_repo_source(url_commit)
237+
assert result_branch["branch"] == "main"
238+
assert result_branch["commit"] is None
233239

240+
assert result_commit["branch"] is None
241+
assert result_commit["commit"] == "abcd1234abcd1234abcd1234abcd1234abcd1234"
234242

235243
async def test_parse_query_uuid_uniqueness() -> None:
236244
"""
@@ -274,3 +282,41 @@ async def test_parse_query_with_branch() -> None:
274282
assert result["branch"] == "2.2.x"
275283
assert result["commit"] is None
276284
assert result["type"] == "blob"
285+
286+
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "url, expected_branch, expected_subpath",
    [
        ("https://github.com/user/repo/tree/main/src", "main", "/src"),
        ("https://github.com/user/repo/tree/fix1", "fix1", "/"),
        ("https://github.com/user/repo/tree/nonexistent-branch/src", "nonexistent-branch", "/src"),
    ],
)
async def test_parse_repo_source_with_failed_git_command(url, expected_branch, expected_subpath):
    """
    Test `_parse_repo_source` when the branch list cannot be fetched.

    Verifies that the function falls back to the first path component as the branch and treats the
    rest of the path as the subpath.
    """
    # Patch the name where it is looked up: `query_parser` imported it with `from ... import`, so a patch
    # on `gitingest.repository_clone` would not inject the failure and the real git command would run.
    with patch("gitingest.query_parser.fetch_remote_branch_list", new_callable=AsyncMock) as mock_fetch_branches:
        mock_fetch_branches.side_effect = Exception("Failed to fetch branch list")

        result = await _parse_repo_source(url)

    assert result["branch"] == expected_branch
    assert result["subpath"] == expected_subpath
304+
305+
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "url, expected_branch, expected_subpath",
    [
        ("https://github.com/user/repo/tree/feature/fix1/src", "feature/fix1", "/src"),
        ("https://github.com/user/repo/tree/main/src", "main", "/src"),
        ("https://github.com/user/repo", None, "/"),  # No branch or subpath in the URL
        ("https://github.com/user/repo/tree/nonexistent-branch/src", None, "/"),  # Non-existent branch
        ("https://github.com/user/repo/tree/fix", "fix", "/"),
    ],
)
async def test_parse_repo_source_with_various_url_patterns(url, expected_branch, expected_subpath):
    """
    Test `_parse_repo_source` against several URL shapes with a known set of remote branches.

    Covers a branch name containing a slash, a plain branch with a subpath, a URL without any branch,
    a branch that does not exist on the remote, and a branch without a subpath.
    """
    # `fetch_remote_branch_list` is patched where `query_parser` looks it up so the mocked list is the one
    # actually consulted. Both mocks advertise the same four branches to keep them consistent.
    with patch("gitingest.repository_clone._run_git_command", new_callable=AsyncMock) as mock_run_git_command, \
            patch("gitingest.query_parser.fetch_remote_branch_list", new_callable=AsyncMock) as mock_fetch_branches:
        mock_run_git_command.return_value = (
            b"refs/heads/feature/fix1\nrefs/heads/main\nrefs/heads/feature-branch\nrefs/heads/fix\n",
            b"",
        )
        mock_fetch_branches.return_value = ["feature/fix1", "main", "feature-branch", "fix"]

        result = await _parse_repo_source(url)

    assert result["branch"] == expected_branch
    assert result["subpath"] == expected_subpath

0 commit comments

Comments
 (0)