|
5 | 5 | Bitbucket, Gitea, and Codeberg, even if the host is omitted. |
6 | 6 | """ |
7 | 7 |
|
8 | | -from typing import List |
| 8 | +from typing import List, Tuple |
9 | 9 |
|
10 | 10 | import pytest |
11 | 11 |
|
12 | 12 | from gitingest.query_parsing import parse_query |
13 | 13 |
|
| 14 | +# Repository matrix: (host, user, repo) |
| 15 | +_REPOS: List[Tuple[str, str, str]] = [ |
| 16 | + ("github.com", "tiangolo", "fastapi"), |
| 17 | + ("gitlab.com", "gitlab-org", "gitlab-runner"), |
| 18 | + ("bitbucket.org", "na-dna", "llm-knowledge-share"), |
| 19 | + ("gitea.com", "xorm", "xorm"), |
| 20 | + ("codeberg.org", "forgejo", "forgejo"), |
| 21 | +] |
14 | 22 |
|
15 | | -@pytest.mark.parametrize( |
16 | | - "urls, expected_user, expected_repo, expected_url", |
17 | | - [ |
18 | | - ( |
19 | | - [ |
20 | | - "https://github.com/tiangolo/fastapi", |
21 | | - "github.com/tiangolo/fastapi", |
22 | | - "tiangolo/fastapi", |
23 | | - ], |
24 | | - "tiangolo", |
25 | | - "fastapi", |
26 | | - "https://github.com/tiangolo/fastapi", |
27 | | - ), |
28 | | - ( |
29 | | - [ |
30 | | - "https://gitlab.com/gitlab-org/gitlab-runner", |
31 | | - "gitlab.com/gitlab-org/gitlab-runner", |
32 | | - "gitlab-org/gitlab-runner", |
33 | | - ], |
34 | | - "gitlab-org", |
35 | | - "gitlab-runner", |
36 | | - "https://gitlab.com/gitlab-org/gitlab-runner", |
37 | | - ), |
38 | | - ( |
39 | | - [ |
40 | | - "https://bitbucket.org/na-dna/llm-knowledge-share", |
41 | | - "bitbucket.org/na-dna/llm-knowledge-share", |
42 | | - "na-dna/llm-knowledge-share", |
43 | | - ], |
44 | | - "na-dna", |
45 | | - "llm-knowledge-share", |
46 | | - "https://bitbucket.org/na-dna/llm-knowledge-share", |
47 | | - ), |
48 | | - ( |
49 | | - [ |
50 | | - "https://gitea.com/xorm/xorm", |
51 | | - "gitea.com/xorm/xorm", |
52 | | - "xorm/xorm", |
53 | | - ], |
54 | | - "xorm", |
55 | | - "xorm", |
56 | | - "https://gitea.com/xorm/xorm", |
57 | | - ), |
58 | | - ( |
59 | | - [ |
60 | | - "https://codeberg.org/forgejo/forgejo", |
61 | | - "codeberg.org/forgejo/forgejo", |
62 | | - "forgejo/forgejo", |
63 | | - ], |
64 | | - "forgejo", |
65 | | - "forgejo", |
66 | | - "https://codeberg.org/forgejo/forgejo", |
67 | | - ), |
68 | | - ], |
69 | | -) |
| 23 | + |
| 24 | +# Stacking the two parametrize decorators yields the Cartesian product of the repositories and the URL variants (5 x 3 = 15 cases).
| 25 | +@pytest.mark.parametrize("host, user, repo", _REPOS, ids=[f"{h}:{u}/{r}" for h, u, r in _REPOS]) |
| 26 | +@pytest.mark.parametrize("variant", ["full", "noscheme", "slug"]) |
70 | 27 | @pytest.mark.asyncio |
71 | 28 | async def test_parse_query_without_host( |
72 | | - urls: List[str], |
73 | | - expected_user: str, |
74 | | - expected_repo: str, |
75 | | - expected_url: str, |
| 29 | + host: str, |
| 30 | + user: str, |
| 31 | + repo: str, |
| 32 | + variant: str, |
76 | 33 | ) -> None: |
77 | | - """ |
78 | | - Test `parse_query` for Git host agnosticism. |
| 34 | + """Verify that `parse_query` handles full URLs, scheme-less URLs and bare `user/repo` slugs (host omitted)."""
| 35 | + |
| 36 | + # Build the input URL based on the selected variant |
| 37 | + if variant == "full": |
| 38 | + url = f"https://{host}/{user}/{repo}" |
| 39 | + elif variant == "noscheme": |
| 40 | + url = f"{host}/{user}/{repo}" |
| 41 | + else: # "slug" |
| 42 | + url = f"{user}/{repo}" |
| 43 | + |
| 44 | + expected_url = f"https://{host}/{user}/{repo}" |
| 45 | + |
| 46 | + query = await parse_query(url, max_file_size=50, from_web=True) |
| 47 | + |
| 48 | + # Compare against the canonical dict while ignoring unpredictable fields. |
| 49 | + actual = query.model_dump(exclude={"id", "local_path", "ignore_patterns"}) |
79 | 50 |
|
80 | | - Given multiple URL variations for the same user/repo on different Git hosts (with or without host names): |
81 | | - When `parse_query` is called with each variation, |
82 | | - Then the parser should correctly identify the user, repo, canonical URL, and other default fields. |
83 | | - """ |
84 | | - for url in urls: |
85 | | - query = await parse_query(url, max_file_size=50, from_web=True) |
| 51 | + expected = { |
| 52 | + "user_name": user, |
| 53 | + "repo_name": repo, |
| 54 | + "url": expected_url, |
| 55 | + "slug": f"{user}-{repo}", |
| 56 | + "subpath": "/", |
| 57 | + "type": None, |
| 58 | + "branch": None, |
| 59 | + "commit": None, |
| 60 | + "max_file_size": 50, |
| 61 | + "include_patterns": None, |
| 62 | + } |
86 | 63 |
|
87 | | - assert query.user_name == expected_user |
88 | | - assert query.repo_name == expected_repo |
89 | | - assert query.url == expected_url |
90 | | - assert query.slug == f"{expected_user}-{expected_repo}" |
91 | | - assert query.id is not None |
92 | | - assert query.subpath == "/" |
93 | | - assert query.branch is None |
94 | | - assert query.commit is None |
95 | | - assert query.type is None |
| 64 | + assert actual == expected |
0 commit comments