Skip to content

Commit 7fb28a7

Browse files
Merge branch 'main' into feature/pr-221-fix
2 parents c35437a + 09ffc44 commit 7fb28a7

File tree

7 files changed

+57
-5
lines changed

7 files changed

+57
-5
lines changed

src/gitingest/utils/query_parser_utils.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,22 +60,48 @@ def _is_valid_pattern(pattern: str) -> bool:
6060

6161
def _validate_host(host: str) -> None:
6262
"""
63-
Validate the given host against the known Git hosts.
63+
Validate that a hostname is a known or probable Git host.
64+
65+
The host is accepted if it is present in the hard-coded `KNOWN_GIT_HOSTS` list or if it satisfies the
66+
simple heuristics in `_looks_like_git_host`, which try to recognise common self-hosted Git services (e.g. GitLab
67+
instances on sub-domains such as `gitlab.example.com` or `git.example.com`).
6468
6569
Parameters
6670
----------
6771
host : str
68-
The host to validate.
72+
Hostname (case-insensitive).
6973
7074
Raises
7175
------
7276
ValueError
73-
If the host is not a known Git host.
77+
If the host cannot be recognised as a probable Git hosting domain.
7478
"""
75-
if host not in KNOWN_GIT_HOSTS:
79+
host = host.lower()
80+
if host not in KNOWN_GIT_HOSTS and not _looks_like_git_host(host):
7681
raise ValueError(f"Unknown domain '{host}' in URL")
7782

7883

84+
def _looks_like_git_host(host: str) -> bool:
85+
"""
86+
Check if the given host looks like a Git host.
87+
88+
The current heuristic returns `True` when the host starts with `git.` (e.g. `git.example.com`) or starts with
89+
`gitlab.` (e.g. `gitlab.company.com`).
90+
91+
Parameters
92+
----------
93+
host : str
94+
Hostname (case-insensitive).
95+
96+
Returns
97+
-------
98+
bool
99+
True if the host looks like a Git host, otherwise False.
100+
"""
101+
host = host.lower()
102+
return host.startswith(("git.", "gitlab."))
103+
104+
79105
def _validate_url_scheme(scheme: str) -> None:
80106
"""
81107
Validate the given scheme against the known schemes.
@@ -90,6 +116,7 @@ def _validate_url_scheme(scheme: str) -> None:
90116
ValueError
91117
If the scheme is not 'http' or 'https'.
92118
"""
119+
scheme = scheme.lower()
93120
if scheme not in ("https", "http"):
94121
raise ValueError(f"Invalid URL scheme '{scheme}' in URL")
95122

src/server/query_processor.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ async def process_query(
9696
if not query.url:
9797
raise ValueError("The 'url' parameter is required.")
9898

99+
# Set the "<user>/<repo>" string shown in the page title
100+
context["short_repo_url"] = f"{query.user_name}/{query.repo_name}"
101+
99102
clone_config = query.extract_clone_config()
100103
await clone_repo(clone_config, token=token)
101104
summary, tree, content = ingest_query(query)

src/server/templates/base.jinja

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,13 @@
3333
<meta property="og:url" content="{{ request.url }}">
3434
<meta property="og:image" content="/static/og-image.png">
3535
<title>
36-
{% block title %}Gitingest{% endblock %}
36+
{% block title %}
37+
{% if short_repo_url %}
38+
Gitingest - {{ short_repo_url }}
39+
{% else %}
40+
Gitingest
41+
{% endif %}
42+
{% endblock %}
3743
</title>
3844
<script src="https://cdn.tailwindcss.com"></script>
3945
<script src="/static/js/utils.js"></script>

src/static/js/utils.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ function handleSubmit(event, showLoading = false) {
109109
starsElement.textContent = starCount;
110110
}
111111

112+
// Set dynamic title that includes the repo name.
113+
document.title = document.body.getElementsByTagName('title')[0].textContent;
114+
112115
// Scroll to results if they exist
113116
const resultsSection = document.querySelector('[data-results]');
114117
if (resultsSection) {

tests/query_parser/__init__.py

Whitespace-only changes.

tests/query_parser/test_git_host_agnostic.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import pytest
1111

1212
from gitingest.query_parsing import parse_query
13+
from gitingest.utils.query_parser_utils import KNOWN_GIT_HOSTS
1314

1415
# Repository matrix: (host, user, repo)
1516
_REPOS: List[Tuple[str, str, str]] = [
@@ -18,6 +19,8 @@
1819
("bitbucket.org", "na-dna", "llm-knowledge-share"),
1920
("gitea.com", "xorm", "xorm"),
2021
("codeberg.org", "forgejo", "forgejo"),
22+
("git.rwth-aachen.de", "medialab", "19squared"),
23+
("gitlab.alpinelinux.org", "alpine", "apk-tools"),
2124
]
2225

2326

@@ -43,6 +46,13 @@ async def test_parse_query_without_host(
4346

4447
expected_url = f"https://{host}/{user}/{repo}"
4548

49+
# For slug form with a custom host (not in KNOWN_GIT_HOSTS) we expect a failure,
50+
# because the parser cannot guess which domain to use.
51+
if variant == "slug" and host not in KNOWN_GIT_HOSTS:
52+
with pytest.raises(ValueError):
53+
await parse_query(url, max_file_size=50, from_web=True)
54+
return
55+
4656
query = await parse_query(url, max_file_size=50, from_web=True)
4757

4858
# Compare against the canonical dict while ignoring unpredictable fields.

tests/query_parser/test_query_parser.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
"https://gitea.com/user/repo",
2525
"https://codeberg.org/user/repo",
2626
"https://gist.github.com/user/repo",
27+
"https://git.example.com/user/repo",
28+
"https://gitlab.example.com/user/repo",
29+
"https://gitlab.example.se/user/repo",
2730
]
2831

2932
URLS_HTTP: List[str] = [url.replace("https://", "http://") for url in URLS_HTTPS]

0 commit comments

Comments
 (0)