Skip to content

Commit 32bbacb

Browse files
cevatkerim authored and filipchristiansen committed
feat: add raw response mode to process_query
- Add raw_response option to return tuple instead of template
- Update extract_content endpoint for plain text output
1 parent 36b04a5 commit 32bbacb

File tree

3 files changed

+121
-46
lines changed

3 files changed

+121
-46
lines changed

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ You can also replace `hub` with `ingest` in any github url to access the corespo
1919

2020
- **Easy code context**: Get a text digest from a git repository URL or a directory
2121
- **Smart Formatting**: Optimized output format for LLM prompts
22+
- **Flexible Filtering**:
23+
- Include/exclude files by pattern
24+
- Control maximum file size
2225
- **Statistics about**:
2326
- File and directory structure
2427
- Size of the extract
@@ -62,6 +65,31 @@ summary, tree, content = ingest("https://github.com/cyclotruc/gitingest")
6265

6366
By default, this won't write a file but can be enabled with the `output` argument
6467

68+
## 🌐 Web API Usage
69+
70+
You can access repositories directly via URL:
71+
72+
```
73+
# Basic repository access
74+
https://gitingest.com/user/repo
75+
76+
# With query parameters
77+
https://gitingest.com/extract/user/repo?max_file_size=243&pattern_type=include&pattern=*.py
78+
79+
# Parameters:
80+
- max_file_size: Controls maximum file size (default: 243)
81+
- pattern_type: 'include' or 'exclude'
82+
- pattern: File pattern (e.g. "*.py" for Python files)
83+
- summary: Set to true to include summary and tree structure
84+
```
85+
86+
## 🛠️ Using
87+
88+
- Tailwind CSS - Frontend
89+
- [FastAPI](https://github.com/fastapi/fastapi) - Backend framework
90+
- [tiktoken](https://github.com/openai/tiktoken) - Token estimation
91+
- [apianalytics.dev](https://www.apianalytics.dev/) - Simple Analytics
92+
6593
## 🌐 Self-host
6694

6795
1. Build the image:

src/process_query.py

Lines changed: 57 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from typing import Union
12
from fastapi import Request
23
from fastapi.templating import Jinja2Templates
34
from starlette.templating import _TemplateResponse
@@ -18,40 +19,43 @@ async def process_query(
1819
pattern_type: str = "exclude",
1920
pattern: str = "",
2021
is_index: bool = False,
21-
) -> _TemplateResponse:
22+
raw_response: bool = False
23+
) -> Union[_TemplateResponse, tuple[str, str, str]]:
2224
"""
23-
Process a query by parsing input, cloning a repository, and generating a summary.
24-
25-
Handle user input, process GitHub repository data, and prepare
26-
a response for rendering a template with the processed results or an error message.
25+
Process query and return template response or raw data tuple.
2726
2827
Parameters
2928
----------
3029
request : Request
31-
The HTTP request object.
30+
HTTP request object
3231
input_text : str
33-
Input text provided by the user, typically a GitHub repository URL or slug.
32+
GitHub repository URL or slug
3433
slider_position : int
35-
Position of the slider, representing the maximum file size in the query.
34+
Maximum file size position (0-500)
3635
pattern_type : str, optional
37-
Type of pattern to use, either "include" or "exclude" (default is "exclude").
36+
"include" or "exclude" pattern type (default: "exclude")
3837
pattern : str, optional
39-
Pattern to include or exclude in the query, depending on the pattern type.
38+
Pattern for including/excluding files
4039
is_index : bool, optional
41-
Flag indicating whether the request is for the index page (default is False).
40+
Whether request is for index page (default: False)
41+
raw_response : bool, optional
42+
Return raw data tuple instead of template (default: False)
4243
4344
Returns
4445
-------
45-
_TemplateResponse
46-
Rendered template response containing the processed results or an error message.
46+
Union[_TemplateResponse, tuple[str, str, str]]
47+
TemplateResponse:
48+
Rendered HTML template with processed results, summary, and error messages
49+
tuple[str, str, str]:
50+
Raw data as (summary, directory_tree, file_contents) when raw_response=True
4751
"""
4852
template = "index.jinja" if is_index else "github.jinja"
4953
max_file_size = logSliderToSize(slider_position)
5054

5155
if pattern_type == "include":
5256
include_patterns = pattern
5357
exclude_patterns = None
54-
elif pattern_type == "exclude":
58+
else:
5559
exclude_patterns = pattern
5660
include_patterns = None
5761

@@ -63,17 +67,53 @@ async def process_query(
6367
include_patterns=include_patterns,
6468
ignore_patterns=exclude_patterns,
6569
)
70+
6671
clone_config = CloneConfig(
6772
url=query["url"],
6873
local_path=query["local_path"],
6974
commit=query.get("commit"),
7075
branch=query.get("branch"),
7176
)
77+
7278
await clone_repo(clone_config)
7379
summary, tree, content = ingest_from_query(query)
80+
81+
if raw_response:
82+
return summary, tree, content
83+
7484
with open(f"{clone_config.local_path}.txt", "w") as f:
7585
f.write(tree + "\n" + content)
7686

87+
if not raw_response and len(content) > MAX_DISPLAY_SIZE:
88+
content = (
89+
f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, "
90+
"download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE]
91+
)
92+
93+
_print_success(
94+
url=query["url"],
95+
max_file_size=max_file_size,
96+
pattern_type=pattern_type,
97+
pattern=pattern,
98+
summary=summary,
99+
)
100+
return templates.TemplateResponse(
101+
template,
102+
{
103+
"request": request,
104+
"github_url": input_text,
105+
"result": True,
106+
"summary": summary,
107+
"tree": tree,
108+
"content": contents,
109+
"examples": EXAMPLE_REPOS if is_index else [],
110+
"ingest_id": query["id"],
111+
"default_file_size": slider_position,
112+
"pattern_type": pattern_type,
113+
"pattern": pattern,
114+
},
115+
)
116+
77117
except Exception as e:
78118
# hack to print error message when query is not defined
79119
if "query" in locals() and query is not None and isinstance(query, dict):
@@ -82,6 +122,9 @@ async def process_query(
82122
print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="")
83123
print(f"{Colors.RED}{e}{Colors.END}")
84124

125+
if raw_response:
126+
raise e
127+
85128
return templates.TemplateResponse(
86129
template,
87130
{
@@ -95,37 +138,6 @@ async def process_query(
95138
},
96139
)
97140

98-
if len(content) > MAX_DISPLAY_SIZE:
99-
content = (
100-
f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, "
101-
"download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE]
102-
)
103-
104-
_print_success(
105-
url=query["url"],
106-
max_file_size=max_file_size,
107-
pattern_type=pattern_type,
108-
pattern=pattern,
109-
summary=summary,
110-
)
111-
112-
return templates.TemplateResponse(
113-
template,
114-
{
115-
"request": request,
116-
"github_url": input_text,
117-
"result": True,
118-
"summary": summary,
119-
"tree": tree,
120-
"content": content,
121-
"examples": EXAMPLE_REPOS if is_index else [],
122-
"ingest_id": query["id"],
123-
"default_file_size": slider_position,
124-
"pattern_type": pattern_type,
125-
"pattern": pattern,
126-
},
127-
)
128-
129141

130142
def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None:
131143
"""

src/routers/dynamic.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from fastapi import APIRouter, Form, Request
2-
from fastapi.responses import HTMLResponse
2+
from fastapi.responses import HTMLResponse, Response
33
from fastapi.templating import Jinja2Templates
44

55
from process_query import process_query
@@ -8,6 +8,41 @@
88
router = APIRouter()
99
templates = Jinja2Templates(directory="templates")
1010

11+
@router.get("/extract/{full_path:path}", response_model=None)
12+
async def extract_content(
13+
request: Request,
14+
full_path: str,
15+
summary: bool = False,
16+
) -> Response:
17+
try:
18+
query_params = request.query_params
19+
max_file_size = int(query_params.get("max_file_size", 243))
20+
pattern_type = query_params.get("pattern_type", "exclude")
21+
pattern = query_params.get("pattern", "")
22+
23+
result_summary, tree, content = await process_query(
24+
request,
25+
input_text=f"https://github.com/{full_path}",
26+
slider_position=max_file_size,
27+
pattern_type=pattern_type,
28+
pattern=pattern,
29+
is_index=False,
30+
raw_response=True
31+
)
32+
33+
response_parts = []
34+
if summary:
35+
response_parts.append(f"Summary:\n{result_summary}\n")
36+
response_parts.append(f"Tree:\n{tree}\n")
37+
response_parts.append(f"Content:\n{content}")
38+
39+
return Response(content="\n".join(response_parts), media_type="text/plain")
40+
except Exception as e:
41+
return Response(
42+
content=f"Error during extraction: {str(e)}",
43+
media_type="text/plain",
44+
status_code=500,
45+
)
1146

1247
@router.get("/{full_path:path}")
1348
async def catch_all(request: Request, full_path: str) -> HTMLResponse:

0 commit comments

Comments
 (0)