33
44from gitingest .utils import AsyncTimeoutError , async_timeout
55
6- CLONE_TIMEOUT = 20
6+ CLONE_TIMEOUT : int = 20
77
88
99@dataclass
@@ -14,67 +14,6 @@ class CloneConfig:
1414 branch : str | None = None
1515
1616
async def check_repo_exists(url: str) -> bool:
    """
    Check if a repository exists at the given URL using an HTTP HEAD request.

    The request is made by spawning ``curl -I <url>`` and inspecting the status line(s) it prints.

    Parameters
    ----------
    url : str
        The URL of the repository.

    Returns
    -------
    bool
        False if ``curl`` exits with a non-zero status or the response carries a 404 status line.
        True otherwise (any other status code is treated as "exists").
    """
    proc = await asyncio.create_subprocess_exec(
        "curl",
        "-I",
        url,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, _ = await proc.communicate()
    if proc.returncode != 0:
        return False

    # Header bytes are not guaranteed to be valid UTF-8; decoding must never abort the check.
    response = stdout.decode(errors="replace")

    # Look at the status lines themselves instead of two hard-coded protocol prefixes, so a 404
    # is recognised whatever HTTP version the server answers with (HTTP/1.0, HTTP/1.1, HTTP/2, ...).
    for line in response.splitlines():
        fields = line.split(maxsplit=2)
        if len(fields) >= 2 and fields[0].startswith("HTTP/") and fields[1] == "404":
            return False
    return True
44-
45-
async def run_git_command(*args: str) -> tuple[bytes, bytes]:
    """
    Executes a git command asynchronously and captures its output.

    Parameters
    ----------
    *args : str
        The git command and its arguments to execute. The first item is the executable
        to spawn (e.g. ``"git"``); no shell is involved.

    Returns
    -------
    tuple[bytes, bytes]
        A tuple containing the raw stdout and stderr of the git command.

    Raises
    ------
    RuntimeError
        If the git command exits with a non-zero status. The message contains the command
        line and the decoded stderr output.
    """
    proc = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        # stderr may contain bytes that are not valid UTF-8 (paths, localized messages);
        # decode leniently so the documented RuntimeError is raised, not UnicodeDecodeError.
        error_message = stderr.decode(errors="replace").strip()
        raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}")

    return stdout, stderr
76-
77-
7817@async_timeout (CLONE_TIMEOUT )
7918async def clone_repo (config : CloneConfig ) -> tuple [bytes , bytes ]:
8019 """
@@ -116,29 +55,90 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
11655 raise ValueError ("The 'local_path' parameter is required." )
11756
11857 # Check if the repository exists
119- if not await check_repo_exists (url ):
58+ if not await _check_repo_exists (url ):
12059 raise ValueError ("Repository not found, make sure it is public" )
12160
12261 try :
12362 if commit :
12463 # Scenario 1: Clone and checkout a specific commit
12564 # Clone the repository without depth to ensure full history for checkout
12665 clone_cmd = ["git" , "clone" , "--single-branch" , url , local_path ]
127- await run_git_command (* clone_cmd )
66+ await _run_git_command (* clone_cmd )
12867
12968 # Checkout the specific commit
13069 checkout_cmd = ["git" , "-C" , local_path , "checkout" , commit ]
131- return await run_git_command (* checkout_cmd )
70+ return await _run_git_command (* checkout_cmd )
13271
13372 if branch and branch .lower () not in ("main" , "master" ):
13473
13574 # Scenario 2: Clone a specific branch with shallow depth
13675 clone_cmd = ["git" , "clone" , "--depth=1" , "--single-branch" , "--branch" , branch , url , local_path ]
137- return await run_git_command (* clone_cmd )
76+ return await _run_git_command (* clone_cmd )
13877
13978 # Scenario 3: Clone the default branch with shallow depth
14079 clone_cmd = ["git" , "clone" , "--depth=1" , "--single-branch" , url , local_path ]
141- return await run_git_command (* clone_cmd )
80+ return await _run_git_command (* clone_cmd )
14281
14382 except (RuntimeError , asyncio .TimeoutError , AsyncTimeoutError ):
14483 raise # Re-raise the exception
84+
85+
async def _check_repo_exists(url: str) -> bool:
    """
    Check if a repository exists at the given URL using an HTTP HEAD request.

    The request is made by spawning ``curl -I <url>`` and inspecting the status line(s) it prints.

    Parameters
    ----------
    url : str
        The URL of the repository.

    Returns
    -------
    bool
        False if ``curl`` exits with a non-zero status or the response carries a 404 status line.
        True otherwise (any other status code is treated as "exists").
    """
    proc = await asyncio.create_subprocess_exec(
        "curl",
        "-I",
        url,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, _ = await proc.communicate()
    if proc.returncode != 0:
        return False

    # Header bytes are not guaranteed to be valid UTF-8; decoding must never abort the check.
    response = stdout.decode(errors="replace")

    # Look at the status lines themselves instead of two hard-coded protocol prefixes, so a 404
    # is recognised whatever HTTP version the server answers with (HTTP/1.0, HTTP/1.1, HTTP/2, ...).
    for line in response.splitlines():
        fields = line.split(maxsplit=2)
        if len(fields) >= 2 and fields[0].startswith("HTTP/") and fields[1] == "404":
            return False
    return True
113+
114+
async def _run_git_command(*args: str) -> tuple[bytes, bytes]:
    """
    Executes a git command asynchronously and captures its output.

    Parameters
    ----------
    *args : str
        The git command and its arguments to execute. The first item is the executable
        to spawn (e.g. ``"git"``); no shell is involved.

    Returns
    -------
    tuple[bytes, bytes]
        A tuple containing the raw stdout and stderr of the git command.

    Raises
    ------
    RuntimeError
        If the git command exits with a non-zero status. The message contains the command
        line and the decoded stderr output.
    """
    proc = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        # stderr may contain bytes that are not valid UTF-8 (paths, localized messages);
        # decode leniently so the documented RuntimeError is raised, not UnicodeDecodeError.
        error_message = stderr.decode(errors="replace").strip()
        raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}")

    return stdout, stderr
0 commit comments