1- """ This module contains functions for cloning a Git repository to a local path. """
1+ """This module contains functions for cloning a Git repository to a local path."""
22
3- import asyncio
43import os
54from pathlib import Path
6- from typing import List , Optional , Tuple
5+ from typing import Optional
76
8- from gitingest .ingestion_schema import CloneConfig
7+ from gitingest .schemas import CloneConfig
8+ from gitingest .utils .git_utils import check_repo_exists , ensure_git_installed , run_command
99from gitingest .utils .timeout_wrapper import async_timeout
1010
1111TIMEOUT : int = 60
1212
1313
1414@async_timeout (TIMEOUT )
15- async def clone (config : CloneConfig ) -> None :
15+ async def clone_repo (config : CloneConfig ) -> None :
1616 """
1717 Clone a repository to a local path based on the provided configuration.
1818
@@ -47,7 +47,7 @@ async def clone(config: CloneConfig) -> None:
4747 raise OSError (f"Failed to create parent directory { parent_dir } : { exc } " ) from exc
4848
4949 # Check if the repository exists
50- if not await _check_repo_exists (url ):
50+ if not await check_repo_exists (url ):
5151 raise ValueError ("Repository not found, make sure it is public" )
5252
5353 clone_cmd = ["git" , "clone" , "--single-branch" ]
@@ -64,7 +64,8 @@ async def clone(config: CloneConfig) -> None:
6464 clone_cmd += [url , local_path ]
6565
6666 # Clone the repository
67- await _run_command (* clone_cmd )
67+ await ensure_git_installed ()
68+ await run_command (* clone_cmd )
6869
6970 if commit or partial_clone :
7071 checkout_cmd = ["git" , "-C" , local_path ]
@@ -81,148 +82,4 @@ async def clone(config: CloneConfig) -> None:
8182 checkout_cmd += ["checkout" , commit ]
8283
8384 # Check out the specific commit and/or subpath
84- await _run_command (* checkout_cmd )
85-
86-
async def _check_repo_exists(url: str) -> bool:
    """
    Report whether a Git repository is reachable at the given URL.

    A header-only request (``curl -I``) is issued against the URL and the
    status code of the response decides the outcome.

    Parameters
    ----------
    url : str
        The URL of the Git repository to check.

    Returns
    -------
    bool
        True for status 200 or 301; False for status 404 or 302, or when
        the curl process itself exits with a non-zero return code.

    Raises
    ------
    RuntimeError
        If curl reports a status code other than 200, 301, 404 or 302.
    """
    # Status codes with a known verdict; anything else is treated as unexpected.
    verdict_by_status = {200: True, 301: True, 404: False, 302: False}

    head_request = await asyncio.create_subprocess_exec(
        "curl",
        "-I",
        url,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    raw_headers, _ = await head_request.communicate()

    # A failing curl invocation is reported as "repository not found".
    if head_request.returncode != 0:
        return False

    status_code = _get_status_code(raw_headers.decode())
    if status_code not in verdict_by_status:
        raise RuntimeError(f"Unexpected status code: {status_code} ")

    return verdict_by_status[status_code]
127-
128-
async def fetch_remote_branch_list(url: str) -> List[str]:
    """
    List the branch names advertised by a remote Git repository.

    Runs ``git ls-remote --heads`` against the URL and keeps the part of each
    output line that follows ``refs/heads/``.

    Parameters
    ----------
    url : str
        The URL of the Git repository to fetch branches from.

    Returns
    -------
    List[str]
        A list of branch names available in the remote repository.
    """
    heads_prefix = "refs/heads/"
    raw_output, _ = await _run_command("git", "ls-remote", "--heads", url)

    branch_names: List[str] = []
    for ref_line in raw_output.decode().splitlines():
        # Skip blank lines and anything that is not a branch ref.
        if not ref_line.strip():
            continue
        _, found, branch = ref_line.partition(heads_prefix)
        if found:
            branch_names.append(branch)

    return branch_names
150-
151-
async def _run_command(*args: str) -> Tuple[bytes, bytes]:
    """
    Run a command asynchronously and return what it wrote to stdout and stderr.

    Git availability is verified first via ``check_git_installed``.

    Parameters
    ----------
    *args : str
        The command and its arguments to execute.

    Returns
    -------
    Tuple[bytes, bytes]
        A tuple containing the stdout and stderr of the command.

    Raises
    ------
    RuntimeError
        If command exits with a non-zero status.
    """
    await check_git_installed()

    # Spawn the process with both output streams captured.
    process = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    out_bytes, err_bytes = await process.communicate()

    if process.returncode == 0:
        return out_bytes, err_bytes

    # Failure path: surface the command line together with its stderr text.
    error_message = err_bytes.decode().strip()
    command_line = " ".join(args)
    raise RuntimeError(f"Command failed: {command_line} \n Error: {error_message} ")
185-
186-
async def check_git_installed() -> None:
    """
    Verify that the ``git`` executable can be launched on this system.

    The check runs ``git --version`` and inspects its return code.

    Raises
    ------
    RuntimeError
        If Git is not installed or if the Git command exits with a non-zero status.
    """
    try:
        version_probe = await asyncio.create_subprocess_exec(
            "git",
            "--version",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        _, err_bytes = await version_probe.communicate()
    except FileNotFoundError as exc:
        # The executable could not be found at all.
        raise RuntimeError("Git is not installed. Please install Git before proceeding.") from exc

    if version_probe.returncode == 0:
        return

    # Git was launched but reported a failure; prefer its own stderr text.
    if err_bytes:
        error_message = err_bytes.decode().strip()
    else:
        error_message = "Git command not found"
    raise RuntimeError(f"Git is not installed or not accessible: {error_message} ")
210-
211-
212- def _get_status_code (response : str ) -> int :
213- """
214- Extract the status code from an HTTP response.
215-
216- Parameters
217- ----------
218- response : str
219- The HTTP response string.
220-
221- Returns
222- -------
223- int
224- The status code of the response
225- """
226- status_line = response .splitlines ()[0 ].strip ()
227- status_code = int (status_line .split (" " , 2 )[1 ])
228- return status_code
85+ await run_command (* checkout_cmd )
0 commit comments