33
44from gitingest .utils import AsyncTimeoutError , async_timeout
55
6- CLONE_TIMEOUT = 20
6+ CLONE_TIMEOUT : int = 20
77
88
99@dataclass
@@ -15,7 +15,86 @@ class CloneConfig:
1515 pat : str | None = None
1616
1717
18- async def check_repo_exists (url : str , pat : str | None = None ) -> bool :
@async_timeout(CLONE_TIMEOUT)
async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
    """
    Clone a repository to a local path based on the provided configuration.

    Three scenarios are handled:

    1. ``commit`` is set: clone without ``--depth`` (so history is available
       for the checkout), then check out that commit.
    2. ``branch`` is set to something other than ``main``/``master``
       (case-insensitive): shallow-clone that branch via ``--branch``.
    3. Otherwise: shallow-clone whatever branch the remote serves by default.

    Parameters
    ----------
    config : CloneConfig
        Configuration object containing:
            - url (str): The URL of the repository.
            - local_path (str): The local path to clone the repository to.
            - commit (str | None): The specific commit hash to checkout.
            - branch (str | None): The branch to clone.
            - pat (str | None): Personal Access Token for authentication.

    Returns
    -------
    tuple[bytes, bytes]
        The stdout and stderr of the last git command executed (the checkout
        in scenario 1, the clone otherwise).

    Raises
    ------
    ValueError
        If the repository does not exist or if required parameters are missing.
    RuntimeError
        If any git command fails during execution.
    AsyncTimeoutError
        If the cloning process exceeds the specified timeout.
    """
    # Extract and validate parameters
    url: str = config.url
    local_path: str = config.local_path
    commit: str | None = config.commit
    branch: str | None = config.branch
    pat: str | None = config.pat

    if not url:
        raise ValueError("The 'url' parameter is required.")

    if not local_path:
        raise ValueError("The 'local_path' parameter is required.")

    # The existence check receives the plain URL and the PAT separately.
    if not await _check_repo_exists(url, pat):
        raise ValueError("Repository not found, make sure it is public or provide valid PAT")

    if pat:
        # Embed the credentials once, up front, instead of in every branch.
        # count=1 keeps any later "https://" substring in the URL intact.
        url = url.replace("https://", f"https://oauth2:{pat}@", 1)

    if commit:
        # Scenario 1: full (non-shallow) clone so the commit can be checked out.
        await _run_git_command("git", "clone", "--single-branch", url, local_path)
        return await _run_git_command("git", "-C", local_path, "checkout", commit)

    # Scenarios 2 and 3: shallow clone; they differ only in the --branch flag.
    clone_cmd = ["git", "clone", "--depth=1", "--single-branch"]
    if branch and branch.lower() not in ("main", "master"):
        clone_cmd.extend(["--branch", branch])
    clone_cmd.extend([url, local_path])

    # Errors from the helper and the timeout decorator propagate unchanged.
    return await _run_git_command(*clone_cmd)
95+
96+
97+ async def _check_repo_exists (url : str , pat : str | None = None ) -> bool :
1998 """
2099 Check if a repository exists at the given URL using an HTTP HEAD request.
21100
@@ -65,7 +144,7 @@ async def check_repo_exists(url: str, pat: str | None = None) -> bool:
65144 return "HTTP/1.1 404" not in stdout_str and "HTTP/2 404" not in stdout_str
66145
67146
68- async def run_git_command (* args : str ) -> tuple [bytes , bytes ]:
147+ async def _run_git_command (* args : str ) -> tuple [bytes , bytes ]:
69148 """
70149 Executes a git command asynchronously and captures its output.
71150
@@ -95,82 +174,3 @@ async def run_git_command(*args: str) -> tuple[bytes, bytes]:
95174 raise RuntimeError (f"Git command failed: { ' ' .join (args )} \n Error: { error_message } " )
96175
97176 return stdout , stderr
98-
99-
@async_timeout(CLONE_TIMEOUT)
async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
    """
    Clone a repository to a local path based on the provided configuration.

    A requested commit triggers a clone without ``--depth`` followed by a
    checkout; otherwise a shallow (``--depth=1``) clone is made, passing
    ``--branch`` only when the branch is not ``main``/``master``
    (case-insensitive).

    Parameters
    ----------
    config : CloneConfig
        Configuration object containing:
            - url (str): The URL of the repository.
            - local_path (str): The local path to clone the repository to.
            - commit (str | None): The specific commit hash to checkout.
            - branch (str | None): The branch to clone.
            - pat (str | None): Personal Access Token for authentication.

    Returns
    -------
    tuple[bytes, bytes]
        The stdout and stderr of the last git command executed.

    Raises
    ------
    ValueError
        If the repository does not exist or if required parameters are missing.
    RuntimeError
        If any git command fails during execution.
    AsyncTimeoutError
        If the cloning process exceeds the specified timeout.
    """
    # Extract and validate parameters
    url: str = config.url
    local_path: str = config.local_path
    commit: str | None = config.commit
    branch: str | None = config.branch
    pat: str | None = config.pat

    if not url:
        raise ValueError("The 'url' parameter is required.")

    if not local_path:
        raise ValueError("The 'local_path' parameter is required.")

    # The existence check receives the plain URL and the PAT separately.
    if not await check_repo_exists(url, pat):
        raise ValueError("Repository not found, make sure it is public or provide valid PAT")

    if pat:
        # Inject credentials a single time; count=1 rewrites only the first
        # "https://" so the rest of the URL is never altered.
        url = url.replace("https://", f"https://oauth2:{pat}@", 1)

    if commit:
        # Full-history clone, then move the working tree to the commit.
        await run_git_command("git", "clone", "--single-branch", url, local_path)
        return await run_git_command("git", "-C", local_path, "checkout", commit)

    # Shallow clone; only a non-main/master branch needs an explicit --branch.
    clone_cmd = ["git", "clone", "--depth=1", "--single-branch"]
    if branch and branch.lower() not in ("main", "master"):
        clone_cmd.extend(["--branch", branch])
    clone_cmd.extend([url, local_path])

    # Failures from the helper and the timeout decorator propagate unchanged.
    return await run_git_command(*clone_cmd)
0 commit comments