@@ -49,6 +49,7 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
4949 branch : str | None = config .branch
5050 tag : str | None = config .tag
5151 partial_clone : bool = config .subpath != "/"
52+ include_submodules : bool = config .include_submodules
5253
5354 # Create parent directory if it doesn't exist
5455 await ensure_directory (Path (local_path ).parent )
@@ -63,7 +64,8 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
6364 clone_cmd += ["-c" , create_git_auth_header (token , url = url )]
6465
6566 clone_cmd += ["clone" , "--single-branch" ]
66- # TODO: Re-enable --recurse-submodules when submodule support is needed
67+ if include_submodules :
68+ clone_cmd += ["--recurse-submodules" ]
6769
6870 if partial_clone :
6971 clone_cmd += ["--filter=blob:none" , "--sparse" ]
@@ -86,15 +88,40 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
8688
8789 # Checkout the subpath if it is a partial clone
8890 if partial_clone :
89- subpath = config .subpath .lstrip ("/" )
90- if config .blob :
91- # When ingesting from a file url (blob/branch/path/file.txt), we need to remove the file name.
92- subpath = str (Path (subpath ).parent .as_posix ())
93-
94- checkout_cmd = create_git_command (["git" ], local_path , url , token )
95- await run_command (* checkout_cmd , "sparse-checkout" , "set" , subpath )
91+ await _checkout_partial_clone (config , local_path , url , token )
9692
9793 # Checkout the commit if it is provided
9894 if commit :
9995 checkout_cmd = create_git_command (["git" ], local_path , url , token )
10096 await run_command (* checkout_cmd , "checkout" , commit )
97+
98+
99+ def _checkout_partial_clone (config : CloneConfig , local_path : str , url : str , token : str | None ) -> None :
100+ """Handle sparse-checkout for partial clones.
101+
102+ This helper function sets the sparse-checkout configuration for a partial clone,
103+ optionally adjusting the subpath if ingesting from a file URL.
104+
105+ Parameters
106+ ----------
107+ config : CloneConfig
108+ The configuration for cloning the repository, including subpath and blob flag.
109+ local_path : str
110+ The local path where the repository has been cloned.
111+ url : str
112+ The URL of the repository.
113+ token : str | None
114+ GitHub personal access token (PAT) for accessing private repositories.
115+ Can also be set via the ``GITHUB_TOKEN`` environment variable.
116+
117+ Returns
118+ -------
119+ None
120+
121+ """
122+ subpath = config .subpath .lstrip ("/" )
123+ if config .blob :
124+ # When ingesting from a file url (blob/branch/path/file.txt), we need to remove the file name.
125+ subpath = str (Path (subpath ).parent .as_posix ())
126+ checkout_cmd = create_git_command (["git" ], local_path , url , token )
127+ return run_command (* checkout_cmd , "sparse-checkout" , "set" , subpath )
0 commit comments