
Commit 539adbb

feat(logging): enhance ingestion with detailed contextual logs
1 parent: 95dd20f

File tree: 3 files changed, +102 −0 lines

src/gitingest/clone.py
src/gitingest/entrypoint.py
src/gitingest/ingestion.py

src/gitingest/clone.py

Lines changed: 35 additions & 0 deletions
@@ -16,12 +16,16 @@
     resolve_commit,
     run_command,
 )
+from gitingest.utils.logging_config import get_logger
 from gitingest.utils.os_utils import ensure_directory_exists_or_create
 from gitingest.utils.timeout_wrapper import async_timeout
 
 if TYPE_CHECKING:
     from gitingest.schemas import CloneConfig
 
+# Initialize logger for this module
+logger = get_logger(__name__)
+
 
 @async_timeout(DEFAULT_TIMEOUT)
 async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
@@ -49,14 +53,35 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
     local_path: str = config.local_path
     partial_clone: bool = config.subpath != "/"
 
+    logger.info(
+        "Starting git clone operation",
+        extra={
+            "url": url,
+            "local_path": local_path,
+            "partial_clone": partial_clone,
+            "subpath": config.subpath,
+            "branch": config.branch,
+            "tag": config.tag,
+            "commit": config.commit,
+            "include_submodules": config.include_submodules,
+        },
+    )
+
+    logger.debug("Ensuring git is installed")
     await ensure_git_installed()
+
+    logger.debug("Creating local directory", extra={"parent_path": str(Path(local_path).parent)})
     await ensure_directory_exists_or_create(Path(local_path).parent)
 
+    logger.debug("Checking if repository exists", extra={"url": url})
     if not await check_repo_exists(url, token=token):
+        logger.error("Repository not found", extra={"url": url})
         msg = "Repository not found. Make sure it is public or that you have provided a valid token."
         raise ValueError(msg)
 
+    logger.debug("Resolving commit reference")
     commit = await resolve_commit(config, token=token)
+    logger.debug("Resolved commit", extra={"commit": commit})
 
     clone_cmd = ["git"]
     if token and is_github_host(url):
@@ -69,20 +94,30 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
     clone_cmd += [url, local_path]
 
     # Clone the repository
+    logger.info("Executing git clone command", extra={"command": " ".join([*clone_cmd[:-1], "<url>", local_path])})
     await run_command(*clone_cmd)
+    logger.info("Git clone completed successfully")
 
     # Checkout the subpath if it is a partial clone
     if partial_clone:
+        logger.info("Setting up partial clone for subpath", extra={"subpath": config.subpath})
         await checkout_partial_clone(config, token=token)
+        logger.debug("Partial clone setup completed")
 
     git = create_git_command(["git"], local_path, url, token)
 
     # Ensure the commit is locally available
+    logger.debug("Fetching specific commit", extra={"commit": commit})
     await run_command(*git, "fetch", "--depth=1", "origin", commit)
 
     # Write the work-tree at that commit
+    logger.info("Checking out commit", extra={"commit": commit})
     await run_command(*git, "checkout", commit)
 
     # Update submodules
     if config.include_submodules:
+        logger.info("Updating submodules")
         await run_command(*git, "submodule", "update", "--init", "--recursive", "--depth=1")
+        logger.debug("Submodules updated successfully")
+
+    logger.info("Git clone operation completed successfully", extra={"local_path": local_path})

src/gitingest/entrypoint.py

Lines changed: 32 additions & 0 deletions
@@ -20,6 +20,7 @@
 from gitingest.utils.auth import resolve_token
 from gitingest.utils.compat_func import removesuffix
 from gitingest.utils.ignore_patterns import load_ignore_patterns
+from gitingest.utils.logging_config import get_logger
 from gitingest.utils.pattern_utils import process_patterns
 from gitingest.utils.query_parser_utils import KNOWN_GIT_HOSTS
@@ -28,6 +29,9 @@
 
     from gitingest.schemas import IngestionQuery
 
+# Initialize logger for this module
+logger = get_logger(__name__)
+
 
 async def ingest_async(
     source: str,
@@ -83,19 +87,23 @@ async def ingest_async(
     - The content of the files in the repository or directory.
 
     """
+    logger.info("Starting ingestion process", extra={"source": source})
+
     token = resolve_token(token)
 
     source = removesuffix(source.strip(), ".git")
 
     # Determine the parsing method based on the source type
     if urlparse(source).scheme in ("https", "http") or any(h in source for h in KNOWN_GIT_HOSTS):
         # We either have a full URL or a domain-less slug
+        logger.info("Parsing remote repository", extra={"source": source})
         query = await parse_remote_repo(source, token=token)
         query.include_submodules = include_submodules
         _override_branch_and_tag(query, branch=branch, tag=tag)
 
     else:
         # Local path scenario
+        logger.info("Processing local directory", extra={"source": source})
         query = parse_local_dir_path(source)
 
     query.max_file_size = max_file_size
@@ -109,11 +117,35 @@ async def ingest_async(
 
     query.include_submodules = include_submodules
 
+    logger.debug(
+        "Configuration completed",
+        extra={
+            "max_file_size": query.max_file_size,
+            "include_submodules": query.include_submodules,
+            "include_gitignored": include_gitignored,
+            "has_include_patterns": bool(query.include_patterns),
+            "has_exclude_patterns": bool(query.ignore_patterns),
+        },
+    )
+
     async with _clone_repo_if_remote(query, token=token):
+        if query.url:
+            logger.info("Repository cloned, starting file processing")
+        else:
+            logger.info("Starting local directory processing")
+
         if not include_gitignored:
+            logger.debug("Applying gitignore patterns")
             _apply_gitignores(query)
+
+        logger.info("Processing files and generating output")
         summary, tree, content = ingest_query(query)
+
+        if output:
+            logger.debug("Writing output to file", extra={"output_path": output})
         await _write_output(tree, content=content, target=output)
+
+        logger.info("Ingestion completed successfully")
         return summary, tree, content
 
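The extra={...} dictionaries used throughout this commit attach their keys as attributes on each LogRecord, so the contextual values are only visible if a handler's formatter or filter reads them. A minimal, illustrative way for a caller to surface those fields using only the standard library (the ContextFormatter class and its field list below are assumptions for illustration, not part of gitingest; it also assumes the gitingest loggers propagate as usual):

# Illustrative only: surface the contextual fields attached via extra=... above.
import logging

CONTEXT_FIELDS = ("source", "url", "local_path", "commit", "subpath", "output_path")


class ContextFormatter(logging.Formatter):
    """Append any known contextual attributes (set via extra=) to the formatted message."""

    def format(self, record: logging.LogRecord) -> str:
        base = super().format(record)
        context = {key: getattr(record, key) for key in CONTEXT_FIELDS if hasattr(record, key)}
        return f"{base} {context}" if context else base


handler = logging.StreamHandler()
handler.setFormatter(ContextFormatter("%(levelname)s %(name)s: %(message)s"))
logging.getLogger("gitingest").addHandler(handler)
logging.getLogger("gitingest").setLevel(logging.DEBUG)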

src/gitingest/ingestion.py

Lines changed: 35 additions & 0 deletions
@@ -41,16 +41,30 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
         If the path cannot be found, is not a file, or the file has no content.
 
     """
+    logger.info(
+        "Starting file ingestion",
+        extra={
+            "slug": query.slug,
+            "subpath": query.subpath,
+            "local_path": str(query.local_path),
+            "max_file_size": query.max_file_size,
+        },
+    )
+
     subpath = Path(query.subpath.strip("/")).as_posix()
     path = query.local_path / subpath
 
     if not path.exists():
+        logger.error("Path not found", extra={"path": str(path), "slug": query.slug})
         msg = f"{query.slug} cannot be found"
         raise ValueError(msg)
 
     if (query.type and query.type == "blob") or query.local_path.is_file():
         # TODO: We do this wrong! We should still check the branch and commit!
+        logger.info("Processing single file", extra={"file_path": str(path)})
+
         if not path.is_file():
+            logger.error("Expected file but found non-file", extra={"path": str(path)})
             msg = f"Path {path} is not a file"
             raise ValueError(msg)
 
@@ -66,11 +80,21 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
         )
 
         if not file_node.content:
+            logger.error("File has no content", extra={"file_name": file_node.name})
             msg = f"File {file_node.name} has no content"
             raise ValueError(msg)
 
+        logger.info(
+            "Single file processing completed",
+            extra={
+                "file_name": file_node.name,
+                "file_size": file_node.size,
+            },
+        )
         return format_node(file_node, query=query)
 
+    logger.info("Processing directory", extra={"directory_path": str(path)})
+
     root_node = FileSystemNode(
         name=path.name,
         type=FileSystemNodeType.DIRECTORY,
@@ -82,6 +106,17 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
 
     _process_node(node=root_node, query=query, stats=stats)
 
+    logger.info(
+        "Directory processing completed",
+        extra={
+            "total_files": root_node.file_count,
+            "total_directories": root_node.dir_count,
+            "total_size_bytes": root_node.size,
+            "stats_total_files": stats.total_files,
+            "stats_total_size": stats.total_size,
+        },
+    )
+
     return format_node(root_node, query=query)
 
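Because the contextual values ride along as LogRecord attributes, a test can assert on them directly. A hypothetical pytest sketch, not from the repository (it assumes pytest-asyncio is installed, that the gitingest loggers propagate to the root logger, and an arbitrary sample directory; only the log message and its extra keys come from this commit):

# Hypothetical test sketch; names and fixtures beyond the diff are assumptions.
import logging

import pytest

from gitingest.entrypoint import ingest_async


@pytest.mark.asyncio
async def test_directory_ingestion_emits_completion_log(tmp_path, caplog):
    (tmp_path / "hello.txt").write_text("hello world\n")

    # Capture everything at DEBUG; assumes gitingest loggers propagate to root.
    with caplog.at_level(logging.DEBUG):
        summary, tree, content = await ingest_async(str(tmp_path))

    assert summary  # a summary string is returned alongside tree and content
    done = [r for r in caplog.records if r.getMessage() == "Directory processing completed"]
    assert done, "expected the completion log from ingest_query"
    assert done[0].total_files >= 1  # keys passed via extra= become LogRecord attributes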
