From e61f227d0654212412ce1835f7e432df85cfc36b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 28 Dec 2025 19:10:48 +0100 Subject: [PATCH 1/8] tag: use algo of repo parameter in parse_tag_buffer() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop using "the_hash_algo" explicitly and implictly via parse_oid_hex() and instead use the "hash_algo" member of the passed in repository, which is more correct. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- tag.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tag.c b/tag.c index f5c232d2f1f36c..dec5ea8eb0ab74 100644 --- a/tag.c +++ b/tag.c @@ -148,9 +148,11 @@ int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, u FREE_AND_NULL(item->tag); } - if (size < the_hash_algo->hexsz + 24) + if (size < r->hash_algo->hexsz + 24) return -1; - if (memcmp("object ", bufptr, 7) || parse_oid_hex(bufptr + 7, &oid, &bufptr) || *bufptr++ != '\n') + if (memcmp("object ", bufptr, 7) || + parse_oid_hex_algop(bufptr + 7, &oid, &bufptr, r->hash_algo) || + *bufptr++ != '\n') return -1; if (!starts_with(bufptr, "type ")) From 154717b3b0b0631fb6700d5fc77e779106530fc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 28 Dec 2025 19:10:49 +0100 Subject: [PATCH 2/8] tag: support arbitrary repositories in gpg_verify_tag() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow callers of gpg_verify_tag() specify the repository to use by providing a parameter for that. One of the two has not been using the_repository since 43a8391977 (builtin/verify-tag: stop using `the_repository`, 2025-03-08); let it pass in the correct repository. The other simply passes the_repository to get the same result as before. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/tag.c | 2 +- builtin/verify-tag.c | 2 +- tag.c | 12 ++++++------ tag.h | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/builtin/tag.c b/builtin/tag.c index 01eba90c5c7bb2..aeb04c487fe95a 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -149,7 +149,7 @@ static int verify_tag(const char *name, const char *ref UNUSED, if (format->format) flags = GPG_VERIFY_OMIT_STATUS; - if (gpg_verify_tag(oid, name, flags)) + if (gpg_verify_tag(the_repository, oid, name, flags)) return -1; if (format->format) diff --git a/builtin/verify-tag.c b/builtin/verify-tag.c index 558121eaa1688e..4a261b2369729f 100644 --- a/builtin/verify-tag.c +++ b/builtin/verify-tag.c @@ -61,7 +61,7 @@ int cmd_verify_tag(int argc, continue; } - if (gpg_verify_tag(&oid, name, flags)) { + if (gpg_verify_tag(repo, &oid, name, flags)) { had_error = 1; continue; } diff --git a/tag.c b/tag.c index dec5ea8eb0ab74..9373c49d0614b0 100644 --- a/tag.c +++ b/tag.c @@ -44,28 +44,28 @@ static int run_gpg_verify(const char *buf, unsigned long size, unsigned flags) return ret; } -int gpg_verify_tag(const struct object_id *oid, const char *name_to_report, - unsigned flags) +int gpg_verify_tag(struct repository *r, const struct object_id *oid, + const char *name_to_report, unsigned flags) { enum object_type type; char *buf; unsigned long size; int ret; - type = odb_read_object_info(the_repository->objects, oid, NULL); + type = odb_read_object_info(r->objects, oid, NULL); if (type != OBJ_TAG) return error("%s: cannot verify a non-tag object of type %s.", name_to_report ? name_to_report : - repo_find_unique_abbrev(the_repository, oid, DEFAULT_ABBREV), + repo_find_unique_abbrev(r, oid, DEFAULT_ABBREV), type_name(type)); - buf = odb_read_object(the_repository->objects, oid, &type, &size); + buf = odb_read_object(r->objects, oid, &type, &size); if (!buf) return error("%s: unable to read file.", name_to_report ? name_to_report : - repo_find_unique_abbrev(the_repository, oid, DEFAULT_ABBREV)); + repo_find_unique_abbrev(r, oid, DEFAULT_ABBREV)); ret = run_gpg_verify(buf, size, flags); diff --git a/tag.h b/tag.h index ef12a610372063..55c2d0792b99cb 100644 --- a/tag.h +++ b/tag.h @@ -16,7 +16,7 @@ int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, u int parse_tag(struct tag *item); void release_tag_memory(struct tag *t); struct object *deref_tag(struct repository *r, struct object *, const char *, int); -int gpg_verify_tag(const struct object_id *oid, +int gpg_verify_tag(struct repository *r, const struct object_id *oid, const char *name_to_report, unsigned flags); struct object_id *get_tagged_oid(struct tag *tag); From b6e4cc8c32850315323961659e553d1d14591f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 28 Dec 2025 19:10:50 +0100 Subject: [PATCH 3/8] tag: support arbitrary repositories in parse_tag() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow callers of parse_tag() pass in the repository to use. Let most of them pass in the_repository to get the same result as before. One of them has stopped using the_repository in ef9b0370da (sha1-name.c: store and use repo in struct disambiguate_state, 2019-04-16); let it pass in its stored repository. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/describe.c | 6 +++--- builtin/pack-objects.c | 2 +- fsck.c | 2 +- object-name.c | 2 +- ref-filter.c | 2 +- tag.c | 8 ++++---- tag.h | 2 +- walker.c | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index 443546aaac96f0..989a78d715d525 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -112,13 +112,13 @@ static int replace_name(struct commit_name *e, if (!e->tag) { t = lookup_tag(the_repository, &e->oid); - if (!t || parse_tag(t)) + if (!t || parse_tag(the_repository, t)) return 1; e->tag = t; } t = lookup_tag(the_repository, oid); - if (!t || parse_tag(t)) + if (!t || parse_tag(the_repository, t)) return 0; *tag = t; @@ -335,7 +335,7 @@ static void append_name(struct commit_name *n, struct strbuf *dst) { if (n->prio == 2 && !n->tag) { n->tag = lookup_tag(the_repository, &n->oid); - if (!n->tag || parse_tag(n->tag)) + if (!n->tag || parse_tag(the_repository, n->tag)) die(_("annotated tag %s not available"), n->path); } if (n->tag && !n->name_checked) { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1ce8d6ee215326..ca44b7894fc064 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3293,7 +3293,7 @@ static void add_tag_chain(const struct object_id *oid) tag = lookup_tag(the_repository, oid); while (1) { - if (!tag || parse_tag(tag) || !tag->tagged) + if (!tag || parse_tag(the_repository, tag) || !tag->tagged) die(_("unable to pack objects reachable from tag %s"), oid_to_hex(oid)); diff --git a/fsck.c b/fsck.c index 138fffded935c4..fae18d8561e067 100644 --- a/fsck.c +++ b/fsck.c @@ -474,7 +474,7 @@ static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *optio { const char *name = fsck_get_object_name(options, &tag->object.oid); - if (parse_tag(tag)) + if (parse_tag(the_repository, tag)) return -1; if (name) fsck_put_object_name(options, &tag->tagged->oid, "%s", name); diff --git a/object-name.c b/object-name.c index fed5de51531fde..8b862c124e05a9 100644 --- a/object-name.c +++ b/object-name.c @@ -449,7 +449,7 @@ static int show_ambiguous_object(const struct object_id *oid, void *data) } else if (type == OBJ_TAG) { struct tag *tag = lookup_tag(ds->repo, oid); - if (!parse_tag(tag) && tag->tag) { + if (!parse_tag(ds->repo, tag) && tag->tag) { /* * TRANSLATORS: This is a line of ambiguous * tag object output. E.g.: diff --git a/ref-filter.c b/ref-filter.c index d7454269e87cd3..c318f9ca0ec8dd 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2866,7 +2866,7 @@ static int match_points_at(struct oid_array *points_at, while (obj && obj->type == OBJ_TAG) { struct tag *tag = (struct tag *)obj; - if (parse_tag(tag) < 0) { + if (parse_tag(the_repository, tag) < 0) { obj = NULL; break; } diff --git a/tag.c b/tag.c index 9373c49d0614b0..9daeaf2a78ed54 100644 --- a/tag.c +++ b/tag.c @@ -13,6 +13,7 @@ #include "gpg-interface.h" #include "hex.h" #include "packfile.h" +#include "repository.h" const char *tag_type = "tag"; @@ -203,7 +204,7 @@ int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, u return 0; } -int parse_tag(struct tag *item) +int parse_tag(struct repository *r, struct tag *item) { enum object_type type; void *data; @@ -212,8 +213,7 @@ int parse_tag(struct tag *item) if (item->object.parsed) return 0; - data = odb_read_object(the_repository->objects, &item->object.oid, - &type, &size); + data = odb_read_object(r->objects, &item->object.oid, &type, &size); if (!data) return error("Could not read %s", oid_to_hex(&item->object.oid)); @@ -222,7 +222,7 @@ int parse_tag(struct tag *item) return error("Object %s not a tag", oid_to_hex(&item->object.oid)); } - ret = parse_tag_buffer(the_repository, item, data, size); + ret = parse_tag_buffer(r, item, data, size); free(data); return ret; } diff --git a/tag.h b/tag.h index 55c2d0792b99cb..534687c4caeca4 100644 --- a/tag.h +++ b/tag.h @@ -13,7 +13,7 @@ struct tag { }; struct tag *lookup_tag(struct repository *r, const struct object_id *oid); int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, unsigned long size); -int parse_tag(struct tag *item); +int parse_tag(struct repository *r, struct tag *item); void release_tag_memory(struct tag *t); struct object *deref_tag(struct repository *r, struct object *, const char *, int); int gpg_verify_tag(struct repository *r, const struct object_id *oid, diff --git a/walker.c b/walker.c index 409b646578a3d4..2891563b03620b 100644 --- a/walker.c +++ b/walker.c @@ -115,7 +115,7 @@ static int process_commit(struct walker *walker, struct commit *commit) static int process_tag(struct walker *walker, struct tag *tag) { - if (parse_tag(tag)) + if (parse_tag(the_repository, tag)) return -1; return process(walker, tag->tagged); } From 009fceeda26e12e2dbacd04eef47c62d4e206403 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 28 Dec 2025 19:10:51 +0100 Subject: [PATCH 4/8] tag: stop using the_repository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gpg_verify_tag() shows the passed in object name on error. Both callers provide one. It falls back to abbreviated hashes for future callers that pass in a NULL name. DEFAULT_ABBREV is default_abbrev, which in turn is a global variable that's populated by git_default_config() and only available with USE_THE_REPOSITORY_VARIABLE. Don't let that hypothetical hold us back from getting rid of the_repository in tag.c. Fall back to full hashes, which are more appropriate for error messages anyway. This allows us to stop setting USE_THE_REPOSITORY_VARIABLE. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- tag.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tag.c b/tag.c index 9daeaf2a78ed54..2f12e51024ec0b 100644 --- a/tag.c +++ b/tag.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -58,7 +57,7 @@ int gpg_verify_tag(struct repository *r, const struct object_id *oid, return error("%s: cannot verify a non-tag object of type %s.", name_to_report ? name_to_report : - repo_find_unique_abbrev(r, oid, DEFAULT_ABBREV), + oid_to_hex(oid), type_name(type)); buf = odb_read_object(r->objects, oid, &type, &size); @@ -66,7 +65,7 @@ int gpg_verify_tag(struct repository *r, const struct object_id *oid, return error("%s: unable to read file.", name_to_report ? name_to_report : - repo_find_unique_abbrev(r, oid, DEFAULT_ABBREV)); + oid_to_hex(oid)); ret = run_gpg_verify(buf, size, flags); From 979ee83e8a908f920d097c10cea5f8857e10898f Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 29 Dec 2025 18:43:03 +0000 Subject: [PATCH 5/8] merge-ort: fix corner case recursive submodule/directory conflict handling At GitHub, a few repositories were triggering errors of the form: git: merge-ort.c:3037: process_renames: Assertion `newinfo && !newinfo->merged.clean' failed. Aborted (core dumped) While these may look similar to both a562d90a350d (merge-ort: fix failing merges in special corner case, 2025-11-03) and f6ecb603ff8a (merge-ort: fix directory rename on top of source of other rename/delete, 2025-08-06) the cause is different and in this case the problem is not an over-conservative assertion, but a bug before the assertion where we did not update all relevant state appropriately. It sadly took me a really long time to figure out how to get a simple reproducer for this one. It doesn't really have that many moving parts, but there are multiple pieces of background information needed to understand it. First of all, when we have two files added at the same path, merge-ort does a two-way merge of those files. If we have two directories added at the same path, we basically do the same thing (taking the union of files, and two-way merging files with the same name). But two-way merging requires components of the same type. We can't merge the contents of a regular file with a directory, or with a symlink, or with a submodule. Nor can any of those other types be merged with each other, e.g. merging a submodule with a directory is a bad idea. When two paths have the same name but their types do not match, merge-ort is forced to move one of them to an alternate filename (using the unique_path() function). Second, if two commits being merged have more than one merge-base, merge-ort will merge the merge-bases to create a virtual merge-base, and use that as the base commit. Third, one of the really important optimizations in merge-ort is trivial tree-level resolution (roughly meaning merging trees without recursing into them). This optimization has some nuance to it that is important to the current bug, and to understand it, it helps to first look at the high-level overview of how merge-ort runs; there are basically three high-level functions that the work is divided between: collect_merge_info() - walks the top-level trees getting individual paths of interest detect_renames() - detect renames between paths in order to match up paths for three-way merging process_entries() - does a few things of interest: * three-way merging of files, * other special handling (e.g. adjusting paths with conflicting types to avoid path collisions) * as it finishes handling all the files within a subdirectory, writes out a new tree object for that directory If it were not for renames, we could just always do tree-level merging whenever the tree on at least one side was unmodified. Unfortunately, we need to recurse into trees to determine whether there are renames. However, we can also do tree-level merging so long as there aren't any *relevant* renames (another merge-ort optimization), which we can determine without recursing into trees. We would also be able to do tree-level merging if we somehow apriori knew what renames existed, by only recursing into the trees which we could otherwise trivially merge if they contained files involved in renames. That might not seem useful, because we need to find out the renames and we have to recurse into trees to do so, but when you find out that the process_entries() step is more computationally expensive than the collect_merge_info() step, it yields an interesting strategy: * run collect_merge_info() * run detect_renames() * cache the renames() * restart -- rerun collect_merge_info(), using the cached renames to only recurse into the needed trees * we already have the renames cached so no need to re-detect * run process_entries() on the reduced list of paths which was implemented back in 7bee6c100431 (merge-ort: avoid recursing into directories when we don't need to, 2021-07-16) Crucially, this restarting only occurs if the number of paths we could skip recursing into exceeds the number we still need to recurse into by some safety factor (wanted_factor in handle_deferred_entries()); forgetting this fact is a great way to repeatedly fail to create a minimal testcase for several days and go down alternate wrong paths). Now, I earlier summarized this optimization as "merging trees without recursing into them", but this optimization does not require that all three sides of history has a directory at a given path. So long as the tree on one side matches the tree in the base version, we can decide to resolve in favor of whatever the other side of history has at that path -- be it a directory, a file, a submodule, or a symlink. Unfortunately, the code in question didn't fully realize this, and was written assuming the base version and both sides would have a directory at the given path, as can be seen by the "ci->filemask == 0" comment in resolve_trivial_directory_merge() that was added as part of 7bee6c100431 (merge-ort: avoid recursing into directories when we don't need to, 2021-07-16). A few additional lines of code are needed to handle cases where we have something other than a directory on the other side of history. But, knowing that resolve_trivial_directory_merge() doesn't have sufficient state updating logic doesn't show us how to trigger a bug without combining with the other bits of information we provided above. Here's a relevant testcase: * branches A & B * commit A1: adds "folder" as a directory with files tracked under it * commit B1: adds "folder" as a submodule * commit A2: merges B1 into A1, keeping "folder" as a directory (and in fact, with no changes to "folder" since A1), discarding the submodule * commit B2: merges A1 into B1, keeping "folder" as a submodule (and in fact, with no changes to "folder" since B1), discarding the directory Here, if we try to merge A2 & B2, the logic proceeds as follows: * we have multiple merge-bases: A1 & B1. So we have to merge those to get a virtual merge base. * due to "folder" as a directory and "folder" as a submodule, the path collision logic triggers and renames "folder" as a submodule to "folder~Temporary merge branch 2" so we can keep it alongside "folder" as a directory. * we now have a virtual merge base (containing both "folder" directory and a "folder~Temporary merge branch 2" submodule) and can now do the outer merge * in the first step of the outer merge, we attempt to defer recursing into folder/ as a directory, but find we need to for rename detection. * in rename detection, we note that "folder~Temporary merge branch 2" has the same hash as "folder" as a submodule in B2, which means we have an exact rename. * after rename detection, we discover no path in folder/ is needed for renames, and so we can cache renames and restart. * after restarting, we avoid recursing into "folder/" and realize we can resolve it trivially since it hasn't been modified. The resolution removes "folder/", leaving us only "folder" as a submodule from commit B2. * After this point, we should have a rename/delete conflict on "folder~Temporary merge branch 2" -> "folder", but our marking of the merge of "folder" as clean broke our ability to handle that and in fact triggers an assertion in process_renames(). When there was a df_conflict (directory/"file" conflict, where "file" could be submodule or regular file or symlink), ensure resolve_trivial_directory_merge() handles it properly. In particular: * do not pre-emptively mark the path as cleanly merged if the remaining path is a file; allow it to be processed in process_entries() later to determine if it was clean * clear the parts of dirmask or filemask corresponding to the matching sides of history, since we are resolving those away * clear the df_conflict bit afterwards; since we cleared away the two matching sides and only have one side left, that one side can't have a directory/file conflict with itself. Also add the above minimal testcase showcasing this bug to t6422, **with a sufficient number of paths under the folder/ directory to actually trigger it**. (I wish I could have all those days back from all the wrong paths I went down due to not having enough files under that directory...) I know this commit has a very high ratio of lines in the commit message to lines of comments, and a relatively high ratio of comments to actual code, but given how long it took me to track down, on the off chance that we ever need to further modify this logic, I wanted it thoroughly documented for future me and for whatever other poor soul might end up needing to read this commit message. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 35 ++++++++++- t/t6422-merge-rename-corner-cases.sh | 86 ++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 29858074f9d8bf..738a61ce691882 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1502,11 +1502,44 @@ static void resolve_trivial_directory_merge(struct conflict_info *ci, int side) VERIFY_CI(ci); assert((side == 1 && ci->match_mask == 5) || (side == 2 && ci->match_mask == 3)); + + /* + * Since ci->stages[0] matches ci->stages[3-side], resolve merge in + * favor of ci->stages[side]. + */ oidcpy(&ci->merged.result.oid, &ci->stages[side].oid); ci->merged.result.mode = ci->stages[side].mode; ci->merged.is_null = is_null_oid(&ci->stages[side].oid); + + /* + * Because we resolved in favor of "side", we are no longer + * considering the paths which matched (i.e. had the same hash) any + * more. Strip the matching paths from both dirmask & filemask. + * Another consequence of merging in favor of side is that we can no + * longer have a directory/file conflict either..but there's a slight + * nuance we consider before clearing it. + * + * In most cases, resolving in favor of the other side means there's + * no conflict at all, but if we had a directory/file conflict to + * start, and the directory is resolved away, the remaining file could + * still be part of a rename. If the remaining file is part of a + * rename, then it may also be part of a rename conflict (e.g. + * rename/delete or rename/rename(1to2)), so we can't + * mark it as a clean merge if we started with a directory/file + * conflict and still have a file left. + * + * In contrast, if we started with a directory/file conflict and + * still have a directory left, no file under that directory can be + * part of a rename, otherwise we would have had to recurse into the + * directory and would have never ended up within + * resolve_trivial_directory_merge() for that directory. + */ + ci->dirmask &= (~ci->match_mask); + ci->filemask &= (~ci->match_mask); + assert(!ci->filemask || !ci->dirmask); ci->match_mask = 0; - ci->merged.clean = 1; /* (ci->filemask == 0); */ + ci->merged.clean = !ci->df_conflict || ci->dirmask; + ci->df_conflict = 0; } static int handle_deferred_entries(struct merge_options *opt, diff --git a/t/t6422-merge-rename-corner-cases.sh b/t/t6422-merge-rename-corner-cases.sh index f14c0fb30e1bf2..e18d5a227d54f7 100755 --- a/t/t6422-merge-rename-corner-cases.sh +++ b/t/t6422-merge-rename-corner-cases.sh @@ -1439,4 +1439,90 @@ test_expect_success 'rename/rename(1to2) with a binary file' ' ) ' +# Testcase preliminary submodule/directory conflict and submodule rename +# Commit O: +# Commit A1: introduce "folder" (as a tree) +# Commit B1: introduce "folder" (as a submodule) +# Commit A2: merge B1 into A1, but keep folder as a tree +# Commit B2: merge A1 into B1, but keep folder as a submodule +# Merge A2 & B2 +test_setup_submodule_directory_preliminary_conflict () { + git init submodule_directory_preliminary_conflict && + ( + cd submodule_directory_preliminary_conflict && + + # Trying to do the A2 and B2 merges above is slightly more + # challenging with a local submodule (because checking out + # another commit has the submodule in the way). Instead, + # first create the commits with the wrong parents but right + # trees, in the order A1, A2, B1, B2... + # + # Then go back and create new A2 & B2 with the correct + # parents and the same trees. + + git commit --allow-empty -m orig && + + git branch A && + git branch B && + + git checkout B && + mkdir folder && + echo A>folder/A && + echo B>folder/B && + echo C>folder/C && + echo D>folder/D && + echo E>folder/E && + git add folder && + git commit -m B1 && + + git commit --allow-empty -m B2 && + + git checkout A && + git init folder && + ( + cd folder && + >Z && + >Y && + git add Z Y && + git commit -m "original submodule commit" + ) && + git add folder && + git commit -m A1 && + + git commit --allow-empty -m A2 && + + NewA2=$(git commit-tree -p A^ -p B^ -m "Merge B into A" A^{tree}) && + NewB2=$(git commit-tree -p B^ -p A^ -m "Merge A into B" B^{tree}) && + git update-ref refs/heads/A $NewA2 && + git update-ref refs/heads/B $NewB2 + ) +} + +test_expect_success 'submodule/directory preliminary conflict' ' + test_setup_submodule_directory_preliminary_conflict && + ( + cd submodule_directory_preliminary_conflict && + + git checkout A^0 && + + test_expect_code 1 git merge B^0 && + + # Make sure the index has the right number of entries + git ls-files -s >actual && + test_line_count = 2 actual && + + # The "folder" as directory should have been resolved away + # as part of the merge. The "folder" as submodule got + # renamed to "folder~Temporary merge branch 2" in the + # virtual merge base, resulting in a + # "folder~Temporary merge branch 2" -> "folder" + # rename in the outermerge for the submodule, which then + # becomes part of a rename/delete conflict (because "folder" + # as a submodule was deleted in A2). + submod=$(git rev-parse A:folder) && + printf "160000 $submod 1\tfolder\n160000 $submod 2\tfolder\n" >expect && + test_cmp expect actual + ) +' + test_done From 861dbb1586aaac6f02c8c87dd55e5c0b9862296a Mon Sep 17 00:00:00 2001 From: Deveshi Dwivedi Date: Mon, 29 Dec 2025 18:57:37 +0000 Subject: [PATCH 6/8] t5403: use test_path_is_file instead of test -f Replace 'test -f' with the test_path_is_file in t5403-post-checkout-hook.sh. This helper provides better error messages when tests fail, making it easier to debug issues. Signed-off-by: Deveshi Dwivedi Signed-off-by: Junio C Hamano --- t/t5403-post-checkout-hook.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5403-post-checkout-hook.sh b/t/t5403-post-checkout-hook.sh index cfaae547398e0e..ade9e5087f9f30 100755 --- a/t/t5403-post-checkout-hook.sh +++ b/t/t5403-post-checkout-hook.sh @@ -110,7 +110,7 @@ test_expect_success 'post-checkout hook is triggered by clone' ' echo "$@" >"$GIT_DIR/post-checkout.args" EOF git clone --template=templates . clone3 && - test -f clone3/.git/post-checkout.args + test_path_is_file clone3/.git/post-checkout.args ' test_done From 56d388e6ad9e819935a902b6d5ce3a6b3485b6e2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 29 Dec 2025 21:44:57 +0000 Subject: [PATCH 7/8] diff: avoid segfault with freed entries When computing a diff in a partial clone, there is a chance that we could trigger a prefetch of missing objects at the same time as we are freeing entries from the global diff queue. This is difficult to reproduce, as we need to have some objects be freed from the queue before triggering the prefetch of missing objects. There is a new test in t4067 that does trigger the segmentation fault that results in this case. The fix is to set the queue pointer to NULL after it is freed, and then to be careful about NULL values in the prefetch. The more elaborate explanation is that within diffcore_std(), we may skip the initial prefetch due to the output format (--name-only in the test) and go straight to diffcore_skip_stat_unmatch(). In that method, the index entries that have been invalidated by path changes show up as entries but may be deleted because they are not actually content diffs and only newer timestamps than expected. As those entries are deleted, later entries are checked with diff_filespec_check_stat_unmatch(), which uses diff_queued_diff_prefetch() as the missing_object_cb in its diff options. That can trigger downloading missing objects if the appropriate scenario occurs to trigger a call to diff_popoulate_filespec(). It's finally within that callback to diff_queued_diff_prefetch() that the segfault occurs. The test was hard to find because it required some real differences, some not-different files that had a newer modified time, and the order of those files alphabetically was important to trigger the deletion before the prefetch was triggered. I briefly considered a "lock" member for the diff queue, but it was a much larger diff and introduced many more possible error scenarios. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- diff.c | 5 +++++ t/t4067-diff-partial-clone.sh | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/diff.c b/diff.c index 436da250eb150d..a68ddd2168ba1c 100644 --- a/diff.c +++ b/diff.c @@ -7098,6 +7098,7 @@ static void diffcore_skip_stat_unmatch(struct diff_options *diffopt) if (!diffopt->flags.no_index) diffopt->skip_stat_unmatch++; diff_free_filepair(p); + q->queue[i] = NULL; } } free(q->queue); @@ -7141,6 +7142,10 @@ void diff_queued_diff_prefetch(void *repository) for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; + + if (!p) + continue; + diff_add_if_missing(repo, &to_fetch, p->one); diff_add_if_missing(repo, &to_fetch, p->two); } diff --git a/t/t4067-diff-partial-clone.sh b/t/t4067-diff-partial-clone.sh index 581250dd2d227a..72f25de44950ae 100755 --- a/t/t4067-diff-partial-clone.sh +++ b/t/t4067-diff-partial-clone.sh @@ -132,6 +132,41 @@ test_expect_success 'diff with rename detection batches blobs' ' test_line_count = 1 done_lines ' +test_expect_success 'diff succeeds even if entries are removed from queue' ' + test_when_finished "rm -rf server client trace" && + + test_create_repo server && + for l in a c e g i p + do + echo $l >server/$l && + git -C server add $l || return 1 + done && + git -C server commit -m x && + + for l in a e i + do + git -C server rm $l || return 1 + done && + + for l in b d f i + do + echo $l$l >server/$l && + git -C server add $l || return 1 + done && + git -C server commit -a -m x && + + test_config -C server uploadpack.allowfilter 1 && + test_config -C server uploadpack.allowanysha1inwant 1 && + git clone --filter=blob:limit=0 "file://$(pwd)/server" client && + + for file in $(ls client) + do + cat client/$file >$file && + mv $file client/$file || return 1 + done && + git -C client diff --name-only --relative HEAD^ +' + test_expect_success 'diff does not fetch anything if inexact rename detection is not needed' ' test_when_finished "rm -rf server client trace" && From d529f3a197364881746f558e5652f0236131eb86 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 8 Jan 2026 15:58:11 +0900 Subject: [PATCH 8/8] The 16th batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.53.0.adoc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/RelNotes/2.53.0.adoc b/Documentation/RelNotes/2.53.0.adoc index 9e8384a4c101ac..32b6966c9e4dc4 100644 --- a/Documentation/RelNotes/2.53.0.adoc +++ b/Documentation/RelNotes/2.53.0.adoc @@ -228,6 +228,16 @@ Fixes since v2.52 handling stateful ISO/IEC 2022 encoded strings. (merge cee341e9dd rs/macos-iconv-workaround later to maint). + * Running "git diff" with "--name-only" and other options that allows + us not to look at the blob contents, while objects that are lazily + fetched from a promisor remote, caused use-after-free, which has + been corrected. + + * The ort merge machinery hit an assertion failure in a history with + criss-cross merges renamed a directory and a non-directory, which + has been corrected. + (merge 979ee83e8a en/ort-recursive-d-f-conflict-fix later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 46207a54cc qj/doc-http-bad-want-response later to maint). (merge df90eccd93 kh/doc-commit-extra-references later to maint). @@ -252,3 +262,4 @@ Fixes since v2.52 (merge 93f894c001 bc/checkout-error-message-fix later to maint). (merge abf05d856f rs/show-branch-prio-queue later to maint). (merge 06188ea5f3 rs/parse-config-expiry-simplify later to maint). + (merge 861dbb1586 dd/t5403-modernise later to maint).