From 00c0d84e6f0cdb91b0f261e12ed058ad3baa49bf Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 13:59:34 -0500 Subject: [PATCH 001/236] midx: mark `get_midx_checksum()` arguments as const To make clear that the function `get_midx_checksum()` does not do anything to modify its argument, mark the MIDX pointer as const. The following commit will rename this function altogether to make clear that it returns the raw bytes of the checksum, not a hex-encoded copy of it. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 2 +- midx.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/midx.c b/midx.c index a75ea99a0d4bb0..2a6b18954c51fb 100644 --- a/midx.c +++ b/midx.c @@ -24,7 +24,7 @@ void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); -const unsigned char *get_midx_checksum(struct multi_pack_index *m) +const unsigned char *get_midx_checksum(const struct multi_pack_index *m) { return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; } diff --git a/midx.h b/midx.h index 6e54d73503d560..7c7e0b5912151b 100644 --- a/midx.h +++ b/midx.h @@ -85,7 +85,7 @@ struct multi_pack_index { #define MIDX_EXT_BITMAP "bitmap" #define MIDX_EXT_MIDX "midx" -const unsigned char *get_midx_checksum(struct multi_pack_index *m); +const unsigned char *get_midx_checksum(const struct multi_pack_index *m); void get_midx_filename(struct odb_source *source, struct strbuf *out); void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, const unsigned char *hash, const char *ext); From de811c26bb97ac324b60883ae4f4db84a83be2f1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 13:59:39 -0500 Subject: [PATCH 002/236] midx: rename `get_midx_checksum()` to `midx_get_checksum_hash()` Since 541204aabea (Documentation: document naming schema for structs and their functions, 2024-07-30), we have adopted a naming convention for functions that would prefer a name like, say, `midx_get_checksum()` over `get_midx_checksum()`. Adopt this convention throughout the midx.h API. Since this function returns a raw (that is, non-hex encoded) hash, let's suffix the function with "_hash()" to make this clear. As a side effect, this prepares us for the subsequent change which will introduce a "_hex()" variant that encodes the checksum itself. Suggested-by: Patrick Steinhardt Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 6 +++--- midx.c | 2 +- midx.h | 2 +- pack-bitmap.c | 8 ++++---- pack-revindex.c | 4 ++-- t/helper/test-read-midx.c | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/midx-write.c b/midx-write.c index 6485cb67068553..73d33752ef1119 100644 --- a/midx-write.c +++ b/midx-write.c @@ -946,7 +946,7 @@ static int link_midx_to_chain(struct multi_pack_index *m) } for (i = 0; i < ARRAY_SIZE(midx_exts); i++) { - const unsigned char *hash = get_midx_checksum(m); + const unsigned char *hash = midx_get_checksum_hash(m); get_midx_filename_ext(m->source, &from, hash, midx_exts[i].non_split); @@ -1151,7 +1151,7 @@ static int write_midx_internal(struct odb_source *source, while (m) { if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { error(_("could not load reverse index for MIDX %s"), - hash_to_hex_algop(get_midx_checksum(m), + hash_to_hex_algop(midx_get_checksum_hash(m), m->source->odb->repo->hash_algo)); goto cleanup; } @@ -1520,7 +1520,7 @@ static int write_midx_internal(struct odb_source *source, for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; - keep_hashes[j] = xstrdup(hash_to_hex_algop(get_midx_checksum(m), + keep_hashes[j] = xstrdup(hash_to_hex_algop(midx_get_checksum_hash(m), r->hash_algo)); m = m->base_midx; } diff --git a/midx.c b/midx.c index 2a6b18954c51fb..1d072bd99316df 100644 --- a/midx.c +++ b/midx.c @@ -24,7 +24,7 @@ void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); -const unsigned char *get_midx_checksum(const struct multi_pack_index *m) +const unsigned char *midx_get_checksum_hash(const struct multi_pack_index *m) { return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; } diff --git a/midx.h b/midx.h index 7c7e0b5912151b..62d6105195f0fb 100644 --- a/midx.h +++ b/midx.h @@ -85,7 +85,7 @@ struct multi_pack_index { #define MIDX_EXT_BITMAP "bitmap" #define MIDX_EXT_MIDX "midx" -const unsigned char *get_midx_checksum(const struct multi_pack_index *m); +const unsigned char *midx_get_checksum_hash(const struct multi_pack_index *m); void get_midx_filename(struct odb_source *source, struct strbuf *out); void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, const unsigned char *hash, const char *ext); diff --git a/pack-bitmap.c b/pack-bitmap.c index 972203f12b6d9b..6307bbdf1e148a 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -441,11 +441,11 @@ char *midx_bitmap_filename(struct multi_pack_index *midx) struct strbuf buf = STRBUF_INIT; if (midx->has_chain) get_split_midx_filename_ext(midx->source, &buf, - get_midx_checksum(midx), + midx_get_checksum_hash(midx), MIDX_EXT_BITMAP); else get_midx_filename_ext(midx->source, &buf, - get_midx_checksum(midx), + midx_get_checksum_hash(midx), MIDX_EXT_BITMAP); return strbuf_detach(&buf, NULL); @@ -502,7 +502,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, if (load_bitmap_header(bitmap_git) < 0) goto cleanup; - if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum, + if (!hasheq(midx_get_checksum_hash(bitmap_git->midx), bitmap_git->checksum, bitmap_repo(bitmap_git)->hash_algo)) { error(_("checksum doesn't match in MIDX and bitmap")); goto cleanup; @@ -2819,7 +2819,7 @@ void test_bitmap_walk(struct rev_info *revs) if (bitmap_is_midx(found)) fprintf_ln(stderr, "Located via MIDX '%s'.", - hash_to_hex_algop(get_midx_checksum(found->midx), + hash_to_hex_algop(midx_get_checksum_hash(found->midx), revs->repo->hash_algo)); else fprintf_ln(stderr, "Located via pack '%s'.", diff --git a/pack-revindex.c b/pack-revindex.c index 56cd803a6798d5..294b802d402653 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -390,11 +390,11 @@ int load_midx_revindex(struct multi_pack_index *m) if (m->has_chain) get_split_midx_filename_ext(m->source, &revindex_name, - get_midx_checksum(m), + midx_get_checksum_hash(m), MIDX_EXT_REV); else get_midx_filename_ext(m->source, &revindex_name, - get_midx_checksum(m), + midx_get_checksum_hash(m), MIDX_EXT_REV); ret = load_revindex_from_disk(m->source->odb->repo->hash_algo, diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 6de5d1665afbfc..b8fefb1a1245e2 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -34,7 +34,7 @@ static int read_midx_file(const char *object_dir, const char *checksum, return 1; if (checksum) { - while (m && strcmp(hash_to_hex(get_midx_checksum(m)), checksum)) + while (m && strcmp(hash_to_hex(midx_get_checksum_hash(m)), checksum)) m = m->base_midx; if (!m) return 1; @@ -94,7 +94,7 @@ static int read_midx_checksum(const char *object_dir) m = setup_midx(object_dir); if (!m) return 1; - printf("%s\n", hash_to_hex(get_midx_checksum(m))); + printf("%s\n", hash_to_hex(midx_get_checksum_hash(m))); close_midx(m); return 0; From 6e86f679248580bec5c105b37217862f3022b504 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 13:59:44 -0500 Subject: [PATCH 003/236] midx: introduce `midx_get_checksum_hex()` When trying to print out, say, the hexadecimal representation of a MIDX's hash, our code will do something like: hash_to_hex_algop(midx_get_checksum_hash(m), m->source->odb->repo->hash_algo); , which is both cumbersome and repetitive. In fact, all but a handful of callers to `midx_get_checksum_hash()` do exactly the above. Reduce the repetitive nature of calling `midx_get_checksum_hash()` by having it return a pointer into a static buffer containing the above result. For the handful of callers that do need to compare the raw bytes and don't want to deal with an encoded copy (e.g., because they are passing it to hasheq() or similar), they may still rely on `midx_get_checksum_hash()` which returns the raw bytes. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 6 ++---- midx.c | 6 ++++++ midx.h | 1 + pack-bitmap.c | 3 +-- t/helper/test-read-midx.c | 4 ++-- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/midx-write.c b/midx-write.c index 73d33752ef1119..13171d7e9c4ae0 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1151,8 +1151,7 @@ static int write_midx_internal(struct odb_source *source, while (m) { if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { error(_("could not load reverse index for MIDX %s"), - hash_to_hex_algop(midx_get_checksum_hash(m), - m->source->odb->repo->hash_algo)); + midx_get_checksum_hex(m)); goto cleanup; } ctx.num_multi_pack_indexes_before++; @@ -1520,8 +1519,7 @@ static int write_midx_internal(struct odb_source *source, for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; - keep_hashes[j] = xstrdup(hash_to_hex_algop(midx_get_checksum_hash(m), - r->hash_algo)); + keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); m = m->base_midx; } diff --git a/midx.c b/midx.c index 1d072bd99316df..bae458923232d7 100644 --- a/midx.c +++ b/midx.c @@ -24,6 +24,12 @@ void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); +const char *midx_get_checksum_hex(const struct multi_pack_index *m) +{ + return hash_to_hex_algop(midx_get_checksum_hash(m), + m->source->odb->repo->hash_algo); +} + const unsigned char *midx_get_checksum_hash(const struct multi_pack_index *m) { return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; diff --git a/midx.h b/midx.h index 62d6105195f0fb..a39bcc9d03fc9c 100644 --- a/midx.h +++ b/midx.h @@ -85,6 +85,7 @@ struct multi_pack_index { #define MIDX_EXT_BITMAP "bitmap" #define MIDX_EXT_MIDX "midx" +const char *midx_get_checksum_hex(const struct multi_pack_index *m) /* static buffer */; const unsigned char *midx_get_checksum_hash(const struct multi_pack_index *m); void get_midx_filename(struct odb_source *source, struct strbuf *out); void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, diff --git a/pack-bitmap.c b/pack-bitmap.c index 6307bbdf1e148a..afc7fba019779c 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -2819,8 +2819,7 @@ void test_bitmap_walk(struct rev_info *revs) if (bitmap_is_midx(found)) fprintf_ln(stderr, "Located via MIDX '%s'.", - hash_to_hex_algop(midx_get_checksum_hash(found->midx), - revs->repo->hash_algo)); + midx_get_checksum_hex(found->midx)); else fprintf_ln(stderr, "Located via pack '%s'.", hash_to_hex_algop(found->pack->hash, diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index b8fefb1a1245e2..9d42c587564182 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -34,7 +34,7 @@ static int read_midx_file(const char *object_dir, const char *checksum, return 1; if (checksum) { - while (m && strcmp(hash_to_hex(midx_get_checksum_hash(m)), checksum)) + while (m && strcmp(midx_get_checksum_hex(m), checksum)) m = m->base_midx; if (!m) return 1; @@ -94,7 +94,7 @@ static int read_midx_checksum(const char *object_dir) m = setup_midx(object_dir); if (!m) return 1; - printf("%s\n", hash_to_hex(midx_get_checksum_hash(m))); + printf("%s\n", midx_get_checksum_hex(m)); close_midx(m); return 0; From 6b8fb17490f637f5651834596b08cdb255e9280a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 13:59:48 -0500 Subject: [PATCH 004/236] builtin/multi-pack-index.c: make '--progress' a common option All multi-pack-index sub-commands (write, verify, repack, and expire) support a '--progress' command-line option, despite not listing it as one of the common options in `common_opts`. As a result each sub-command declares its own `OPT_BIT()` for a "--progress" command-line option. Centralize this within the `common_opts` to avoid re-declaring it in each sub-command. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 2 ++ builtin/multi-pack-index.c | 10 ++-------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 2f642697e9e106..a4550e28bedd5b 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -18,6 +18,8 @@ Write or verify a multi-pack-index (MIDX) file. OPTIONS ------- +The following command-line options are applicable to all sub-commands: + --object-dir=:: Use given directory for the location of Git objects. We check `/packs/multi-pack-index` for the current MIDX file, and diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 5f364aa816ba25..ca98d4c3ba3c2f 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -84,6 +84,8 @@ static struct option common_opts[] = { N_("directory"), N_("object directory containing set of packfile and pack-index pairs"), parse_object_dir), + OPT_BIT(0, "progress", &opts.flags, N_("force progress reporting"), + MIDX_PROGRESS), OPT_END(), }; @@ -138,8 +140,6 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, N_("pack for reuse when computing a multi-pack bitmap")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, @@ -200,8 +200,6 @@ static int cmd_multi_pack_index_verify(int argc, const char **argv, { struct option *options; static struct option builtin_multi_pack_index_verify_options[] = { - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; @@ -231,8 +229,6 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv, { struct option *options; static struct option builtin_multi_pack_index_expire_options[] = { - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; @@ -264,8 +260,6 @@ static int cmd_multi_pack_index_repack(int argc, const char **argv, static struct option builtin_multi_pack_index_repack_options[] = { OPT_UNSIGNED(0, "batch-size", &opts.batch_size, N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")), - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; From f775d5b1cf7b3d7ec0b8448684b8710c82013684 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 13:59:52 -0500 Subject: [PATCH 005/236] git-multi-pack-index(1): remove non-existent incompatibility Since fcb2205b774 (midx: implement support for writing incremental MIDX chains, 2024-08-06), the command-line options '--incremental' and '--bitmap' were declared to be incompatible with one another when running 'git multi-pack-index write'. However, since 27afc272c49 (midx: implement writing incremental MIDX bitmaps, 2025-03-20), that incompatibility no longer exists, despite the documentation saying so. Correct this by removing the stale reference to their incompatibility. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index a4550e28bedd5b..a502819fc38858 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -75,7 +75,7 @@ marker). Write an incremental MIDX file containing only objects and packs not present in an existing MIDX layer. Migrates non-incremental MIDXs to incremental ones when - necessary. Incompatible with `--bitmap`. + necessary. -- verify:: From d0e91c128b99ca86e31e4c0e9ef80643039709f9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 13:59:56 -0500 Subject: [PATCH 006/236] git-multi-pack-index(1): align SYNOPSIS with 'git multi-pack-index -h' Since c39fffc1c90 (tests: start asserting that *.txt SYNOPSIS matches -h output, 2022-10-13), the manual page for 'git multi-pack-index' has a SYNOPSIS section which differs from 'git multi-pack-index -h'. Correct this while also documenting additional options accepted by the 'write' sub-command. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 7 ++++++- builtin/multi-pack-index.c | 5 +++-- t/t0450/adoc-help-mismatches | 1 - 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index a502819fc38858..164cf1f2291b99 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -9,7 +9,12 @@ git-multi-pack-index - Write and verify multi-pack-indexes SYNOPSIS -------- [verse] -'git multi-pack-index' [--object-dir=] [--[no-]bitmap] +'git multi-pack-index' [] write [--preferred-pack=] + [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] + [--refs-snapshot=] +'git multi-pack-index' [] verify +'git multi-pack-index' [] expire +'git multi-pack-index' [] repack [--batch-size=] DESCRIPTION ----------- diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index ca98d4c3ba3c2f..c0c6c1760c0f28 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -13,8 +13,9 @@ #include "repository.h" #define BUILTIN_MIDX_WRITE_USAGE \ - N_("git multi-pack-index [] write [--preferred-pack=]" \ - "[--refs-snapshot=]") + N_("git multi-pack-index [] write [--preferred-pack=]\n" \ + " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ + " [--refs-snapshot=]") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") diff --git a/t/t0450/adoc-help-mismatches b/t/t0450/adoc-help-mismatches index 8ee2d3f7c81502..e8d6c13ccd0333 100644 --- a/t/t0450/adoc-help-mismatches +++ b/t/t0450/adoc-help-mismatches @@ -33,7 +33,6 @@ merge merge-file merge-index merge-one-file -multi-pack-index name-rev notes push From 80437821124d7f529dc6c98daa2df92552c84db0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 14:00:00 -0500 Subject: [PATCH 007/236] t/t5319-multi-pack-index.sh: fix copy-and-paste error in t5319.39 Commit d4bf1d88b90 (multi-pack-index: verify missing pack, 2018-09-13) adds a new test to the MIDX test script to test how we handle missing packs. While the commit itself describes the test as "verify missing pack[s]", the test itself is actually called "verify packnames out of order", despite that not being what it tests. Likely this was a copy-and-paste of the test immediately above it of the same name. Correct this by renaming the test to match the commit message. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t5319-multi-pack-index.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index faae98c7e76a20..efeab4d22b7c79 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -517,7 +517,7 @@ test_expect_success 'verify packnames out of order' ' "pack names out of order" ' -test_expect_success 'verify packnames out of order' ' +test_expect_success 'verify missing pack' ' corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \ "failed to load pack" ' From ac10f6aa40355bef7ead9e992e76c3a562d2c01a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 14:00:05 -0500 Subject: [PATCH 008/236] midx-write.c: don't use `pack_perm` when assigning `bitmap_pos` In midx_pack_order(), we compute for each bitmapped pack the first bit to correspond to an object in that pack, along with how many bits were assigned to object(s) in that pack. Initially, each bitmap_nr value is set to zero, and each bitmap_pos value is set to the sentinel BITMAP_POS_UNKNOWN. This is done to ensure that there are no packs who have an unknown bit position but a somehow non-zero number of objects (cf. `write_midx_bitmapped_packs()` in midx-write.c). Once the pack order is fully determined, midx_pack_order() sets the bitmap_pos field for any bitmapped packs to zero if they are still listed as BITMAP_POS_UNKNOWN. However, we enumerate the bitmapped packs in order of `ctx->pack_perm`. This is fine for existing cases, since the only time the `ctx->pack_perm` array holds a value outside of the addressable range of `ctx->info` is when there are expired packs, which only occurs via 'git multi-pack-index expire', which does not support writing MIDX bitmaps. As a result, the range of ctx->pack_perm covers all values in [0, `ctx->nr`), so enumerating in this order isn't an issue. A future change necessary for compaction will complicate this further by introducing a wrapper around the `ctx->pack_perm` array, which turns the given `pack_int_id` into one that is relative to the lower end of the compaction range. As a result, indexing into `ctx->pack_perm` through this helper, say, with "0" will produce a crash when the lower end of the compaction range has >0 pack(s) in its base layer, since the subtraction will wrap around the 32-bit unsigned range, resulting in an uninitialized read. But the process is completely unnecessary in the first place: we are enumerating all values of `ctx->info`, and there is no reason to process them in a different order than they appear in memory. Index `ctx->info` directly to reflect that. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/midx-write.c b/midx-write.c index 13171d7e9c4ae0..da9c5a7c29577c 100644 --- a/midx-write.c +++ b/midx-write.c @@ -637,7 +637,7 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) pack_order[i] = data[i].nr; } for (i = 0; i < ctx->nr; i++) { - struct pack_info *pack = &ctx->info[ctx->pack_perm[i]]; + struct pack_info *pack = &ctx->info[i]; if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) pack->bitmap_pos = 0; } From 82c905ea6bd79cd2045fea91b67f4c858379bff1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 14:00:09 -0500 Subject: [PATCH 009/236] midx-write.c: introduce `struct write_midx_opts` In the MIDX writing code, there are four functions which perform some sort of MIDX write operation. They are: - write_midx_file() - write_midx_file_only() - expire_midx_packs() - midx_repack() All of these functions are thin wrappers over `write_midx_internal()`, which implements the bulk of these routines. As a result, the `write_midx_internal()` function takes six arguments. Future commits in this series will want to add additional arguments, and in general this function's signature will be the union of parameters among *all* possible ways to write a MIDX. Instead of adding yet more arguments to this function to support MIDX compaction, introduce a `struct write_midx_opts`, which has the same struct members as `write_midx_internal()`'s arguments. Adding additional fields to the `write_midx_opts` struct is preferable to adding additional arguments to `write_midx_internal()`. This is because the callers below all zero-initialize the struct, so each time we add a new piece of information, we do not have to pass the zero value for it in all other call-sites that do not care about it. For now, no functional changes are included in this patch. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 135 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 81 insertions(+), 54 deletions(-) diff --git a/midx-write.c b/midx-write.c index da9c5a7c29577c..8a54644e427992 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1078,14 +1078,20 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c return needed; } -static int write_midx_internal(struct odb_source *source, - struct string_list *packs_to_include, - struct string_list *packs_to_drop, - const char *preferred_pack_name, - const char *refs_snapshot, - unsigned flags) +struct write_midx_opts { + struct odb_source *source; /* non-optional */ + + struct string_list *packs_to_include; + struct string_list *packs_to_drop; + + const char *preferred_pack_name; + const char *refs_snapshot; + unsigned flags; +}; + +static int write_midx_internal(struct write_midx_opts *opts) { - struct repository *r = source->odb->repo; + struct repository *r = opts->source->odb->repo; struct strbuf midx_name = STRBUF_INIT; unsigned char midx_hash[GIT_MAX_RAWSZ]; uint32_t start_pack; @@ -1106,22 +1112,22 @@ static int write_midx_internal(struct odb_source *source, trace2_region_enter("midx", "write_midx_internal", r); ctx.repo = r; - ctx.source = source; + ctx.source = opts->source; - ctx.incremental = !!(flags & MIDX_WRITE_INCREMENTAL); + ctx.incremental = !!(opts->flags & MIDX_WRITE_INCREMENTAL); if (ctx.incremental) strbuf_addf(&midx_name, "%s/pack/multi-pack-index.d/tmp_midx_XXXXXX", - source->path); + opts->source->path); else - get_midx_filename(source, &midx_name); + get_midx_filename(opts->source, &midx_name); if (safe_create_leading_directories(r, midx_name.buf)) die_errno(_("unable to create leading directories of %s"), midx_name.buf); - if (!packs_to_include || ctx.incremental) { - struct multi_pack_index *m = get_multi_pack_index(source); + if (!opts->packs_to_include || ctx.incremental) { + struct multi_pack_index *m = get_multi_pack_index(opts->source); if (m && !midx_checksum_valid(m)) { warning(_("ignoring existing multi-pack-index; checksum mismatch")); m = NULL; @@ -1136,7 +1142,7 @@ static int write_midx_internal(struct odb_source *source, */ if (ctx.incremental) ctx.base_midx = m; - else if (!packs_to_include) + else if (!opts->packs_to_include) ctx.m = m; } } @@ -1149,7 +1155,7 @@ static int write_midx_internal(struct odb_source *source, if (ctx.incremental) { struct multi_pack_index *m = ctx.base_midx; while (m) { - if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { + if (opts->flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { error(_("could not load reverse index for MIDX %s"), midx_get_checksum_hex(m)); goto cleanup; @@ -1164,18 +1170,18 @@ static int write_midx_internal(struct odb_source *source, start_pack = ctx.nr; ctx.pack_paths_checked = 0; - if (flags & MIDX_PROGRESS) + if (opts->flags & MIDX_PROGRESS) ctx.progress = start_delayed_progress(r, _("Adding packfiles to multi-pack-index"), 0); else ctx.progress = NULL; - ctx.to_include = packs_to_include; + ctx.to_include = opts->packs_to_include; - for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx); + for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx); stop_progress(&ctx.progress); - if (!packs_to_drop) { + if (!opts->packs_to_drop) { /* * If there is no MIDX then either it doesn't exist, or we're * doing a geometric repack. Try to load it from the source to @@ -1188,7 +1194,7 @@ static int write_midx_internal(struct odb_source *source, if (midx && !midx_needs_update(midx, &ctx)) { struct bitmap_index *bitmap_git; int bitmap_exists; - int want_bitmap = flags & MIDX_WRITE_BITMAP; + int want_bitmap = opts->flags & MIDX_WRITE_BITMAP; bitmap_git = prepare_midx_bitmap_git(midx); bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git); @@ -1200,7 +1206,7 @@ static int write_midx_internal(struct odb_source *source, * corresponding bitmap (or one wasn't requested). */ if (!want_bitmap) - clear_midx_files_ext(source, "bitmap", NULL); + clear_midx_files_ext(ctx.source, "bitmap", NULL); result = 0; goto cleanup; } @@ -1215,11 +1221,11 @@ static int write_midx_internal(struct odb_source *source, goto cleanup; /* nothing to do */ } - if (preferred_pack_name) { + if (opts->preferred_pack_name) { ctx.preferred_pack_idx = NO_PREFERRED_PACK; for (size_t i = 0; i < ctx.nr; i++) { - if (!cmp_idx_or_pack_name(preferred_pack_name, + if (!cmp_idx_or_pack_name(opts->preferred_pack_name, ctx.info[i].pack_name)) { ctx.preferred_pack_idx = i; break; @@ -1228,9 +1234,9 @@ static int write_midx_internal(struct odb_source *source, if (ctx.preferred_pack_idx == NO_PREFERRED_PACK) warning(_("unknown preferred pack: '%s'"), - preferred_pack_name); + opts->preferred_pack_name); } else if (ctx.nr && - (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { + (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { struct packed_git *oldest = ctx.info[0].p; ctx.preferred_pack_idx = 0; @@ -1241,7 +1247,7 @@ static int write_midx_internal(struct odb_source *source, */ open_pack_index(oldest); - if (packs_to_drop && packs_to_drop->nr) + if (opts->packs_to_drop && opts->packs_to_drop->nr) BUG("cannot write a MIDX bitmap during expiration"); /* @@ -1303,20 +1309,21 @@ static int write_midx_internal(struct odb_source *source, QSORT(ctx.info, ctx.nr, pack_info_compare); - if (packs_to_drop && packs_to_drop->nr) { + if (opts->packs_to_drop && opts->packs_to_drop->nr) { size_t drop_index = 0; int missing_drops = 0; - for (size_t i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) { + for (size_t i = 0; + i < ctx.nr && drop_index < opts->packs_to_drop->nr; i++) { int cmp = strcmp(ctx.info[i].pack_name, - packs_to_drop->items[drop_index].string); + opts->packs_to_drop->items[drop_index].string); if (!cmp) { drop_index++; ctx.info[i].expired = 1; } else if (cmp > 0) { error(_("did not see pack-file %s to drop"), - packs_to_drop->items[drop_index].string); + opts->packs_to_drop->items[drop_index].string); drop_index++; missing_drops++; i--; @@ -1353,8 +1360,8 @@ static int write_midx_internal(struct odb_source *source, } /* Check that the preferred pack wasn't expired (if given). */ - if (preferred_pack_name) { - struct pack_info *preferred = bsearch(preferred_pack_name, + if (opts->preferred_pack_name) { + struct pack_info *preferred = bsearch(opts->preferred_pack_name, ctx.info, ctx.nr, sizeof(*ctx.info), idx_or_pack_name_cmp); @@ -1362,7 +1369,7 @@ static int write_midx_internal(struct odb_source *source, uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id]; if (perm == PACK_EXPIRED) warning(_("preferred pack '%s' is expired"), - preferred_pack_name); + opts->preferred_pack_name); } } @@ -1376,15 +1383,15 @@ static int write_midx_internal(struct odb_source *source, } if (!ctx.entries_nr) { - if (flags & MIDX_WRITE_BITMAP) + if (opts->flags & MIDX_WRITE_BITMAP) warning(_("refusing to write multi-pack .bitmap without any objects")); - flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP); + opts->flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP); } if (ctx.incremental) { struct strbuf lock_name = STRBUF_INIT; - get_midx_chain_filename(source, &lock_name); + get_midx_chain_filename(opts->source, &lock_name); hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR); strbuf_release(&lock_name); @@ -1427,7 +1434,7 @@ static int write_midx_internal(struct odb_source *source, MIDX_CHUNK_LARGE_OFFSET_WIDTH), write_midx_large_offsets); - if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { + if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { ctx.pack_order = midx_pack_order(&ctx); add_chunk(cf, MIDX_CHUNKID_REVINDEX, st_mult(ctx.entries_nr, sizeof(uint32_t)), @@ -1445,11 +1452,11 @@ static int write_midx_internal(struct odb_source *source, CSUM_FSYNC | CSUM_HASH_IN_STREAM); free_chunkfile(cf); - if (flags & MIDX_WRITE_REV_INDEX && + if (opts->flags & MIDX_WRITE_REV_INDEX && git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0)) write_midx_reverse_index(&ctx, midx_hash); - if (flags & MIDX_WRITE_BITMAP) { + if (opts->flags & MIDX_WRITE_BITMAP) { struct packing_data pdata; struct commit_stack commits = COMMIT_STACK_INIT; @@ -1458,7 +1465,7 @@ static int write_midx_internal(struct odb_source *source, prepare_midx_packing_data(&pdata, &ctx); - find_commits_for_midx_bitmap(&commits, refs_snapshot, &ctx); + find_commits_for_midx_bitmap(&commits, opts->refs_snapshot, &ctx); /* * The previous steps translated the information from @@ -1469,8 +1476,8 @@ static int write_midx_internal(struct odb_source *source, FREE_AND_NULL(ctx.entries); ctx.entries_nr = 0; - if (write_midx_bitmap(&ctx, midx_hash, &pdata, - commits.items, commits.nr, flags) < 0) { + if (write_midx_bitmap(&ctx, midx_hash, &pdata, commits.items, + commits.nr, opts->flags) < 0) { error(_("could not write multi-pack bitmap")); clear_packing_data(&pdata); commit_stack_clear(&commits); @@ -1503,7 +1510,7 @@ static int write_midx_internal(struct odb_source *source, if (link_midx_to_chain(ctx.base_midx) < 0) goto cleanup; - get_split_midx_filename_ext(source, &final_midx_name, + get_split_midx_filename_ext(opts->source, &final_midx_name, midx_hash, MIDX_EXT_MIDX); if (rename_tempfile(&incr, final_midx_name.buf) < 0) { @@ -1536,7 +1543,7 @@ static int write_midx_internal(struct odb_source *source, if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); - clear_midx_files(source, keep_hashes, + clear_midx_files(opts->source, keep_hashes, ctx.num_multi_pack_indexes_before + 1, ctx.incremental); result = 0; @@ -1571,9 +1578,14 @@ int write_midx_file(struct odb_source *source, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags) { - return write_midx_internal(source, NULL, NULL, - preferred_pack_name, refs_snapshot, - flags); + struct write_midx_opts opts = { + .source = source, + .preferred_pack_name = preferred_pack_name, + .refs_snapshot = refs_snapshot, + .flags = flags, + }; + + return write_midx_internal(&opts); } int write_midx_file_only(struct odb_source *source, @@ -1581,8 +1593,15 @@ int write_midx_file_only(struct odb_source *source, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags) { - return write_midx_internal(source, packs_to_include, NULL, - preferred_pack_name, refs_snapshot, flags); + struct write_midx_opts opts = { + .source = source, + .packs_to_include = packs_to_include, + .preferred_pack_name = preferred_pack_name, + .refs_snapshot = refs_snapshot, + .flags = flags, + }; + + return write_midx_internal(&opts); } int expire_midx_packs(struct odb_source *source, unsigned flags) @@ -1641,9 +1660,14 @@ int expire_midx_packs(struct odb_source *source, unsigned flags) free(count); - if (packs_to_drop.nr) - result = write_midx_internal(source, NULL, - &packs_to_drop, NULL, NULL, flags); + if (packs_to_drop.nr) { + struct write_midx_opts opts = { + .source = source, + .packs_to_drop = &packs_to_drop, + .flags = flags & MIDX_PROGRESS, + }; + result = write_midx_internal(&opts); + } string_list_clear(&packs_to_drop, 0); @@ -1776,6 +1800,10 @@ int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags) struct child_process cmd = CHILD_PROCESS_INIT; FILE *cmd_in; struct multi_pack_index *m = get_multi_pack_index(source); + struct write_midx_opts opts = { + .source = source, + .flags = flags, + }; /* * When updating the default for these configuration @@ -1850,8 +1878,7 @@ int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags) goto cleanup; } - result = write_midx_internal(source, NULL, NULL, NULL, NULL, - flags); + result = write_midx_internal(&opts); cleanup: free(include_pack); From b2ec8e90c201d2395b67f89f3bf38bb472e43460 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 14:00:13 -0500 Subject: [PATCH 010/236] midx: do not require packs to be sorted in lexicographic order The MIDX file format currently requires that pack files be identified by the lexicographic ordering of their names (that is, a pack having a checksum beginning with "abc" would have a numeric pack_int_id which is smaller than the same value for a pack beginning with "bcd"). As a result, it is impossible to combine adjacent MIDX layers together without permuting bits from bitmaps that are in more recent layer(s). To see why, consider the following example: | packs | preferred pack --------+-------------+--------------- MIDX #0 | { X, Y, Z } | Y MIDX #1 | { A, B, C } | B MIDX #2 | { D, E, F } | D , where MIDX #2's base MIDX is MIDX #1, and so on. Suppose that we want to combine MIDX layers #0 and #1, to create a new layer #0' containing the packs from both layers. With the original three MIDX layers, objects are laid out in the bitmap in the order they appear in their source pack, and the packs themselves are arranged according to the pseudo-pack order. In this case, that ordering is Y, X, Z, B, A, C. But recall that the pseudo-pack ordering is defined by the order that packs appear in the MIDX, with the exception of the preferred pack, which sorts ahead of all other packs regardless of its position within the MIDX. In the above example, that means that pack 'Y' could be placed anywhere (so long as it is designated as preferred), however, all other packs must be placed in the location listed above. Because that ordering isn't sorted lexicographically, it is impossible to compact MIDX layers in the above configuration without permuting the object-to-bit-position mapping. Changing this mapping would affect all bitmaps belonging to newer layers, rendering the bitmaps associated with MIDX #2 unreadable. One of the goals of MIDX compaction is that we are able to shrink the length of the MIDX chain *without* invalidating bitmaps that belong to newer layers, and the lexicographic ordering constraint is at odds with this goal. However, packs do not *need* to be lexicographically ordered within the MIDX. As far as I can gather, the only reason they are sorted lexically is to make it possible to perform a binary search over the pack names in a MIDX, necessary to make `midx_contains_pack()`'s performance logarithmic in the number of packs rather than linear. Relax this constraint by allowing MIDX writes to proceed with packs that are not arranged in lexicographic order. `midx_contains_pack()` will lazily instantiate a `pack_names_sorted` array on the MIDX, which will be used to implement the binary search over pack names. This change produces MIDXs which may not be correctly read with external tools or older versions of Git. Though older versions of Git know how to gracefully degrade and ignore any MIDX(s) they consider corrupt, external tools may not be as robust. To avoid unintentionally breaking any such tools, guard this change behind a version bump in the MIDX's on-disk format. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/gitformat-pack.adoc | 8 ++++++-- midx-write.c | 26 ++++++++++++++++++++++---- midx.c | 31 ++++++++++++++++++++++++++++--- midx.h | 4 +++- t/t5319-multi-pack-index.sh | 16 ++++++++++------ 5 files changed, 69 insertions(+), 16 deletions(-) diff --git a/Documentation/gitformat-pack.adoc b/Documentation/gitformat-pack.adoc index 1b4db4aa611e83..3416edceab82e9 100644 --- a/Documentation/gitformat-pack.adoc +++ b/Documentation/gitformat-pack.adoc @@ -374,7 +374,9 @@ HEADER: The signature is: {'M', 'I', 'D', 'X'} 1-byte version number: - Git only writes or recognizes version 1. + Git writes the version specified by the "midx.version" + configuration option, which defaults to 2. It recognizes + both versions 1 and 2. 1-byte Object Id Version We infer the length of object IDs (OIDs) from this value: @@ -413,7 +415,9 @@ CHUNK DATA: strings. There is no extra padding between the filenames, and they are listed in lexicographic order. The chunk itself is padded at the end with between 0 and 3 NUL bytes to make the - chunk size a multiple of 4 bytes. + chunk size a multiple of 4 bytes. Version 1 MIDXs are required to + list their packs in lexicographic order, but version 2 MIDXs may + list their packs in any arbitrary order. Bitmapped Packfiles (ID: {'B', 'T', 'M', 'P'}) Stores a table of two 4-byte unsigned integers in network order. diff --git a/midx-write.c b/midx-write.c index 8a54644e427992..5c8700065a1d16 100644 --- a/midx-write.c +++ b/midx-write.c @@ -36,10 +36,13 @@ extern int cmp_idx_or_pack_name(const char *idx_or_pack_name, static size_t write_midx_header(const struct git_hash_algo *hash_algo, struct hashfile *f, unsigned char num_chunks, - uint32_t num_packs) + uint32_t num_packs, int version) { + if (version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2) + BUG("unexpected MIDX version: %d", version); + hashwrite_be32(f, MIDX_SIGNATURE); - hashwrite_u8(f, MIDX_VERSION); + hashwrite_u8(f, version); hashwrite_u8(f, oid_version(hash_algo)); hashwrite_u8(f, num_chunks); hashwrite_u8(f, 0); /* unused */ @@ -105,6 +108,8 @@ struct write_midx_context { uint32_t preferred_pack_idx; + int version; /* must be MIDX_VERSION_V1 or _V2 */ + int incremental; uint32_t num_multi_pack_indexes_before; @@ -410,7 +415,9 @@ static int write_midx_pack_names(struct hashfile *f, void *data) if (ctx->info[i].expired) continue; - if (i && strcmp(ctx->info[i].pack_name, ctx->info[i - 1].pack_name) <= 0) + if (ctx->version == MIDX_VERSION_V1 && + i && strcmp(ctx->info[i].pack_name, + ctx->info[i - 1].pack_name) <= 0) BUG("incorrect pack-file order: %s before %s", ctx->info[i - 1].pack_name, ctx->info[i].pack_name); @@ -1025,6 +1032,12 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c if (!midx_checksum_valid(midx)) goto out; + /* + * If the version differs, we need to update. + */ + if (midx->version != ctx->version) + goto out; + /* * Ignore incremental updates for now. The assumption is that any * incremental update would be either empty (in which case we will bail @@ -1100,6 +1113,7 @@ static int write_midx_internal(struct write_midx_opts *opts) struct tempfile *incr; struct write_midx_context ctx = { .preferred_pack_idx = NO_PREFERRED_PACK, + .version = MIDX_VERSION_V2, }; struct multi_pack_index *midx_to_free = NULL; int bitmapped_packs_concat_len = 0; @@ -1114,6 +1128,10 @@ static int write_midx_internal(struct write_midx_opts *opts) ctx.repo = r; ctx.source = opts->source; + repo_config_get_int(ctx.repo, "midx.version", &ctx.version); + if (ctx.version != MIDX_VERSION_V1 && ctx.version != MIDX_VERSION_V2) + die(_("unknown MIDX version: %d"), ctx.version); + ctx.incremental = !!(opts->flags & MIDX_WRITE_INCREMENTAL); if (ctx.incremental) @@ -1445,7 +1463,7 @@ static int write_midx_internal(struct write_midx_opts *opts) } write_midx_header(r->hash_algo, f, get_num_chunks(cf), - ctx.nr - dropped_packs); + ctx.nr - dropped_packs, ctx.version); write_chunkfile(cf, &ctx); finalize_hashfile(f, midx_hash, FSYNC_COMPONENT_PACK_METADATA, diff --git a/midx.c b/midx.c index bae458923232d7..c1b9658240d051 100644 --- a/midx.c +++ b/midx.c @@ -149,7 +149,7 @@ static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *sou m->signature, MIDX_SIGNATURE); m->version = m->data[MIDX_BYTE_FILE_VERSION]; - if (m->version != MIDX_VERSION) + if (m->version != MIDX_VERSION_V1 && m->version != MIDX_VERSION_V2) die(_("multi-pack-index version %d not recognized"), m->version); @@ -210,7 +210,8 @@ static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *sou die(_("multi-pack-index pack-name chunk is too short")); cur_pack_name = end + 1; - if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) + if (m->version == MIDX_VERSION_V1 && + i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) die(_("multi-pack-index pack names out of order: '%s' before '%s'"), m->pack_names[i - 1], m->pack_names[i]); @@ -411,6 +412,7 @@ void close_midx(struct multi_pack_index *m) } FREE_AND_NULL(m->packs); FREE_AND_NULL(m->pack_names); + FREE_AND_NULL(m->pack_names_sorted); free(m); } @@ -655,17 +657,40 @@ int cmp_idx_or_pack_name(const char *idx_or_pack_name, return strcmp(idx_or_pack_name, idx_name); } + +static int midx_pack_names_cmp(const void *a, const void *b, void *m_) +{ + struct multi_pack_index *m = m_; + return strcmp(m->pack_names[*(const size_t *)a], + m->pack_names[*(const size_t *)b]); +} + static int midx_contains_pack_1(struct multi_pack_index *m, const char *idx_or_pack_name) { uint32_t first = 0, last = m->num_packs; + if (m->version == MIDX_VERSION_V2 && !m->pack_names_sorted) { + uint32_t i; + + ALLOC_ARRAY(m->pack_names_sorted, m->num_packs); + + for (i = 0; i < m->num_packs; i++) + m->pack_names_sorted[i] = i; + + QSORT_S(m->pack_names_sorted, m->num_packs, midx_pack_names_cmp, + m); + } + while (first < last) { uint32_t mid = first + (last - first) / 2; const char *current; int cmp; - current = m->pack_names[mid]; + if (m->pack_names_sorted) + current = m->pack_names[m->pack_names_sorted[mid]]; + else + current = m->pack_names[mid]; cmp = cmp_idx_or_pack_name(idx_or_pack_name, current); if (!cmp) return 1; diff --git a/midx.h b/midx.h index a39bcc9d03fc9c..aa99a6cb2152f7 100644 --- a/midx.h +++ b/midx.h @@ -11,7 +11,8 @@ struct git_hash_algo; struct odb_source; #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ -#define MIDX_VERSION 1 +#define MIDX_VERSION_V1 1 +#define MIDX_VERSION_V2 2 #define MIDX_BYTE_FILE_VERSION 4 #define MIDX_BYTE_HASH_VERSION 5 #define MIDX_BYTE_NUM_CHUNKS 6 @@ -71,6 +72,7 @@ struct multi_pack_index { uint32_t num_packs_in_base; const char **pack_names; + size_t *pack_names_sorted; struct packed_git **packs; }; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index efeab4d22b7c79..250d21dbd67825 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -21,7 +21,7 @@ midx_read_expect () { EXTRA_CHUNKS="$5" { cat <<-EOF && - header: 4d494458 1 $HASH_LEN $NUM_CHUNKS $NUM_PACKS + header: 4d494458 2 $HASH_LEN $NUM_CHUNKS $NUM_PACKS chunks: pack-names oid-fanout oid-lookup object-offsets$EXTRA_CHUNKS num_objects: $NUM_OBJECTS packs: @@ -512,11 +512,6 @@ test_expect_success 'verify invalid chunk offset' ' "improper chunk offset(s)" ' -test_expect_success 'verify packnames out of order' ' - corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "z" $objdir \ - "pack names out of order" -' - test_expect_success 'verify missing pack' ' corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \ "failed to load pack" @@ -578,6 +573,15 @@ test_expect_success 'verify incorrect checksum' ' $objdir "incorrect checksum" ' +test_expect_success 'setup for v1-specific fsck tests' ' + git -c midx.version=1 multi-pack-index write +' + +test_expect_success 'verify packnames out of order (v1)' ' + corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "z" $objdir \ + "pack names out of order" +' + test_expect_success 'repack progress off for redirected stderr' ' GIT_PROGRESS_DELAY=0 git multi-pack-index --object-dir=$objdir repack 2>err && test_line_count = 0 err From 4f8543255efc3cc1d5247fa16a9f7597ad565993 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 14:00:17 -0500 Subject: [PATCH 011/236] midx-write.c: introduce `midx_pack_perm()` helper The `ctx->pack_perm` array can be considered as a permutation between the original `pack_int_id` of some given pack to its position in the `ctx->info` array containing all packs. Today we can always index into this array with any known `pack_int_id`, since there is never a `pack_int_id` which is greater than or equal to the value `ctx->nr`. That is not necessarily the case with MIDX compaction. For example, suppose we have a MIDX chain with three layers, each containing three packs. The base of the MIDX chain will have packs with IDs 0, 1, and 2, the next layer 3, 4, and 5, and so on. If we are compacting the topmost two layers, we'll have input `pack_int_id` values between [3, 8], but `ctx->nr` will only be 6. In that example, if we want to know where the pack whose original `pack_int_id` value was, say, 7, we would compute `ctx->pack_perm[7]`, leading to an uninitialized read, since there are only 6 entries allocated in that array. To address this, there are a couple of options: - We could allocate enough entries in `ctx->pack_perm` to accommodate the largest `orig_pack_int_id` value. - Or, we could internally shift the input values by the number of packs in the base layer of the lower end of the MIDX compaction range. This patch prepare us to take the latter approach, since it does not allocate more memory than strictly necessary. (In our above example, the base of the lower end of the compaction range is the first MIDX layer (having three packs), so we would end up indexing `ctx->pack_perm[7-3]`, which is a valid read.) Note that this patch does not actually implement that approach yet, but merely performs a behavior-preserving refactoring which will make the change easier to carry out in the future. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/midx-write.c b/midx-write.c index 5c8700065a1d16..9d345fb4737a04 100644 --- a/midx-write.c +++ b/midx-write.c @@ -119,6 +119,12 @@ struct write_midx_context { struct odb_source *source; }; +static uint32_t midx_pack_perm(struct write_midx_context *ctx, + uint32_t orig_pack_int_id) +{ + return ctx->pack_perm[orig_pack_int_id]; +} + static int should_include_pack(const struct write_midx_context *ctx, const char *file_name) { @@ -521,12 +527,12 @@ static int write_midx_object_offsets(struct hashfile *f, for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *obj = list++; - if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED) + if (midx_pack_perm(ctx, obj->pack_int_id) == PACK_EXPIRED) BUG("object %s is in an expired pack with int-id %d", oid_to_hex(&obj->oid), obj->pack_int_id); - hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]); + hashwrite_be32(f, midx_pack_perm(ctx, obj->pack_int_id)); if (ctx->large_offsets_needed && obj->offset >> 31) hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); @@ -627,7 +633,7 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *e = &ctx->entries[i]; data[i].nr = i; - data[i].pack = ctx->pack_perm[e->pack_int_id]; + data[i].pack = midx_pack_perm(ctx, e->pack_int_id); if (!e->preferred) data[i].pack |= (1U << 31); data[i].offset = e->offset; @@ -637,7 +643,7 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *e = &ctx->entries[data[i].nr]; - struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]]; + struct pack_info *pack = &ctx->info[midx_pack_perm(ctx, e->pack_int_id)]; if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) pack->bitmap_pos = i + base_objects; pack->bitmap_nr++; @@ -698,7 +704,7 @@ static void prepare_midx_packing_data(struct packing_data *pdata, struct object_entry *to = packlist_alloc(pdata, &from->oid); oe_set_in_pack(pdata, to, - ctx->info[ctx->pack_perm[from->pack_int_id]].p); + ctx->info[midx_pack_perm(ctx, from->pack_int_id)].p); } trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo); @@ -1384,7 +1390,7 @@ static int write_midx_internal(struct write_midx_opts *opts) sizeof(*ctx.info), idx_or_pack_name_cmp); if (preferred) { - uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id]; + uint32_t perm = midx_pack_perm(&ctx, preferred->orig_pack_int_id); if (perm == PACK_EXPIRED) warning(_("preferred pack '%s' is expired"), opts->preferred_pack_name); From 5f3e7f7279da72b20f99a2bcd26d146f7edaef9d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 14:00:21 -0500 Subject: [PATCH 012/236] midx-write.c: extract `fill_pack_from_midx()` When filling packs from an existing MIDX, `fill_packs_from_midx()` handles preparing a MIDX'd pack, and reading out its pack name from the existing MIDX. MIDX compaction will want to perform an identical operation, though the caller will look quite different than `fill_packs_from_midx()`. To reduce any future code duplication, extract `fill_pack_from_midx()` from `fill_packs_from_midx()` to prepare to call our new helper function in a future change. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/midx-write.c b/midx-write.c index 9d345fb4737a04..c54113cdc8436f 100644 --- a/midx-write.c +++ b/midx-write.c @@ -913,6 +913,21 @@ static int write_midx_bitmap(struct write_midx_context *ctx, return ret; } +static int fill_pack_from_midx(struct pack_info *info, + struct multi_pack_index *m, + uint32_t pack_int_id) +{ + if (prepare_midx_pack(m, pack_int_id)) + return error(_("could not load pack %d"), pack_int_id); + + fill_pack_info(info, + m->packs[pack_int_id - m->num_packs_in_base], + m->pack_names[pack_int_id - m->num_packs_in_base], + pack_int_id); + + return 0; +} + static int fill_packs_from_midx(struct write_midx_context *ctx) { struct multi_pack_index *m; @@ -921,13 +936,13 @@ static int fill_packs_from_midx(struct write_midx_context *ctx) uint32_t i; for (i = 0; i < m->num_packs; i++) { - if (prepare_midx_pack(m, m->num_packs_in_base + i)) - return error(_("could not load pack")); - ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); - fill_pack_info(&ctx->info[ctx->nr++], m->packs[i], - m->pack_names[i], - m->num_packs_in_base + i); + + if (fill_pack_from_midx(&ctx->info[ctx->nr], m, + m->num_packs_in_base + i) < 0) + return -1; + + ctx->nr++; } } return 0; From 93c67df7511057e7c8fda169671a44322c04c2d0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 14:00:26 -0500 Subject: [PATCH 013/236] midx-write.c: enumerate `pack_int_id` values directly Our `midx-write.c::fill_packs_from_midx()` function currently enumerates the range [0, m->num_packs), and then shifts its index variable up by `m->num_packs_in_base` to produce a valid `pack_int_id`. Instead, directly enumerate the range: [m->num_packs_in_base, m->num_packs_in_base + m->num_packs) , which are the original pack_int_ids themselves as opposed to the indexes of those packs relative to the MIDX layer they are contained within. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/midx-write.c b/midx-write.c index c54113cdc8436f..80334914d3e384 100644 --- a/midx-write.c +++ b/midx-write.c @@ -935,11 +935,11 @@ static int fill_packs_from_midx(struct write_midx_context *ctx) for (m = ctx->m; m; m = m->base_midx) { uint32_t i; - for (i = 0; i < m->num_packs; i++) { + for (i = m->num_packs_in_base; + i < m->num_packs_in_base + m->num_packs; i++) { ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); - if (fill_pack_from_midx(&ctx->info[ctx->nr], m, - m->num_packs_in_base + i) < 0) + if (fill_pack_from_midx(&ctx->info[ctx->nr], m, i) < 0) return -1; ctx->nr++; From 9aea84c4e78b462fe55d142e67997b90d8ee3055 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 14:00:30 -0500 Subject: [PATCH 014/236] midx-write.c: factor fanout layering from `compute_sorted_entries()` When computing the set of objects to appear in a MIDX, we use compute_sorted_entries(), which handles objects from various existing sources one fanout layer at a time. The process for computing this set is slightly different during MIDX compaction, so factor out the existing functionality into its own routine to prevent `compute_sorted_entries()` from becoming too difficult to read. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/midx-write.c b/midx-write.c index 80334914d3e384..ca2469213e699f 100644 --- a/midx-write.c +++ b/midx-write.c @@ -328,6 +328,30 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout, } } +static void midx_fanout_add(struct midx_fanout *fanout, + struct write_midx_context *ctx, + uint32_t start_pack, + uint32_t cur_fanout) +{ + uint32_t cur_pack; + + if (ctx->m && !ctx->incremental) + midx_fanout_add_midx_fanout(fanout, ctx->m, cur_fanout, + ctx->preferred_pack_idx); + + for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) { + int preferred = cur_pack == ctx->preferred_pack_idx; + midx_fanout_add_pack_fanout(fanout, ctx->info, cur_pack, + preferred, cur_fanout); + } + + if (ctx->preferred_pack_idx != NO_PREFERRED_PACK && + ctx->preferred_pack_idx < start_pack) + midx_fanout_add_pack_fanout(fanout, ctx->info, + ctx->preferred_pack_idx, 1, + cur_fanout); +} + /* * It is possible to artificially get into a state where there are many * duplicate copies of objects. That can create high memory pressure if @@ -364,23 +388,7 @@ static void compute_sorted_entries(struct write_midx_context *ctx, for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { fanout.nr = 0; - if (ctx->m && !ctx->incremental) - midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout, - ctx->preferred_pack_idx); - - for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) { - int preferred = cur_pack == ctx->preferred_pack_idx; - midx_fanout_add_pack_fanout(&fanout, - ctx->info, cur_pack, - preferred, cur_fanout); - } - - if (ctx->preferred_pack_idx != NO_PREFERRED_PACK && - ctx->preferred_pack_idx < start_pack) - midx_fanout_add_pack_fanout(&fanout, ctx->info, - ctx->preferred_pack_idx, 1, - cur_fanout); - + midx_fanout_add(&fanout, ctx, start_pack, cur_fanout); midx_fanout_sort(&fanout); /* From dedf71f0b1b9a64d5aa7558ef45b26166d8d66fc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 14:00:36 -0500 Subject: [PATCH 015/236] t/helper/test-read-midx.c: plug memory leak when selecting layer Though our 'read-midx' test tool is capable of printing information about a single MIDX layer identified by its checksum, no caller in our test suite exercises this path. Unfortunately, there is a memory leak lurking in this (currently) unused path that would otherwise be exposed by the following commit. This occurs when providing a MIDX layer checksum other than the tip. As we walk over the MIDX chain trying to find the matching layer, we drop our reference to the top-most MIDX layer. Thus, our call to 'close_midx()' later on leaks memory between the top-most MIDX layer and the MIDX layer immediately following the specified one. Plug this leak by holding a reference to the tip of the MIDX chain, and ensure that we call `close_midx()` before terminating the test tool. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/helper/test-read-midx.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 9d42c587564182..388d29e2b53db3 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -26,9 +26,10 @@ static int read_midx_file(const char *object_dir, const char *checksum, int show_objects) { uint32_t i; - struct multi_pack_index *m; + struct multi_pack_index *m, *tip; + int ret = 0; - m = setup_midx(object_dir); + m = tip = setup_midx(object_dir); if (!m) return 1; @@ -36,8 +37,11 @@ static int read_midx_file(const char *object_dir, const char *checksum, if (checksum) { while (m && strcmp(midx_get_checksum_hex(m), checksum)) m = m->base_midx; - if (!m) - return 1; + if (!m) { + ret = error(_("could not find MIDX with checksum %s"), + checksum); + goto out; + } } printf("header: %08x %d %d %d %d\n", @@ -82,9 +86,10 @@ static int read_midx_file(const char *object_dir, const char *checksum, } } - close_midx(m); +out: + close_midx(tip); - return 0; + return ret; } static int read_midx_checksum(const char *object_dir) From 9df44a97f165bbdd8d591c6e99c8894ae036b5bd Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 14:00:41 -0500 Subject: [PATCH 016/236] midx: implement MIDX compaction When managing a MIDX chain with many layers, it is convenient to combine a sequence of adjacent layers into a single layer to prevent the chain from growing too long. While it is conceptually possible to "compact" a sequence of MIDX layers together by running "git multi-pack-index write --stdin-packs", there are a few drawbacks that make this less than desirable: - Preserving the MIDX chain is impossible, since there is no way to write a MIDX layer that contains objects or packs found in an earlier MIDX layer already part of the chain. So callers would have to write an entirely new (non-incremental) MIDX containing only the compacted layers, discarding all other objects/packs from the MIDX. - There is (currently) no way to write a MIDX layer outside of the MIDX chain to work around the above, such that the MIDX chain could be reassembled substituting the compacted layers with the MIDX that was written. - The `--stdin-packs` command-line option does not allow us to specify the order of packs as they appear in the MIDX. Therefore, even if there were workarounds for the previous two challenges, any bitmaps belonging to layers which come after the compacted layer(s) would no longer be valid. This commit introduces a way to compact a sequence of adjacent MIDX layers into a single layer while preserving the MIDX chain, as well as any bitmap(s) in layers which are newer than the compacted ones. Implementing MIDX compaction does not require a significant number of changes to how MIDX layers are written. The main changes are as follows: - Instead of calling `fill_packs_from_midx()`, we call a new function `fill_packs_from_midx_range()`, which walks backwards along the portion of the MIDX chain which we are compacting, and adds packs one layer a time. In order to preserve the pseudo-pack order, the concatenated pack order is preserved, with the exception of preferred packs which are always added first. - After adding entries from the set of packs in the compaction range, `compute_sorted_entries()` must adjust the `pack_int_id`'s for all objects added in each fanout layer to match their original `pack_int_id`'s (as opposed to the index at which each pack appears in `ctx.info`). Note that we cannot reuse `midx_fanout_add_midx_fanout()` directly here, as it unconditionally recurs through the `->base_midx`. Factor out a `_1()` variant that operates on a single layer, reimplement the existing function in terms of it, and use the new variant from `midx_fanout_add_compact()`. Since we are sorting the list of objects ourselves, the order we add them in does not matter. - When writing out the new 'multi-pack-index-chain' file, discard any layers in the compaction range, replacing them with the newly written layer, instead of keeping them and placing the new layer at the end of the chain. This ends up being sufficient to implement MIDX compaction in such a way that preserves bitmaps corresponding to more recent layers in the MIDX chain. The tests for MIDX compaction are so far fairly spartan, since the main interesting behavior here is ensuring that the right packs/objects are selected from each layer, and that the pack order is preserved despite whether or not they are sorted in lexicographic order in the original MIDX chain. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 13 ++ builtin/multi-pack-index.c | 74 +++++++ midx-write.c | 277 +++++++++++++++++++++--- midx.h | 5 + t/meson.build | 1 + t/t5335-compact-multi-pack-index.sh | 175 +++++++++++++++ 6 files changed, 518 insertions(+), 27 deletions(-) create mode 100755 t/t5335-compact-multi-pack-index.sh diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 164cf1f2291b99..883a0529741863 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -12,6 +12,8 @@ SYNOPSIS 'git multi-pack-index' [] write [--preferred-pack=] [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] [--refs-snapshot=] +'git multi-pack-index' [] compact [--[no-]incremental] + 'git multi-pack-index' [] verify 'git multi-pack-index' [] expire 'git multi-pack-index' [] repack [--batch-size=] @@ -83,6 +85,17 @@ marker). necessary. -- +compact:: + Write a new MIDX layer containing only objects and packs present + in the range `` to ``, where both arguments are + checksums of existing layers in the MIDX chain. ++ +-- + --incremental:: + Write the result to a MIDX chain instead of writing a + stand-alone MIDX. +-- + verify:: Verify the contents of the MIDX file. diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index c0c6c1760c0f28..043ee8c478a3d6 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -17,6 +17,10 @@ " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ " [--refs-snapshot=]") +#define BUILTIN_MIDX_COMPACT_USAGE \ + N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ + " ") + #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -30,6 +34,10 @@ static char const * const builtin_multi_pack_index_write_usage[] = { BUILTIN_MIDX_WRITE_USAGE, NULL }; +static char const * const builtin_multi_pack_index_compact_usage[] = { + BUILTIN_MIDX_COMPACT_USAGE, + NULL +}; static char const * const builtin_multi_pack_index_verify_usage[] = { BUILTIN_MIDX_VERIFY_USAGE, NULL @@ -44,6 +52,7 @@ static char const * const builtin_multi_pack_index_repack_usage[] = { }; static char const * const builtin_multi_pack_index_usage[] = { BUILTIN_MIDX_WRITE_USAGE, + BUILTIN_MIDX_COMPACT_USAGE, BUILTIN_MIDX_VERIFY_USAGE, BUILTIN_MIDX_EXPIRE_USAGE, BUILTIN_MIDX_REPACK_USAGE, @@ -195,6 +204,70 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, return ret; } +static int cmd_multi_pack_index_compact(int argc, const char **argv, + const char *prefix, + struct repository *repo) +{ + struct multi_pack_index *m, *cur; + struct multi_pack_index *from_midx = NULL; + struct multi_pack_index *to_midx = NULL; + struct odb_source *source; + int ret; + + struct option *options; + static struct option builtin_multi_pack_index_compact_options[] = { + OPT_BIT(0, "incremental", &opts.flags, + N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_END(), + }; + + repo_config(repo, git_multi_pack_index_write_config, NULL); + + options = add_common_options(builtin_multi_pack_index_compact_options); + + trace2_cmd_mode(argv[0]); + + if (isatty(2)) + opts.flags |= MIDX_PROGRESS; + argc = parse_options(argc, argv, prefix, + options, builtin_multi_pack_index_compact_usage, + 0); + + if (argc != 2) + usage_with_options(builtin_multi_pack_index_compact_usage, + options); + source = handle_object_dir_option(the_repository); + + FREE_AND_NULL(options); + + m = get_multi_pack_index(source); + + for (cur = m; cur && !(from_midx && to_midx); cur = cur->base_midx) { + const char *midx_csum = midx_get_checksum_hex(cur); + + if (!from_midx && !strcmp(midx_csum, argv[0])) + from_midx = cur; + if (!to_midx && !strcmp(midx_csum, argv[1])) + to_midx = cur; + } + + if (!from_midx) + die(_("could not find MIDX: %s"), argv[0]); + if (!to_midx) + die(_("could not find MIDX: %s"), argv[1]); + if (from_midx == to_midx) + die(_("MIDX compaction endpoints must be unique")); + + for (m = from_midx; m; m = m->base_midx) { + if (m == to_midx) + die(_("MIDX %s must be an ancestor of %s"), argv[0], argv[1]); + } + + ret = write_midx_file_compact(source, from_midx, to_midx, opts.flags); + + return ret; +} + static int cmd_multi_pack_index_verify(int argc, const char **argv, const char *prefix, struct repository *repo UNUSED) @@ -295,6 +368,7 @@ int cmd_multi_pack_index(int argc, struct option builtin_multi_pack_index_options[] = { OPT_SUBCOMMAND("repack", &fn, cmd_multi_pack_index_repack), OPT_SUBCOMMAND("write", &fn, cmd_multi_pack_index_write), + OPT_SUBCOMMAND("compact", &fn, cmd_multi_pack_index_compact), OPT_SUBCOMMAND("verify", &fn, cmd_multi_pack_index_verify), OPT_SUBCOMMAND("expire", &fn, cmd_multi_pack_index_expire), OPT_END(), diff --git a/midx-write.c b/midx-write.c index ca2469213e699f..bf53ad1c4b7340 100644 --- a/midx-write.c +++ b/midx-write.c @@ -113,6 +113,10 @@ struct write_midx_context { int incremental; uint32_t num_multi_pack_indexes_before; + struct multi_pack_index *compact_from; + struct multi_pack_index *compact_to; + int compact; + struct string_list *to_include; struct repository *repo; @@ -122,6 +126,8 @@ struct write_midx_context { static uint32_t midx_pack_perm(struct write_midx_context *ctx, uint32_t orig_pack_int_id) { + if (ctx->compact) + orig_pack_int_id -= ctx->compact_from->num_packs_in_base; return ctx->pack_perm[orig_pack_int_id]; } @@ -268,18 +274,14 @@ static void midx_fanout_sort(struct midx_fanout *fanout) QSORT(fanout->entries, fanout->nr, midx_oid_compare); } -static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout, - struct multi_pack_index *m, - uint32_t cur_fanout, - uint32_t preferred_pack) +static void midx_fanout_add_midx_fanout_1(struct midx_fanout *fanout, + struct multi_pack_index *m, + uint32_t cur_fanout, + uint32_t preferred_pack) { uint32_t start = m->num_objects_in_base, end; uint32_t cur_object; - if (m->base_midx) - midx_fanout_add_midx_fanout(fanout, m->base_midx, cur_fanout, - preferred_pack); - if (cur_fanout) start += ntohl(m->chunk_oid_fanout[cur_fanout - 1]); end = m->num_objects_in_base + ntohl(m->chunk_oid_fanout[cur_fanout]); @@ -303,6 +305,17 @@ static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout, } } +static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout, + struct multi_pack_index *m, + uint32_t cur_fanout, + uint32_t preferred_pack) +{ + if (m->base_midx) + midx_fanout_add_midx_fanout(fanout, m->base_midx, cur_fanout, + preferred_pack); + midx_fanout_add_midx_fanout_1(fanout, m, cur_fanout, preferred_pack); +} + static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout, struct pack_info *info, uint32_t cur_pack, @@ -352,6 +365,21 @@ static void midx_fanout_add(struct midx_fanout *fanout, cur_fanout); } +static void midx_fanout_add_compact(struct midx_fanout *fanout, + struct write_midx_context *ctx, + uint32_t cur_fanout) +{ + struct multi_pack_index *m = ctx->compact_to; + + ASSERT(ctx->compact); + + while (m && m != ctx->compact_from->base_midx) { + midx_fanout_add_midx_fanout_1(fanout, m, cur_fanout, + NO_PREFERRED_PACK); + m = m->base_midx; + } +} + /* * It is possible to artificially get into a state where there are many * duplicate copies of objects. That can create high memory pressure if @@ -370,6 +398,9 @@ static void compute_sorted_entries(struct write_midx_context *ctx, size_t alloc_objects, total_objects = 0; struct midx_fanout fanout = { 0 }; + if (ctx->compact) + ASSERT(!start_pack); + for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) total_objects = st_add(total_objects, ctx->info[cur_pack].p->num_objects); @@ -388,7 +419,10 @@ static void compute_sorted_entries(struct write_midx_context *ctx, for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { fanout.nr = 0; - midx_fanout_add(&fanout, ctx, start_pack, cur_fanout); + if (ctx->compact) + midx_fanout_add_compact(&fanout, ctx, cur_fanout); + else + midx_fanout_add(&fanout, ctx, start_pack, cur_fanout); midx_fanout_sort(&fanout); /* @@ -956,6 +990,75 @@ static int fill_packs_from_midx(struct write_midx_context *ctx) return 0; } +static uint32_t compactible_packs_between(const struct multi_pack_index *from, + const struct multi_pack_index *to) +{ + uint32_t nr; + + ASSERT(from && to); + + if (unsigned_add_overflows(to->num_packs, to->num_packs_in_base)) + die(_("too many packs, unable to compact")); + + nr = to->num_packs + to->num_packs_in_base; + if (nr < from->num_packs_in_base) + BUG("unexpected number of packs in base during compaction: " + "%"PRIu32" < %"PRIu32, nr, from->num_packs_in_base); + + return nr - from->num_packs_in_base; +} + +static int fill_packs_from_midx_range(struct write_midx_context *ctx, + int bitmap_order) +{ + struct multi_pack_index *m = ctx->compact_to; + uint32_t packs_nr; + + ASSERT(ctx->compact && !ctx->nr); + ASSERT(ctx->compact_from); + ASSERT(ctx->compact_to); + + packs_nr = compactible_packs_between(ctx->compact_from, + ctx->compact_to); + + ALLOC_GROW(ctx->info, packs_nr, ctx->alloc); + + while (m != ctx->compact_from->base_midx) { + uint32_t pack_int_id, preferred_pack_id; + uint32_t i; + + if (bitmap_order) { + if (midx_preferred_pack(m, &preferred_pack_id) < 0) + die(_("could not determine preferred pack")); + } else { + preferred_pack_id = m->num_packs_in_base; + } + + pack_int_id = m->num_packs_in_base - ctx->compact_from->num_packs_in_base; + + if (fill_pack_from_midx(&ctx->info[pack_int_id++], m, + preferred_pack_id) < 0) + return -1; + + for (i = m->num_packs_in_base; + i < m->num_packs_in_base + m->num_packs; i++) { + if (preferred_pack_id == i) + continue; + + if (fill_pack_from_midx(&ctx->info[pack_int_id++], m, + i) < 0) + return -1; + } + + ctx->nr += m->num_packs; + m = m->base_midx; + } + + ASSERT(ctx->nr == packs_nr); + + return 0; +} + static struct { const char *non_split; const char *split; @@ -1075,6 +1178,9 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c if (ctx->incremental) goto out; + if (ctx->compact) + goto out; /* Compaction always requires an update. */ + /* * Otherwise, we need to verify that the packs covered by the existing * MIDX match the packs that we already have. The logic to do so is way @@ -1120,12 +1226,23 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c return needed; } +static int midx_hashcmp(const struct multi_pack_index *a, + const struct multi_pack_index *b, + const struct git_hash_algo *algop) +{ + return hashcmp(midx_get_checksum_hash(a), midx_get_checksum_hash(b), + algop); +} + struct write_midx_opts { struct odb_source *source; /* non-optional */ struct string_list *packs_to_include; struct string_list *packs_to_drop; + struct multi_pack_index *compact_from; + struct multi_pack_index *compact_to; + const char *preferred_pack_name; const char *refs_snapshot; unsigned flags; @@ -1150,6 +1267,7 @@ static int write_midx_internal(struct write_midx_opts *opts) int dropped_packs = 0; int result = -1; const char **keep_hashes = NULL; + size_t keep_hashes_nr = 0; struct chunkfile *cf; trace2_region_enter("midx", "write_midx_internal", r); @@ -1162,6 +1280,19 @@ static int write_midx_internal(struct write_midx_opts *opts) die(_("unknown MIDX version: %d"), ctx.version); ctx.incremental = !!(opts->flags & MIDX_WRITE_INCREMENTAL); + ctx.compact = !!(opts->flags & MIDX_WRITE_COMPACT); + + if (ctx.compact) { + if (ctx.version != MIDX_VERSION_V2) + die(_("cannot perform MIDX compaction with v1 format")); + if (!opts->compact_from) + BUG("expected non-NULL 'from' MIDX during compaction"); + if (!opts->compact_to) + BUG("expected non-NULL 'to' MIDX during compaction"); + + ctx.compact_from = opts->compact_from; + ctx.compact_to = opts->compact_to; + } if (ctx.incremental) strbuf_addf(&midx_name, @@ -1189,11 +1320,18 @@ static int write_midx_internal(struct write_midx_opts *opts) */ if (ctx.incremental) ctx.base_midx = m; - else if (!opts->packs_to_include) + if (!opts->packs_to_include) ctx.m = m; } } + /* + * If compacting MIDX layer(s) in the range [from, to], then the + * compacted MIDX will share the same base MIDX as 'from'. + */ + if (ctx.compact) + ctx.base_midx = ctx.compact_from->base_midx; + ctx.nr = 0; ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16; ctx.info = NULL; @@ -1210,7 +1348,7 @@ static int write_midx_internal(struct write_midx_opts *opts) ctx.num_multi_pack_indexes_before++; m = m->base_midx; } - } else if (ctx.m && fill_packs_from_midx(&ctx)) { + } else if (ctx.m && !ctx.compact && fill_packs_from_midx(&ctx)) { goto cleanup; } @@ -1223,9 +1361,18 @@ static int write_midx_internal(struct write_midx_opts *opts) else ctx.progress = NULL; - ctx.to_include = opts->packs_to_include; + if (ctx.compact) { + int bitmap_order = 0; + if (opts->preferred_pack_name) + bitmap_order |= 1; + else if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) + bitmap_order |= 1; - for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx); + fill_packs_from_midx_range(&ctx, bitmap_order); + } else { + ctx.to_include = opts->packs_to_include; + for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx); + } stop_progress(&ctx.progress); if (!opts->packs_to_drop) { @@ -1354,12 +1501,19 @@ static int write_midx_internal(struct write_midx_opts *opts) ctx.large_offsets_needed = 1; } - QSORT(ctx.info, ctx.nr, pack_info_compare); + if (ctx.compact) { + if (ctx.version != MIDX_VERSION_V2) + BUG("performing MIDX compaction with v1 MIDX"); + } else { + QSORT(ctx.info, ctx.nr, pack_info_compare); + } if (opts->packs_to_drop && opts->packs_to_drop->nr) { size_t drop_index = 0; int missing_drops = 0; + ASSERT(!ctx.compact); + for (size_t i = 0; i < ctx.nr && drop_index < opts->packs_to_drop->nr; i++) { int cmp = strcmp(ctx.info[i].pack_name, @@ -1391,12 +1545,20 @@ static int write_midx_internal(struct write_midx_opts *opts) */ ALLOC_ARRAY(ctx.pack_perm, ctx.nr); for (size_t i = 0; i < ctx.nr; i++) { + uint32_t from = ctx.info[i].orig_pack_int_id; + uint32_t to; + if (ctx.info[i].expired) { + to = PACK_EXPIRED; dropped_packs++; - ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED; } else { - ctx.pack_perm[ctx.info[i].orig_pack_int_id] = i - dropped_packs; + to = i - dropped_packs; } + + if (ctx.compact) + from -= ctx.compact_from->num_packs_in_base; + + ctx.pack_perm[from] = to; } for (size_t i = 0; i < ctx.nr; i++) { @@ -1542,7 +1704,24 @@ static int write_midx_internal(struct write_midx_opts *opts) if (ctx.num_multi_pack_indexes_before == UINT32_MAX) die(_("too many multi-pack-indexes")); - CALLOC_ARRAY(keep_hashes, ctx.num_multi_pack_indexes_before + 1); + if (ctx.compact) { + struct multi_pack_index *m; + + /* + * Keep all MIDX layers excluding those in the range [from, to]. + */ + for (m = ctx.base_midx; m; m = m->base_midx) + keep_hashes_nr++; + for (m = ctx.m; + m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); + m = m->base_midx) + keep_hashes_nr++; + + keep_hashes_nr++; /* include the compacted layer */ + } else { + keep_hashes_nr = ctx.num_multi_pack_indexes_before + 1; + } + CALLOC_ARRAY(keep_hashes, keep_hashes_nr); if (ctx.incremental) { FILE *chainf = fdopen_lock_file(&lk, "w"); @@ -1567,17 +1746,47 @@ static int write_midx_internal(struct write_midx_opts *opts) strbuf_release(&final_midx_name); - keep_hashes[ctx.num_multi_pack_indexes_before] = - xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); + if (ctx.compact) { + struct multi_pack_index *m; + uint32_t num_layers_before_from = 0; + uint32_t i; - for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { - uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; + for (m = ctx.base_midx; m; m = m->base_midx) + num_layers_before_from++; - keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); - m = m->base_midx; + m = ctx.base_midx; + for (i = 0; i < num_layers_before_from; i++) { + uint32_t j = num_layers_before_from - i - 1; + + keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); + m = m->base_midx; + } + + keep_hashes[i] = xstrdup(hash_to_hex_algop(midx_hash, + r->hash_algo)); + + i = 0; + for (m = ctx.m; + m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); + m = m->base_midx) { + keep_hashes[keep_hashes_nr - i - 1] = + xstrdup(midx_get_checksum_hex(m)); + i++; + } + } else { + keep_hashes[ctx.num_multi_pack_indexes_before] = + xstrdup(hash_to_hex_algop(midx_hash, + r->hash_algo)); + + for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { + uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; + + keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); + m = m->base_midx; + } } - for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) + for (uint32_t i = 0; i < keep_hashes_nr; i++) fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); } else { keep_hashes[ctx.num_multi_pack_indexes_before] = @@ -1590,8 +1799,7 @@ static int write_midx_internal(struct write_midx_opts *opts) if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); - clear_midx_files(opts->source, keep_hashes, - ctx.num_multi_pack_indexes_before + 1, + clear_midx_files(opts->source, keep_hashes, keep_hashes_nr, ctx.incremental); result = 0; @@ -1609,7 +1817,7 @@ static int write_midx_internal(struct write_midx_opts *opts) free(ctx.pack_perm); free(ctx.pack_order); if (keep_hashes) { - for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) + for (uint32_t i = 0; i < keep_hashes_nr; i++) free((char *)keep_hashes[i]); free(keep_hashes); } @@ -1651,6 +1859,21 @@ int write_midx_file_only(struct odb_source *source, return write_midx_internal(&opts); } +int write_midx_file_compact(struct odb_source *source, + struct multi_pack_index *from, + struct multi_pack_index *to, + unsigned flags) +{ + struct write_midx_opts opts = { + .source = source, + .compact_from = from, + .compact_to = to, + .flags = flags | MIDX_WRITE_COMPACT, + }; + + return write_midx_internal(&opts); +} + int expire_midx_packs(struct odb_source *source, unsigned flags) { uint32_t i, *count, result = 0; diff --git a/midx.h b/midx.h index aa99a6cb2152f7..08f3728e5204b8 100644 --- a/midx.h +++ b/midx.h @@ -82,6 +82,7 @@ struct multi_pack_index { #define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3) #define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4) #define MIDX_WRITE_INCREMENTAL (1 << 5) +#define MIDX_WRITE_COMPACT (1 << 6) #define MIDX_EXT_REV "rev" #define MIDX_EXT_BITMAP "bitmap" @@ -131,6 +132,10 @@ int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags); +int write_midx_file_compact(struct odb_source *source, + struct multi_pack_index *from, + struct multi_pack_index *to, + unsigned flags); void clear_midx_file(struct repository *r); int verify_midx_file(struct odb_source *source, unsigned flags); int expire_midx_packs(struct odb_source *source, unsigned flags); diff --git a/t/meson.build b/t/meson.build index f80e366cff73f3..2421220917aedc 100644 --- a/t/meson.build +++ b/t/meson.build @@ -618,6 +618,7 @@ integration_tests = [ 't5332-multi-pack-reuse.sh', 't5333-pseudo-merge-bitmaps.sh', 't5334-incremental-multi-pack-index.sh', + 't5335-compact-multi-pack-index.sh', 't5351-unpack-large-objects.sh', 't5400-send-pack.sh', 't5401-update-hooks.sh', diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh new file mode 100755 index 00000000000000..797ae05c3bdfa5 --- /dev/null +++ b/t/t5335-compact-multi-pack-index.sh @@ -0,0 +1,175 @@ +#!/bin/sh + +test_description='multi-pack-index compaction' + +. ./test-lib.sh + +GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0 + +objdir=.git/objects +packdir=$objdir/pack +midxdir=$packdir/multi-pack-index.d +midx_chain=$midxdir/multi-pack-index-chain + +nth_line() { + local n="$1" + shift + awk "NR==$n" "$@" +} + +write_packs () { + for c in "$@" + do + test_commit "$c" && + + git pack-objects --all --unpacked $packdir/pack-$c && + git prune-packed && + + git multi-pack-index write --incremental --bitmap || return 1 + done +} + +test_midx_layer_packs () { + local checksum="$1" && + shift && + + test-tool read-midx $objdir "$checksum" >out && + + printf "%s\n" "$@" >expect && + # NOTE: do *not* pipe through sort here, we want to ensure the + # order of packs is preserved during compaction. + grep "^pack-" out | cut -d"-" -f2 >actual && + + test_cmp expect actual +} + +test_midx_layer_object_uniqueness () { + : >objs.all + while read layer + do + test-tool read-midx --show-objects $objdir "$layer" >out && + grep "\.pack$" out | cut -d" " -f1 | sort >objs.layer && + test_stdout_line_count = 0 comm -12 objs.all objs.layer && + cat objs.all objs.layer | sort >objs.tmp && + mv objs.tmp objs.all || return 1 + done <$midx_chain +} + +test_expect_success 'MIDX compaction with lex-ordered pack names' ' + git init midx-compact-lex-order && + ( + cd midx-compact-lex-order && + + git config maintenance.auto false && + + write_packs A B C D E && + test_line_count = 5 $midx_chain && + + git multi-pack-index compact --incremental \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 3 $midx_chain && + + test_midx_layer_packs "$(nth_line 1 "$midx_chain")" A && + test_midx_layer_packs "$(nth_line 2 "$midx_chain")" B C D && + test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E && + + test_midx_layer_object_uniqueness + ) +' + +test_expect_success 'MIDX compaction with non-lex-ordered pack names' ' + git init midx-compact-non-lex-order && + ( + cd midx-compact-non-lex-order && + + git config maintenance.auto false && + + write_packs D C A B E && + test_line_count = 5 $midx_chain && + + git multi-pack-index compact --incremental \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 3 $midx_chain && + + test_midx_layer_packs "$(nth_line 1 "$midx_chain")" D && + test_midx_layer_packs "$(nth_line 2 "$midx_chain")" C A B && + test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E && + + test_midx_layer_object_uniqueness + ) +' + +test_expect_success 'setup for bogus MIDX compaction scenarios' ' + git init midx-compact-bogus && + ( + cd midx-compact-bogus && + + git config maintenance.auto false && + + write_packs A B C + ) +' + +test_expect_success 'MIDX compaction with missing endpoints' ' + ( + cd midx-compact-bogus && + + test_must_fail git multi-pack-index compact --incremental \ + "" "" 2>err && + test_grep "could not find MIDX: " err && + + test_must_fail git multi-pack-index compact --incremental \ + "" "$(nth_line 2 "$midx_chain")" 2>err && + test_grep "could not find MIDX: " err && + + test_must_fail git multi-pack-index compact --incremental \ + "$(nth_line 2 "$midx_chain")" "" 2>err && + test_grep "could not find MIDX: " err + ) +' + +test_expect_success 'MIDX compaction with reversed endpoints' ' + ( + cd midx-compact-bogus && + + from="$(nth_line 3 "$midx_chain")" && + to="$(nth_line 1 "$midx_chain")" && + + test_must_fail git multi-pack-index compact --incremental \ + "$from" "$to" 2>err && + + test_grep "MIDX $from must be an ancestor of $to" err + ) +' + +test_expect_success 'MIDX compaction with identical endpoints' ' + ( + cd midx-compact-bogus && + + from="$(nth_line 3 "$midx_chain")" && + to="$(nth_line 3 "$midx_chain")" && + + test_must_fail git multi-pack-index compact --incremental \ + "$from" "$to" 2>err && + + test_grep "MIDX compaction endpoints must be unique" err + ) +' + +test_expect_success 'MIDX compaction with midx.version=1' ' + ( + cd midx-compact-bogus && + + test_must_fail git -c midx.version=1 multi-pack-index compact \ + "$(nth_line 1 "$midx_chain")" \ + "$(nth_line 2 "$midx_chain")" 2>err && + + test_grep "fatal: cannot perform MIDX compaction with v1 format" err + ) +' + +test_done From d54da84bd9de09fc339accff553f1fc8a5539154 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Feb 2026 14:00:45 -0500 Subject: [PATCH 017/236] midx: enable reachability bitmaps during MIDX compaction Enable callers to generate reachability bitmaps when performing MIDX layer compaction by combining all existing bitmaps from the compacted layers. Note that because of the object/pack ordering described by the previous commit, the pseudo-pack order for the compacted MIDX is the same as concatenating the individual pseudo-pack orderings for each layer in the compaction range. As a result, the only non-test or documentation change necessary is to treat all objects as non-preferred during compaction so as not to disturb the object ordering. In the future, we may want to adjust which commit(s) receive reachability bitmaps when compacting multiple .bitmap files into one, or even generate new bitmaps (e.g., if the references have moved significantly since the .bitmap was generated). This commit only implements combining all existing bitmaps in range together in order to demonstrate and lay the groundwork for more exotic strategies. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 5 +- builtin/multi-pack-index.c | 4 +- midx-write.c | 2 +- t/t5335-compact-multi-pack-index.sh | 122 +++++++++++++++++++++++- 4 files changed, 128 insertions(+), 5 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 883a0529741863..612568301412d6 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -13,7 +13,7 @@ SYNOPSIS [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] [--refs-snapshot=] 'git multi-pack-index' [] compact [--[no-]incremental] - + [--[no-]bitmap] 'git multi-pack-index' [] verify 'git multi-pack-index' [] expire 'git multi-pack-index' [] repack [--batch-size=] @@ -94,6 +94,9 @@ compact:: --incremental:: Write the result to a MIDX chain instead of writing a stand-alone MIDX. + + --[no-]bitmap:: + Control whether or not a multi-pack bitmap is written. -- verify:: diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 043ee8c478a3d6..2f24c113c8f261 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -19,7 +19,7 @@ #define BUILTIN_MIDX_COMPACT_USAGE \ N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ - " ") + " [--[no-]bitmap] ") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -216,6 +216,8 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, struct option *options; static struct option builtin_multi_pack_index_compact_options[] = { + OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), + MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), OPT_END(), diff --git a/midx-write.c b/midx-write.c index bf53ad1c4b7340..0ff2e45aa7abdd 100644 --- a/midx-write.c +++ b/midx-write.c @@ -676,7 +676,7 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) struct pack_midx_entry *e = &ctx->entries[i]; data[i].nr = i; data[i].pack = midx_pack_perm(ctx, e->pack_int_id); - if (!e->preferred) + if (!e->preferred || ctx->compact) data[i].pack |= (1U << 31); data[i].offset = e->offset; } diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh index 797ae05c3bdfa5..40f3844282f04e 100755 --- a/t/t5335-compact-multi-pack-index.sh +++ b/t/t5335-compact-multi-pack-index.sh @@ -67,7 +67,7 @@ test_expect_success 'MIDX compaction with lex-ordered pack names' ' write_packs A B C D E && test_line_count = 5 $midx_chain && - git multi-pack-index compact --incremental \ + git multi-pack-index compact --incremental --bitmap \ "$(nth_line 2 "$midx_chain")" \ "$(nth_line 4 "$midx_chain")" && test_line_count = 3 $midx_chain && @@ -90,7 +90,7 @@ test_expect_success 'MIDX compaction with non-lex-ordered pack names' ' write_packs D C A B E && test_line_count = 5 $midx_chain && - git multi-pack-index compact --incremental \ + git multi-pack-index compact --incremental --bitmap \ "$(nth_line 2 "$midx_chain")" \ "$(nth_line 4 "$midx_chain")" && test_line_count = 3 $midx_chain && @@ -172,4 +172,122 @@ test_expect_success 'MIDX compaction with midx.version=1' ' ) ' +midx_objs_by_pack () { + awk '/\.pack$/ { split($3, a, "-"); print a[2], $1 }' | sort +} + +tag_objs_from_pack () { + objs="$(git rev-list --objects --no-object-names "$2")" && + printf "$1 %s\n" $objs | sort +} + +test_expect_success 'MIDX compaction preserves pack object selection' ' + git init midx-compact-preserve-selection && + ( + cd midx-compact-preserve-selection && + + git config maintenance.auto false && + + test_commit A && + test_commit B && + + # Create two packs, one containing just the objects from + # A, and another containing all objects from the + # repository. + p1="$(echo A | git pack-objects --revs --delta-base-offset \ + $packdir/pack-1)" && + p0="$(echo B | git pack-objects --revs --delta-base-offset \ + $packdir/pack-0)" && + + echo "pack-1-$p1.idx" | git multi-pack-index write \ + --incremental --bitmap --stdin-packs && + echo "pack-0-$p0.idx" | git multi-pack-index write \ + --incremental --bitmap --stdin-packs && + + write_packs C && + + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 1 "$midx_chain")" \ + "$(nth_line 2 "$midx_chain")" && + + + test-tool read-midx --show-objects $objdir \ + "$(nth_line 1 "$midx_chain")" >AB.info && + test-tool read-midx --show-objects $objdir \ + "$(nth_line 2 "$midx_chain")" >C.info && + + midx_objs_by_pack AB.actual && + midx_objs_by_pack C.actual && + + { + tag_objs_from_pack 1 A && + tag_objs_from_pack 0 A..B + } | sort >AB.expect && + tag_objs_from_pack C B..C >C.expect && + + test_cmp AB.expect AB.actual && + test_cmp C.expect C.actual + ) +' + +test_expect_success 'MIDX compaction with bitmaps' ' + git init midx-compact-with-bitmaps && + ( + cd midx-compact-with-bitmaps && + + git config maintenance.auto false && + + write_packs foo bar baz quux woot && + + test-tool read-midx --bitmap $objdir >bitmap.expect && + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test-tool read-midx --bitmap $objdir >bitmap.actual && + + test_cmp bitmap.expect bitmap.actual && + + true + ) +' + +test_expect_success 'MIDX compaction with bitmaps (non-trivial)' ' + git init midx-compact-with-bitmaps-non-trivial && + ( + cd midx-compact-with-bitmaps-non-trivial && + + git config maintenance.auto false && + + git branch -m main && + + # D(4) + # / + # A(1) --- B(2) --- C(3) --- G(7) + # \ + # E(5) --- F(6) + write_packs A B C && + git checkout -b side && + write_packs D && + git checkout -b other B && + write_packs E F && + git checkout main && + write_packs G && + + # Compact layers 2-4, leaving us with: + # + # [A, [B, C, D], E, F, G] + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + + # Then compact the top two layers, condensing the above + # such that the new 4th layer contains F and G. + # + # [A, [B, C, D], E, [F, G]] + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 4 "$midx_chain")" \ + "$(nth_line 5 "$midx_chain")" + ) +' + test_done From e417277ae99687b576e48cb477a7a50241ea0096 Mon Sep 17 00:00:00 2001 From: Yee Cheng Chin Date: Mon, 2 Mar 2026 14:54:25 +0000 Subject: [PATCH 018/236] xdiff: re-diff shifted change groups when using histogram algorithm After a diff algorithm has been run, the compaction phase (xdl_change_compact()) shifts and merges change groups to produce a cleaner output. However, this shifting could create a new matched group where both sides now have matching lines. This results in a wrong-looking diff output which contains redundant lines that are the same on both files. Fix this by detecting this situation, and re-diff the texts on each side to find similar lines, using the fall-back Myer's diff. Only do this for histogram diff as it's the only algorithm where this is relevant. Below contains an example, and more details. For an example, consider two files below: file1: A A A A A A A file2: A A x A A A A When using Myer's diff, the algorithm finds that only the "x" has been changed, and produces a final diff result (these are line diffs, but using word-diff syntax for ease of presentation): A A[-A-]{+x+}A AAA When using histogram diff, the algorithm first discovers the LCS "A AAA", which it uses as anchor, then produces an intermediate diff: {+A Ax+}A AAA[- AAA-]. This is a longer diff than Myer's, but it's still self-consistent. However, the compaction phase attempts to shift the first file's diff group upwards (note that this shift crosses the anchor that histogram had used), leading to the final results for histogram diff: [-A AA-]{+A Ax+}A AAA This is a technically correct patch but looks clearly redundant to a human as the first 3 lines should not be in the diff. The fix would detect that a shift has caused matching to a new group, and re-diff the "A AA" and "A Ax" parts, which results in "A A" correctly re-marked as unchanged. This creates the now correct histogram diff: A A[-A-]{+x+}A AAA This issue is not applicable to Myer's diff algorithm as it already generates a minimal diff, which means a shift cannot result in a smaller diff output (the default Myer's diff in xdiff is not guaranteed to be minimal for performance reasons, but it typically does a good enough job). It's also not applicable to patience diff, because it uses only unique lines as anchor for its splits, and falls back to Myer's diff within each split. Shifting requires both ends having the same lines, and therefore cannot cross the unique line boundaries established by the patience algorithm. In contrast histogram diff uses non-unique lines as anchors, and therefore shifting can cross over them. This issue is rare in a normal repository. Below is a table of repositories (`git log --no-merges -p --histogram -1000`), showing how many times a re-diff was done and how many times it resulted in finding matching lines (therefore addressing this issue) with the fix. In general it is fewer than 1% of diff's that exhibit this offending behavior: | Repo (1k commits) | Re-diff | Found matching lines | |--------------------|---------|----------------------| | llvm-project | 45 | 11 | | vim | 110 | 9 | | git | 18 | 2 | | WebKit | 168 | 1 | | ripgrep | 22 | 1 | | cpython | 32 | 0 | | vscode | 13 | 0 | Signed-off-by: Yee Cheng Chin Signed-off-by: Junio C Hamano --- t/meson.build | 1 + t/t4074-diff-shifted-matched-group.sh | 164 ++++++++++++++++++++++++++ xdiff/xdiffi.c | 47 +++++++- 3 files changed, 210 insertions(+), 2 deletions(-) create mode 100755 t/t4074-diff-shifted-matched-group.sh diff --git a/t/meson.build b/t/meson.build index a5531df415ffe2..d6c61b5ab55ea5 100644 --- a/t/meson.build +++ b/t/meson.build @@ -496,6 +496,7 @@ integration_tests = [ 't4070-diff-pairs.sh', 't4071-diff-minimal.sh', 't4072-diff-max-depth.sh', + 't4074-diff-shifted-matched-group.sh', 't4100-apply-stat.sh', 't4101-apply-nonl.sh', 't4102-apply-rename.sh', diff --git a/t/t4074-diff-shifted-matched-group.sh b/t/t4074-diff-shifted-matched-group.sh new file mode 100755 index 00000000000000..d77fa3b79d2b53 --- /dev/null +++ b/t/t4074-diff-shifted-matched-group.sh @@ -0,0 +1,164 @@ +#!/bin/sh + +test_description='shifted diff groups re-diffing during histogram diff' + +. ./test-lib.sh + +test_expect_success 'shifted/merged diff group should re-diff to minimize patch' ' + test_write_lines A x A A A x A A A >file1 && + test_write_lines A x A Z A x A A A >file2 && + + file1_h=$(git rev-parse --short $(git hash-object file1)) && + file2_h=$(git rev-parse --short $(git hash-object file2)) && + + cat >expect <<-EOF && + diff --git a/file1 b/file2 + index $file1_h..$file2_h 100644 + --- a/file1 + +++ b/file2 + @@ -1,7 +1,7 @@ + A + x + A + -A + +Z + A + x + A + EOF + + test_expect_code 1 git diff --no-index --histogram file1 file2 >output && + test_cmp expect output +' + +test_expect_success 'merged diff group with no shift' ' + test_write_lines A Z B x >file1 && + test_write_lines C D x Z E x >file2 && + + file1_h=$(git rev-parse --short $(git hash-object file1)) && + file2_h=$(git rev-parse --short $(git hash-object file2)) && + + cat >expect <<-EOF && + diff --git a/file1 b/file2 + index $file1_h..$file2_h 100644 + --- a/file1 + +++ b/file2 + @@ -1,4 +1,6 @@ + -A + +C + +D + +x + Z + -B + +E + x + EOF + + test_expect_code 1 git diff --no-index --histogram file1 file2 >output && + test_cmp expect output +' + +test_expect_success 're-diff should preserve diff flags' ' + test_write_lines a b c a b c >file1 && + test_write_lines x " b" z a b c >file2 && + + file1_h=$(git rev-parse --short $(git hash-object file1)) && + file2_h=$(git rev-parse --short $(git hash-object file2)) && + + cat >expect <<-EOF && + diff --git a/file1 b/file2 + index $file1_h..$file2_h 100644 + --- a/file1 + +++ b/file2 + @@ -1,6 +1,6 @@ + -a + -b + -c + +x + + b + +z + a + b + c + EOF + + test_expect_code 1 git diff --no-index --histogram file1 file2 >output && + test_cmp expect output && + + cat >expect_iwhite <<-EOF && + diff --git a/file1 b/file2 + index $file1_h..$file2_h 100644 + --- a/file1 + +++ b/file2 + @@ -1,6 +1,6 @@ + -a + +x + b + -c + +z + a + b + c + EOF + + test_expect_code 1 git diff --no-index --histogram --ignore-all-space file1 file2 >output_iwhite && + test_cmp expect_iwhite output_iwhite +' + +test_expect_success 'shifting on either side should trigger re-diff properly' ' + test_write_lines a b c a b c a b c >file1 && + test_write_lines a b c a1 a2 a3 b c1 a b c >file2 && + + file1_h=$(git rev-parse --short $(git hash-object file1)) && + file2_h=$(git rev-parse --short $(git hash-object file2)) && + + cat >expect1 <<-EOF && + diff --git a/file1 b/file2 + index $file1_h..$file2_h 100644 + --- a/file1 + +++ b/file2 + @@ -1,9 +1,11 @@ + a + b + c + -a + +a1 + +a2 + +a3 + b + -c + +c1 + a + b + c + EOF + + test_expect_code 1 git diff --no-index --histogram file1 file2 >output1 && + test_cmp expect1 output1 && + + cat >expect2 <<-EOF && + diff --git a/file2 b/file1 + index $file2_h..$file1_h 100644 + --- a/file2 + +++ b/file1 + @@ -1,11 +1,9 @@ + a + b + c + -a1 + -a2 + -a3 + +a + b + -c1 + +c + a + b + c + EOF + + test_expect_code 1 git diff --no-index --histogram file2 file1 >output2 && + test_cmp expect2 output2 +' + +test_done diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 6f3998ee54c01e..23cc64efd8112f 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -793,6 +793,7 @@ static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g) */ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { struct xdlgroup g, go; + struct xdlgroup g_orig; long earliest_end, end_matching_other; long groupsize; @@ -806,10 +807,12 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { if (g.end == g.start) goto next; + g_orig = g; + /* * Now shift the change up and then down as far as possible in * each direction. If it bumps into any other changes, merge - * them. + * them and restart the process. */ do { groupsize = g.end - g.start; @@ -862,7 +865,8 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { /* * Move the possibly merged group of changes back to * line up with the last group of changes from the - * other file that it can align with. + * other file that it can align with. This avoids breaking + * a single change into a separate addition/deletion. */ while (go.end == go.start) { if (group_slide_up(xdf, &g)) @@ -915,6 +919,45 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { } } + /* + * If we merged change groups during shifting, the new + * combined group could now have matching lines in both files, + * even if the original separate groups did not. Re-diff the + * new group to find these matching lines to mark them as + * unchanged. + * + * Only do this if the corresponding group in the other file is + * non-empty, as it's trivial otherwise. + * + * Only do this for histogram diff as its LCS algorithm allows + * for this scenario. In contrast, patience diff finds LCS + * of unique lines that groups cannot be shifted across. + * Myer's diff (standalone or used as fall-back in patience + * diff) already finds minimal edits so it is not possible for + * shifted groups to result in a smaller diff. (Without + * XDF_NEED_MINIMAL, Myer's isn't technically guaranteed to be + * minimal, but it should be so most of the time) + */ + if (go.end != go.start && + XDF_DIFF_ALG(flags) == XDF_HISTOGRAM_DIFF && + (g.start != g_orig.start || + g.end != g_orig.end)) { + xpparam_t xpp; + xdfenv_t xe; + + memset(&xpp, 0, sizeof(xpp)); + xpp.flags = flags & ~XDF_DIFF_ALGORITHM_MASK; + + xe.xdf1 = *xdf; + xe.xdf2 = *xdfo; + + if (xdl_fall_back_diff(&xe, &xpp, + g.start + 1, g.end - g.start, + go.start + 1, go.end - go.start)) { + return -1; + } + } + next: /* Move past the just-processed group: */ if (group_next(xdf, &g)) From 6e4d923267ca80dd1392bf7e0673c74711e8cb68 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 2 Mar 2026 13:13:05 +0100 Subject: [PATCH 019/236] add-patch: split out header from "add-interactive.h" While we have a "add-patch.c" code file, its declarations are part of "add-interactive.h". This makes it somewhat harder than necessary to find relevant code and to identify clear boundaries between the two subsystems. Split up concerns and move declarations that relate to "add-patch.c" into a new "add-patch.h" header. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- add-interactive.h | 24 +++--------------------- add-patch.c | 1 + add-patch.h | 27 +++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 21 deletions(-) create mode 100644 add-patch.h diff --git a/add-interactive.h b/add-interactive.h index 784339777509f7..6c62489bfef287 100644 --- a/add-interactive.h +++ b/add-interactive.h @@ -1,15 +1,11 @@ #ifndef ADD_INTERACTIVE_H #define ADD_INTERACTIVE_H +#include "add-patch.h" #include "color.h" -struct add_p_opt { - int context; - int interhunkcontext; - int auto_advance; -}; - -#define ADD_P_OPT_INIT { .context = -1, .interhunkcontext = -1, .auto_advance = 1 } +struct pathspec; +struct repository; struct add_i_state { struct repository *r; @@ -37,21 +33,7 @@ void init_add_i_state(struct add_i_state *s, struct repository *r, struct add_p_opt *add_p_opt); void clear_add_i_state(struct add_i_state *s); -struct repository; -struct pathspec; int run_add_i(struct repository *r, const struct pathspec *ps, struct add_p_opt *add_p_opt); -enum add_p_mode { - ADD_P_ADD, - ADD_P_STASH, - ADD_P_RESET, - ADD_P_CHECKOUT, - ADD_P_WORKTREE, -}; - -int run_add_p(struct repository *r, enum add_p_mode mode, - struct add_p_opt *o, const char *revision, - const struct pathspec *ps); - #endif diff --git a/add-patch.c b/add-patch.c index 8c03f710d380c1..8ce2fc02f6d2e5 100644 --- a/add-patch.c +++ b/add-patch.c @@ -3,6 +3,7 @@ #include "git-compat-util.h" #include "add-interactive.h" +#include "add-patch.h" #include "advice.h" #include "editor.h" #include "environment.h" diff --git a/add-patch.h b/add-patch.h new file mode 100644 index 00000000000000..88b00ca788722a --- /dev/null +++ b/add-patch.h @@ -0,0 +1,27 @@ +#ifndef ADD_PATCH_H +#define ADD_PATCH_H + +struct pathspec; +struct repository; + +struct add_p_opt { + int context; + int interhunkcontext; + int auto_advance; +}; + +#define ADD_P_OPT_INIT { .context = -1, .interhunkcontext = -1, .auto_advance = 1 } + +enum add_p_mode { + ADD_P_ADD, + ADD_P_STASH, + ADD_P_RESET, + ADD_P_CHECKOUT, + ADD_P_WORKTREE, +}; + +int run_add_p(struct repository *r, enum add_p_mode mode, + struct add_p_opt *o, const char *revision, + const struct pathspec *ps); + +#endif From e3d4d7787cc3b2f0281e808042ceaa08e05c281b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 2 Mar 2026 13:13:06 +0100 Subject: [PATCH 020/236] add-patch: split out `struct interactive_options` The `struct add_p_opt` is reused both by our infra for "git add -p" and "git add -i". Users of `run_add_i()` for example are expected to pass `struct add_p_opt`. This is somewhat confusing and raises the question of which options apply to what part of the stack. But things are even more confusing than that: while callers are expected to pass in `struct add_p_opt`, these options ultimately get used to initialize a `struct add_i_state` that is used by both subsystems. So we are basically going full circle here. Refactor the code and split out a new `struct interactive_options` that hosts common options used by both. These options are then applied to a `struct interactive_config` that hosts common configuration. This refactoring doesn't yet fully detangle the two subsystems from one another, as we still end up calling `init_add_i_state()` in the "git add -p" subsystem. This will be fixed in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- add-interactive.c | 177 +++++++++--------------------------------- add-interactive.h | 24 +----- add-patch.c | 187 ++++++++++++++++++++++++++++++++++++--------- add-patch.h | 38 ++++++++- builtin/add.c | 26 +++---- builtin/checkout.c | 4 +- builtin/commit.c | 16 ++-- builtin/reset.c | 20 ++--- builtin/stash.c | 54 ++++++------- commit.h | 2 +- 10 files changed, 290 insertions(+), 258 deletions(-) diff --git a/add-interactive.c b/add-interactive.c index 158063968266ad..152e2a02979e45 100644 --- a/add-interactive.c +++ b/add-interactive.c @@ -3,7 +3,6 @@ #include "git-compat-util.h" #include "add-interactive.h" #include "color.h" -#include "config.h" #include "diffcore.h" #include "gettext.h" #include "hash.h" @@ -20,120 +19,18 @@ #include "prompt.h" #include "tree.h" -static void init_color(struct repository *r, enum git_colorbool use_color, - const char *section_and_slot, char *dst, - const char *default_color) -{ - char *key = xstrfmt("color.%s", section_and_slot); - const char *value; - - if (!want_color(use_color)) - dst[0] = '\0'; - else if (repo_config_get_value(r, key, &value) || - color_parse(value, dst)) - strlcpy(dst, default_color, COLOR_MAXLEN); - - free(key); -} - -static enum git_colorbool check_color_config(struct repository *r, const char *var) -{ - const char *value; - enum git_colorbool ret; - - if (repo_config_get_value(r, var, &value)) - ret = GIT_COLOR_UNKNOWN; - else - ret = git_config_colorbool(var, value); - - /* - * Do not rely on want_color() to fall back to color.ui for us. It uses - * the value parsed by git_color_config(), which may not have been - * called by the main command. - */ - if (ret == GIT_COLOR_UNKNOWN && - !repo_config_get_value(r, "color.ui", &value)) - ret = git_config_colorbool("color.ui", value); - - return ret; -} - void init_add_i_state(struct add_i_state *s, struct repository *r, - struct add_p_opt *add_p_opt) + struct interactive_options *opts) { s->r = r; - s->context = -1; - s->interhunkcontext = -1; - s->auto_advance = add_p_opt->auto_advance; - - s->use_color_interactive = check_color_config(r, "color.interactive"); - - init_color(r, s->use_color_interactive, "interactive.header", - s->header_color, GIT_COLOR_BOLD); - init_color(r, s->use_color_interactive, "interactive.help", - s->help_color, GIT_COLOR_BOLD_RED); - init_color(r, s->use_color_interactive, "interactive.prompt", - s->prompt_color, GIT_COLOR_BOLD_BLUE); - init_color(r, s->use_color_interactive, "interactive.error", - s->error_color, GIT_COLOR_BOLD_RED); - strlcpy(s->reset_color_interactive, - want_color(s->use_color_interactive) ? GIT_COLOR_RESET : "", COLOR_MAXLEN); - - s->use_color_diff = check_color_config(r, "color.diff"); - - init_color(r, s->use_color_diff, "diff.frag", s->fraginfo_color, - diff_get_color(s->use_color_diff, DIFF_FRAGINFO)); - init_color(r, s->use_color_diff, "diff.context", s->context_color, - "fall back"); - if (!strcmp(s->context_color, "fall back")) - init_color(r, s->use_color_diff, "diff.plain", - s->context_color, - diff_get_color(s->use_color_diff, DIFF_CONTEXT)); - init_color(r, s->use_color_diff, "diff.old", s->file_old_color, - diff_get_color(s->use_color_diff, DIFF_FILE_OLD)); - init_color(r, s->use_color_diff, "diff.new", s->file_new_color, - diff_get_color(s->use_color_diff, DIFF_FILE_NEW)); - strlcpy(s->reset_color_diff, - want_color(s->use_color_diff) ? GIT_COLOR_RESET : "", COLOR_MAXLEN); - - FREE_AND_NULL(s->interactive_diff_filter); - repo_config_get_string(r, "interactive.difffilter", - &s->interactive_diff_filter); - - FREE_AND_NULL(s->interactive_diff_algorithm); - repo_config_get_string(r, "diff.algorithm", - &s->interactive_diff_algorithm); - - if (!repo_config_get_int(r, "diff.context", &s->context)) - if (s->context < 0) - die(_("%s cannot be negative"), "diff.context"); - if (!repo_config_get_int(r, "diff.interHunkContext", &s->interhunkcontext)) - if (s->interhunkcontext < 0) - die(_("%s cannot be negative"), "diff.interHunkContext"); - - repo_config_get_bool(r, "interactive.singlekey", &s->use_single_key); - if (s->use_single_key) - setbuf(stdin, NULL); - - if (add_p_opt->context != -1) { - if (add_p_opt->context < 0) - die(_("%s cannot be negative"), "--unified"); - s->context = add_p_opt->context; - } - if (add_p_opt->interhunkcontext != -1) { - if (add_p_opt->interhunkcontext < 0) - die(_("%s cannot be negative"), "--inter-hunk-context"); - s->interhunkcontext = add_p_opt->interhunkcontext; - } + interactive_config_init(&s->cfg, r, opts); } void clear_add_i_state(struct add_i_state *s) { - FREE_AND_NULL(s->interactive_diff_filter); - FREE_AND_NULL(s->interactive_diff_algorithm); + interactive_config_clear(&s->cfg); memset(s, 0, sizeof(*s)); - s->use_color_interactive = GIT_COLOR_UNKNOWN; - s->use_color_diff = GIT_COLOR_UNKNOWN; + interactive_config_clear(&s->cfg); } /* @@ -287,7 +184,7 @@ static void list(struct add_i_state *s, struct string_list *list, int *selected, return; if (opts->header) - color_fprintf_ln(stdout, s->header_color, + color_fprintf_ln(stdout, s->cfg.header_color, "%s", opts->header); for (i = 0; i < list->nr; i++) { @@ -355,7 +252,7 @@ static ssize_t list_and_choose(struct add_i_state *s, list(s, &items->items, items->selected, &opts->list_opts); - color_fprintf(stdout, s->prompt_color, "%s", opts->prompt); + color_fprintf(stdout, s->cfg.prompt_color, "%s", opts->prompt); fputs(singleton ? "> " : ">> ", stdout); fflush(stdout); @@ -433,7 +330,7 @@ static ssize_t list_and_choose(struct add_i_state *s, if (from < 0 || from >= items->items.nr || (singleton && from + 1 != to)) { - color_fprintf_ln(stderr, s->error_color, + color_fprintf_ln(stderr, s->cfg.error_color, _("Huh (%s)?"), p); break; } else if (singleton) { @@ -993,7 +890,7 @@ static int run_patch(struct add_i_state *s, const struct pathspec *ps, free(files->items.items[i].string); } else if (item->index.unmerged || item->worktree.unmerged) { - color_fprintf_ln(stderr, s->error_color, + color_fprintf_ln(stderr, s->cfg.error_color, _("ignoring unmerged: %s"), files->items.items[i].string); free(item); @@ -1015,10 +912,10 @@ static int run_patch(struct add_i_state *s, const struct pathspec *ps, opts->prompt = N_("Patch update"); count = list_and_choose(s, files, opts); if (count > 0) { - struct add_p_opt add_p_opt = { - .context = s->context, - .interhunkcontext = s->interhunkcontext, - .auto_advance = s->auto_advance + struct interactive_options opts = { + .context = s->cfg.context, + .interhunkcontext = s->cfg.interhunkcontext, + .auto_advance = s->cfg.auto_advance, }; struct strvec args = STRVEC_INIT; struct pathspec ps_selected = { 0 }; @@ -1030,7 +927,7 @@ static int run_patch(struct add_i_state *s, const struct pathspec *ps, parse_pathspec(&ps_selected, PATHSPEC_ALL_MAGIC & ~PATHSPEC_LITERAL, PATHSPEC_LITERAL_PATH, "", args.v); - res = run_add_p(s->r, ADD_P_ADD, &add_p_opt, NULL, &ps_selected); + res = run_add_p(s->r, ADD_P_ADD, &opts, NULL, &ps_selected); strvec_clear(&args); clear_pathspec(&ps_selected); } @@ -1066,10 +963,10 @@ static int run_diff(struct add_i_state *s, const struct pathspec *ps, struct child_process cmd = CHILD_PROCESS_INIT; strvec_pushl(&cmd.args, "git", "diff", "-p", "--cached", NULL); - if (s->context != -1) - strvec_pushf(&cmd.args, "--unified=%i", s->context); - if (s->interhunkcontext != -1) - strvec_pushf(&cmd.args, "--inter-hunk-context=%i", s->interhunkcontext); + if (s->cfg.context != -1) + strvec_pushf(&cmd.args, "--unified=%i", s->cfg.context); + if (s->cfg.interhunkcontext != -1) + strvec_pushf(&cmd.args, "--inter-hunk-context=%i", s->cfg.interhunkcontext); strvec_pushl(&cmd.args, oid_to_hex(!is_initial ? &oid : s->r->hash_algo->empty_tree), "--", NULL); for (i = 0; i < files->items.nr; i++) @@ -1087,17 +984,17 @@ static int run_help(struct add_i_state *s, const struct pathspec *ps UNUSED, struct prefix_item_list *files UNUSED, struct list_and_choose_options *opts UNUSED) { - color_fprintf_ln(stdout, s->help_color, "status - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "status - %s", _("show paths with changes")); - color_fprintf_ln(stdout, s->help_color, "update - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "update - %s", _("add working tree state to the staged set of changes")); - color_fprintf_ln(stdout, s->help_color, "revert - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "revert - %s", _("revert staged set of changes back to the HEAD version")); - color_fprintf_ln(stdout, s->help_color, "patch - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "patch - %s", _("pick hunks and update selectively")); - color_fprintf_ln(stdout, s->help_color, "diff - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "diff - %s", _("view diff between HEAD and index")); - color_fprintf_ln(stdout, s->help_color, "add untracked - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "add untracked - %s", _("add contents of untracked files to the staged set of changes")); return 0; @@ -1105,21 +1002,21 @@ static int run_help(struct add_i_state *s, const struct pathspec *ps UNUSED, static void choose_prompt_help(struct add_i_state *s) { - color_fprintf_ln(stdout, s->help_color, "%s", + color_fprintf_ln(stdout, s->cfg.help_color, "%s", _("Prompt help:")); - color_fprintf_ln(stdout, s->help_color, "1 - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "1 - %s", _("select a single item")); - color_fprintf_ln(stdout, s->help_color, "3-5 - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "3-5 - %s", _("select a range of items")); - color_fprintf_ln(stdout, s->help_color, "2-3,6-9 - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "2-3,6-9 - %s", _("select multiple ranges")); - color_fprintf_ln(stdout, s->help_color, "foo - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "foo - %s", _("select item based on unique prefix")); - color_fprintf_ln(stdout, s->help_color, "-... - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "-... - %s", _("unselect specified items")); - color_fprintf_ln(stdout, s->help_color, "* - %s", + color_fprintf_ln(stdout, s->cfg.help_color, "* - %s", _("choose all items")); - color_fprintf_ln(stdout, s->help_color, " - %s", + color_fprintf_ln(stdout, s->cfg.help_color, " - %s", _("(empty) finish selecting")); } @@ -1154,7 +1051,7 @@ static void print_command_item(int i, int selected UNUSED, static void command_prompt_help(struct add_i_state *s) { - const char *help_color = s->help_color; + const char *help_color = s->cfg.help_color; color_fprintf_ln(stdout, help_color, "%s", _("Prompt help:")); color_fprintf_ln(stdout, help_color, "1 - %s", _("select a numbered item")); @@ -1165,7 +1062,7 @@ static void command_prompt_help(struct add_i_state *s) } int run_add_i(struct repository *r, const struct pathspec *ps, - struct add_p_opt *add_p_opt) + struct interactive_options *interactive_opts) { struct add_i_state s = { NULL }; struct print_command_item_data data = { "[", "]" }; @@ -1208,15 +1105,15 @@ int run_add_i(struct repository *r, const struct pathspec *ps, ->util = util; } - init_add_i_state(&s, r, add_p_opt); + init_add_i_state(&s, r, interactive_opts); /* * When color was asked for, use the prompt color for * highlighting, otherwise use square brackets. */ - if (want_color(s.use_color_interactive)) { - data.color = s.prompt_color; - data.reset = s.reset_color_interactive; + if (want_color(s.cfg.use_color_interactive)) { + data.color = s.cfg.prompt_color; + data.reset = s.cfg.reset_color_interactive; } print_file_item_data.color = data.color; print_file_item_data.reset = data.reset; diff --git a/add-interactive.h b/add-interactive.h index 6c62489bfef287..eefa2edc7c124b 100644 --- a/add-interactive.h +++ b/add-interactive.h @@ -2,38 +2,20 @@ #define ADD_INTERACTIVE_H #include "add-patch.h" -#include "color.h" struct pathspec; struct repository; struct add_i_state { struct repository *r; - enum git_colorbool use_color_interactive; - enum git_colorbool use_color_diff; - char header_color[COLOR_MAXLEN]; - char help_color[COLOR_MAXLEN]; - char prompt_color[COLOR_MAXLEN]; - char error_color[COLOR_MAXLEN]; - char reset_color_interactive[COLOR_MAXLEN]; - - char fraginfo_color[COLOR_MAXLEN]; - char context_color[COLOR_MAXLEN]; - char file_old_color[COLOR_MAXLEN]; - char file_new_color[COLOR_MAXLEN]; - char reset_color_diff[COLOR_MAXLEN]; - - int use_single_key; - char *interactive_diff_filter, *interactive_diff_algorithm; - int context, interhunkcontext; - int auto_advance; + struct interactive_config cfg; }; void init_add_i_state(struct add_i_state *s, struct repository *r, - struct add_p_opt *add_p_opt); + struct interactive_options *opts); void clear_add_i_state(struct add_i_state *s); int run_add_i(struct repository *r, const struct pathspec *ps, - struct add_p_opt *add_p_opt); + struct interactive_options *opts); #endif diff --git a/add-patch.c b/add-patch.c index 8ce2fc02f6d2e5..756143eb846dba 100644 --- a/add-patch.c +++ b/add-patch.c @@ -5,6 +5,8 @@ #include "add-interactive.h" #include "add-patch.h" #include "advice.h" +#include "config.h" +#include "diff.h" #include "editor.h" #include "environment.h" #include "gettext.h" @@ -279,6 +281,123 @@ struct add_p_state { const char *revision; }; +static void init_color(struct repository *r, + enum git_colorbool use_color, + const char *section_and_slot, char *dst, + const char *default_color) +{ + char *key = xstrfmt("color.%s", section_and_slot); + const char *value; + + if (!want_color(use_color)) + dst[0] = '\0'; + else if (repo_config_get_value(r, key, &value) || + color_parse(value, dst)) + strlcpy(dst, default_color, COLOR_MAXLEN); + + free(key); +} + +static enum git_colorbool check_color_config(struct repository *r, const char *var) +{ + const char *value; + enum git_colorbool ret; + + if (repo_config_get_value(r, var, &value)) + ret = GIT_COLOR_UNKNOWN; + else + ret = git_config_colorbool(var, value); + + /* + * Do not rely on want_color() to fall back to color.ui for us. It uses + * the value parsed by git_color_config(), which may not have been + * called by the main command. + */ + if (ret == GIT_COLOR_UNKNOWN && + !repo_config_get_value(r, "color.ui", &value)) + ret = git_config_colorbool("color.ui", value); + + return ret; +} + +void interactive_config_init(struct interactive_config *cfg, + struct repository *r, + struct interactive_options *opts) +{ + cfg->context = -1; + cfg->interhunkcontext = -1; + cfg->auto_advance = opts->auto_advance; + + cfg->use_color_interactive = check_color_config(r, "color.interactive"); + + init_color(r, cfg->use_color_interactive, "interactive.header", + cfg->header_color, GIT_COLOR_BOLD); + init_color(r, cfg->use_color_interactive, "interactive.help", + cfg->help_color, GIT_COLOR_BOLD_RED); + init_color(r, cfg->use_color_interactive, "interactive.prompt", + cfg->prompt_color, GIT_COLOR_BOLD_BLUE); + init_color(r, cfg->use_color_interactive, "interactive.error", + cfg->error_color, GIT_COLOR_BOLD_RED); + strlcpy(cfg->reset_color_interactive, + want_color(cfg->use_color_interactive) ? GIT_COLOR_RESET : "", COLOR_MAXLEN); + + cfg->use_color_diff = check_color_config(r, "color.diff"); + + init_color(r, cfg->use_color_diff, "diff.frag", cfg->fraginfo_color, + diff_get_color(cfg->use_color_diff, DIFF_FRAGINFO)); + init_color(r, cfg->use_color_diff, "diff.context", cfg->context_color, + "fall back"); + if (!strcmp(cfg->context_color, "fall back")) + init_color(r, cfg->use_color_diff, "diff.plain", + cfg->context_color, + diff_get_color(cfg->use_color_diff, DIFF_CONTEXT)); + init_color(r, cfg->use_color_diff, "diff.old", cfg->file_old_color, + diff_get_color(cfg->use_color_diff, DIFF_FILE_OLD)); + init_color(r, cfg->use_color_diff, "diff.new", cfg->file_new_color, + diff_get_color(cfg->use_color_diff, DIFF_FILE_NEW)); + strlcpy(cfg->reset_color_diff, + want_color(cfg->use_color_diff) ? GIT_COLOR_RESET : "", COLOR_MAXLEN); + + FREE_AND_NULL(cfg->interactive_diff_filter); + repo_config_get_string(r, "interactive.difffilter", + &cfg->interactive_diff_filter); + + FREE_AND_NULL(cfg->interactive_diff_algorithm); + repo_config_get_string(r, "diff.algorithm", + &cfg->interactive_diff_algorithm); + + if (!repo_config_get_int(r, "diff.context", &cfg->context)) + if (cfg->context < 0) + die(_("%s cannot be negative"), "diff.context"); + if (!repo_config_get_int(r, "diff.interHunkContext", &cfg->interhunkcontext)) + if (cfg->interhunkcontext < 0) + die(_("%s cannot be negative"), "diff.interHunkContext"); + + repo_config_get_bool(r, "interactive.singlekey", &cfg->use_single_key); + if (cfg->use_single_key) + setbuf(stdin, NULL); + + if (opts->context != -1) { + if (opts->context < 0) + die(_("%s cannot be negative"), "--unified"); + cfg->context = opts->context; + } + if (opts->interhunkcontext != -1) { + if (opts->interhunkcontext < 0) + die(_("%s cannot be negative"), "--inter-hunk-context"); + cfg->interhunkcontext = opts->interhunkcontext; + } +} + +void interactive_config_clear(struct interactive_config *cfg) +{ + FREE_AND_NULL(cfg->interactive_diff_filter); + FREE_AND_NULL(cfg->interactive_diff_algorithm); + memset(cfg, 0, sizeof(*cfg)); + cfg->use_color_interactive = GIT_COLOR_UNKNOWN; + cfg->use_color_diff = GIT_COLOR_UNKNOWN; +} + static void add_p_state_clear(struct add_p_state *s) { size_t i; @@ -299,9 +418,9 @@ static void err(struct add_p_state *s, const char *fmt, ...) va_list args; va_start(args, fmt); - fputs(s->s.error_color, stdout); + fputs(s->s.cfg.error_color, stdout); vprintf(fmt, args); - puts(s->s.reset_color_interactive); + puts(s->s.cfg.reset_color_interactive); va_end(args); } @@ -424,12 +543,12 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) int res; strvec_pushv(&args, s->mode->diff_cmd); - if (s->s.context != -1) - strvec_pushf(&args, "--unified=%i", s->s.context); - if (s->s.interhunkcontext != -1) - strvec_pushf(&args, "--inter-hunk-context=%i", s->s.interhunkcontext); - if (s->s.interactive_diff_algorithm) - strvec_pushf(&args, "--diff-algorithm=%s", s->s.interactive_diff_algorithm); + if (s->s.cfg.context != -1) + strvec_pushf(&args, "--unified=%i", s->s.cfg.context); + if (s->s.cfg.interhunkcontext != -1) + strvec_pushf(&args, "--inter-hunk-context=%i", s->s.cfg.interhunkcontext); + if (s->s.cfg.interactive_diff_algorithm) + strvec_pushf(&args, "--diff-algorithm=%s", s->s.cfg.interactive_diff_algorithm); if (s->revision) { struct object_id oid; strvec_push(&args, @@ -458,9 +577,9 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) } strbuf_complete_line(plain); - if (want_color_fd(1, s->s.use_color_diff)) { + if (want_color_fd(1, s->s.cfg.use_color_diff)) { struct child_process colored_cp = CHILD_PROCESS_INIT; - const char *diff_filter = s->s.interactive_diff_filter; + const char *diff_filter = s->s.cfg.interactive_diff_filter; setup_child_process(s, &colored_cp, NULL); xsnprintf((char *)args.v[color_arg_index], 8, "--color"); @@ -693,7 +812,7 @@ static void render_hunk(struct add_p_state *s, struct hunk *hunk, hunk->colored_end - hunk->colored_start); return; } else { - strbuf_addstr(out, s->s.fraginfo_color); + strbuf_addstr(out, s->s.cfg.fraginfo_color); p = s->colored.buf + header->colored_extra_start; len = header->colored_extra_end - header->colored_extra_start; @@ -715,7 +834,7 @@ static void render_hunk(struct add_p_state *s, struct hunk *hunk, if (len) strbuf_add(out, p, len); else if (colored) - strbuf_addf(out, "%s\n", s->s.reset_color_diff); + strbuf_addf(out, "%s\n", s->s.cfg.reset_color_diff); else strbuf_addch(out, '\n'); } @@ -1104,12 +1223,12 @@ static void recolor_hunk(struct add_p_state *s, struct hunk *hunk) strbuf_addstr(&s->colored, plain[current] == '-' ? - s->s.file_old_color : + s->s.cfg.file_old_color : plain[current] == '+' ? - s->s.file_new_color : - s->s.context_color); + s->s.cfg.file_new_color : + s->s.cfg.context_color); strbuf_add(&s->colored, plain + current, eol - current); - strbuf_addstr(&s->colored, s->s.reset_color_diff); + strbuf_addstr(&s->colored, s->s.cfg.reset_color_diff); if (next > eol) strbuf_add(&s->colored, plain + eol, next - eol); current = next; @@ -1238,7 +1357,7 @@ static int run_apply_check(struct add_p_state *s, static int read_single_character(struct add_p_state *s) { - if (s->s.use_single_key) { + if (s->s.cfg.use_single_key) { int res = read_key_without_echo(&s->answer); printf("%s\n", res == EOF ? "" : s->answer.buf); return res; @@ -1252,7 +1371,7 @@ static int read_single_character(struct add_p_state *s) static int prompt_yesno(struct add_p_state *s, const char *prompt) { for (;;) { - color_fprintf(stdout, s->s.prompt_color, "%s", _(prompt)); + color_fprintf(stdout, s->s.cfg.prompt_color, "%s", _(prompt)); fflush(stdout); if (read_single_character(s) == EOF) return -1; @@ -1541,7 +1660,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) /* Everything decided? */ if (undecided_previous < 0 && undecided_next < 0 && hunk->use != UNDECIDED_HUNK) { - if (!s->s.auto_advance) + if (!s->s.cfg.auto_advance) all_decided = 1; else { patch_update_resp++; @@ -1595,11 +1714,11 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) permitted |= ALLOW_EDIT; strbuf_addstr(&s->buf, ",e"); } - if (!s->s.auto_advance && s->file_diff_nr > 1) { + if (!s->s.cfg.auto_advance && s->file_diff_nr > 1) { permitted |= ALLOW_GOTO_NEXT_FILE; strbuf_addstr(&s->buf, ",>"); } - if (!s->s.auto_advance && s->file_diff_nr > 1) { + if (!s->s.cfg.auto_advance && s->file_diff_nr > 1) { permitted |= ALLOW_GOTO_PREVIOUS_FILE; strbuf_addstr(&s->buf, ",<"); } @@ -1614,7 +1733,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) else prompt_mode_type = PROMPT_HUNK; - printf("%s(%"PRIuMAX"/%"PRIuMAX") ", s->s.prompt_color, + printf("%s(%"PRIuMAX"/%"PRIuMAX") ", s->s.cfg.prompt_color, (uintmax_t)hunk_index + 1, (uintmax_t)(file_diff->hunk_nr ? file_diff->hunk_nr @@ -1627,8 +1746,8 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) } printf(_(s->mode->prompt_mode[prompt_mode_type]), hunk_use_decision, s->buf.buf); - if (*s->s.reset_color_interactive) - fputs(s->s.reset_color_interactive, stdout); + if (*s->s.cfg.reset_color_interactive) + fputs(s->s.cfg.reset_color_interactive, stdout); fflush(stdout); if (read_single_character(s) == EOF) { patch_update_resp = s->file_diff_nr; @@ -1679,7 +1798,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) } else if (ch == 'q') { patch_update_resp = s->file_diff_nr; break; - } else if (!s->s.auto_advance && s->answer.buf[0] == '>') { + } else if (!s->s.cfg.auto_advance && s->answer.buf[0] == '>') { if (permitted & ALLOW_GOTO_NEXT_FILE) { if (patch_update_resp == s->file_diff_nr - 1) patch_update_resp = 0; @@ -1690,7 +1809,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) err(s, _("No next file")); continue; } - } else if (!s->s.auto_advance && s->answer.buf[0] == '<') { + } else if (!s->s.cfg.auto_advance && s->answer.buf[0] == '<') { if (permitted & ALLOW_GOTO_PREVIOUS_FILE) { if (patch_update_resp == 0) patch_update_resp = s->file_diff_nr - 1; @@ -1813,7 +1932,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) err(s, _("Sorry, cannot split this hunk")); } else if (!split_hunk(s, file_diff, hunk - file_diff->hunk)) { - color_fprintf_ln(stdout, s->s.header_color, + color_fprintf_ln(stdout, s->s.cfg.header_color, _("Split into %d hunks."), (int)splittable_into); rendered_hunk_index = -1; @@ -1831,7 +1950,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) } else if (s->answer.buf[0] == '?') { const char *p = _(help_patch_remainder), *eol = p; - color_fprintf(stdout, s->s.help_color, "%s", + color_fprintf(stdout, s->s.cfg.help_color, "%s", _(s->mode->help_patch_text)); /* @@ -1855,13 +1974,13 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) if (file_diff->hunk[i].use == SKIP_HUNK) skipped += 1; } - color_fprintf_ln(stdout, s->s.help_color, _(p), + color_fprintf_ln(stdout, s->s.cfg.help_color, _(p), total, used, skipped); } if (*p != '?' && !strchr(s->buf.buf, *p)) continue; - color_fprintf_ln(stdout, s->s.help_color, + color_fprintf_ln(stdout, s->s.cfg.help_color, "%.*s", (int)(eol - p), p); } } else { @@ -1870,7 +1989,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) } } - if (s->s.auto_advance) + if (s->s.cfg.auto_advance) apply_patch(s, file_diff); putchar('\n'); @@ -1878,7 +1997,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) } int run_add_p(struct repository *r, enum add_p_mode mode, - struct add_p_opt *o, const char *revision, + struct interactive_options *opts, const char *revision, const struct pathspec *ps) { struct add_p_state s = { @@ -1886,7 +2005,7 @@ int run_add_p(struct repository *r, enum add_p_mode mode, }; size_t i, binary_count = 0; - init_add_i_state(&s.s, r, o); + init_add_i_state(&s.s, r, opts); if (mode == ADD_P_STASH) s.mode = &patch_mode_stash; @@ -1932,7 +2051,7 @@ int run_add_p(struct repository *r, enum add_p_mode mode, if ((i = patch_update_file(&s, i)) == s.file_diff_nr) break; } - if (!s.s.auto_advance) + if (!s.s.cfg.auto_advance) for (i = 0; i < s.file_diff_nr; i++) apply_patch(&s, s.file_diff + i); diff --git a/add-patch.h b/add-patch.h index 88b00ca788722a..e6868c60a2f343 100644 --- a/add-patch.h +++ b/add-patch.h @@ -1,16 +1,48 @@ #ifndef ADD_PATCH_H #define ADD_PATCH_H +#include "color.h" + struct pathspec; struct repository; -struct add_p_opt { +struct interactive_options { int context; int interhunkcontext; int auto_advance; }; -#define ADD_P_OPT_INIT { .context = -1, .interhunkcontext = -1, .auto_advance = 1 } +#define INTERACTIVE_OPTIONS_INIT { \ + .context = -1, \ + .interhunkcontext = -1, \ + .auto_advance = 1, \ +} + +struct interactive_config { + enum git_colorbool use_color_interactive; + enum git_colorbool use_color_diff; + char header_color[COLOR_MAXLEN]; + char help_color[COLOR_MAXLEN]; + char prompt_color[COLOR_MAXLEN]; + char error_color[COLOR_MAXLEN]; + char reset_color_interactive[COLOR_MAXLEN]; + + char fraginfo_color[COLOR_MAXLEN]; + char context_color[COLOR_MAXLEN]; + char file_old_color[COLOR_MAXLEN]; + char file_new_color[COLOR_MAXLEN]; + char reset_color_diff[COLOR_MAXLEN]; + + int use_single_key; + char *interactive_diff_filter, *interactive_diff_algorithm; + int context, interhunkcontext; + int auto_advance; +}; + +void interactive_config_init(struct interactive_config *cfg, + struct repository *r, + struct interactive_options *opts); +void interactive_config_clear(struct interactive_config *cfg); enum add_p_mode { ADD_P_ADD, @@ -21,7 +53,7 @@ enum add_p_mode { }; int run_add_p(struct repository *r, enum add_p_mode mode, - struct add_p_opt *o, const char *revision, + struct interactive_options *opts, const char *revision, const struct pathspec *ps); #endif diff --git a/builtin/add.c b/builtin/add.c index 4357f87b7f807b..84f9bcb789fc7b 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -31,7 +31,7 @@ static const char * const builtin_add_usage[] = { NULL }; static int patch_interactive, add_interactive, edit_interactive; -static struct add_p_opt add_p_opt = ADD_P_OPT_INIT; +static struct interactive_options interactive_opts = INTERACTIVE_OPTIONS_INIT; static int take_worktree_changes; static int add_renormalize; static int pathspec_file_nul; @@ -160,7 +160,7 @@ static int refresh(struct repository *repo, int verbose, const struct pathspec * int interactive_add(struct repository *repo, const char **argv, const char *prefix, - int patch, struct add_p_opt *add_p_opt) + int patch, struct interactive_options *interactive_opts) { struct pathspec pathspec; int ret; @@ -172,9 +172,9 @@ int interactive_add(struct repository *repo, prefix, argv); if (patch) - ret = !!run_add_p(repo, ADD_P_ADD, add_p_opt, NULL, &pathspec); + ret = !!run_add_p(repo, ADD_P_ADD, interactive_opts, NULL, &pathspec); else - ret = !!run_add_i(repo, &pathspec, add_p_opt); + ret = !!run_add_i(repo, &pathspec, interactive_opts); clear_pathspec(&pathspec); return ret; @@ -256,10 +256,10 @@ static struct option builtin_add_options[] = { OPT_GROUP(""), OPT_BOOL('i', "interactive", &add_interactive, N_("interactive picking")), OPT_BOOL('p', "patch", &patch_interactive, N_("select hunks interactively")), - OPT_BOOL(0, "auto-advance", &add_p_opt.auto_advance, + OPT_BOOL(0, "auto-advance", &interactive_opts.auto_advance, N_("auto advance to the next file when selecting hunks interactively")), - OPT_DIFF_UNIFIED(&add_p_opt.context), - OPT_DIFF_INTERHUNK_CONTEXT(&add_p_opt.interhunkcontext), + OPT_DIFF_UNIFIED(&interactive_opts.context), + OPT_DIFF_INTERHUNK_CONTEXT(&interactive_opts.interhunkcontext), OPT_BOOL('e', "edit", &edit_interactive, N_("edit current diff and apply")), OPT__FORCE(&ignored_too, N_("allow adding otherwise ignored files"), 0), OPT_BOOL('u', "update", &take_worktree_changes, N_("update tracked files")), @@ -402,9 +402,9 @@ int cmd_add(int argc, prepare_repo_settings(repo); repo->settings.command_requires_full_index = 0; - if (add_p_opt.context < -1) + if (interactive_opts.context < -1) die(_("'%s' cannot be negative"), "--unified"); - if (add_p_opt.interhunkcontext < -1) + if (interactive_opts.interhunkcontext < -1) die(_("'%s' cannot be negative"), "--inter-hunk-context"); if (patch_interactive) @@ -414,13 +414,13 @@ int cmd_add(int argc, die(_("options '%s' and '%s' cannot be used together"), "--dry-run", "--interactive/--patch"); if (pathspec_from_file) die(_("options '%s' and '%s' cannot be used together"), "--pathspec-from-file", "--interactive/--patch"); - exit(interactive_add(repo, argv + 1, prefix, patch_interactive, &add_p_opt)); + exit(interactive_add(repo, argv + 1, prefix, patch_interactive, &interactive_opts)); } else { - if (add_p_opt.context != -1) + if (interactive_opts.context != -1) die(_("the option '%s' requires '%s'"), "--unified", "--interactive/--patch"); - if (add_p_opt.interhunkcontext != -1) + if (interactive_opts.interhunkcontext != -1) die(_("the option '%s' requires '%s'"), "--inter-hunk-context", "--interactive/--patch"); - if (!add_p_opt.auto_advance) + if (!interactive_opts.auto_advance) die(_("the option '%s' requires '%s'"), "--no-auto-advance", "--interactive/--patch"); } diff --git a/builtin/checkout.c b/builtin/checkout.c index eefefd5d0fe6d5..bebe18c1d9029a 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -532,7 +532,7 @@ static int checkout_paths(const struct checkout_opts *opts, if (opts->patch_mode) { enum add_p_mode patch_mode; - struct add_p_opt add_p_opt = { + struct interactive_options interactive_opts = { .context = opts->patch_context, .interhunkcontext = opts->patch_interhunk_context, .auto_advance = opts->auto_advance @@ -562,7 +562,7 @@ static int checkout_paths(const struct checkout_opts *opts, else BUG("either flag must have been set, worktree=%d, index=%d", opts->checkout_worktree, opts->checkout_index); - return !!run_add_p(the_repository, patch_mode, &add_p_opt, + return !!run_add_p(the_repository, patch_mode, &interactive_opts, rev, &opts->pathspec); } diff --git a/builtin/commit.c b/builtin/commit.c index 9e3a09d532bfce..a1d64dd699b000 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -123,7 +123,7 @@ static const char *edit_message, *use_message; static char *fixup_message, *fixup_commit, *squash_message; static const char *fixup_prefix; static int all, also, interactive, patch_interactive, only, amend, signoff; -static struct add_p_opt add_p_opt = ADD_P_OPT_INIT; +static struct interactive_options interactive_opts = INTERACTIVE_OPTIONS_INIT; static int edit_flag = -1; /* unspecified */ static int quiet, verbose, no_verify, allow_empty, dry_run, renew_authorship; static int config_commit_verbose = -1; /* unspecified */ @@ -357,9 +357,9 @@ static const char *prepare_index(const char **argv, const char *prefix, const char *ret; char *path = NULL; - if (add_p_opt.context < -1) + if (interactive_opts.context < -1) die(_("'%s' cannot be negative"), "--unified"); - if (add_p_opt.interhunkcontext < -1) + if (interactive_opts.interhunkcontext < -1) die(_("'%s' cannot be negative"), "--inter-hunk-context"); if (is_status) @@ -408,7 +408,7 @@ static const char *prepare_index(const char **argv, const char *prefix, old_index_env = xstrdup_or_null(getenv(INDEX_ENVIRONMENT)); setenv(INDEX_ENVIRONMENT, the_repository->index_file, 1); - if (interactive_add(the_repository, argv, prefix, patch_interactive, &add_p_opt) != 0) + if (interactive_add(the_repository, argv, prefix, patch_interactive, &interactive_opts) != 0) die(_("interactive add failed")); the_repository->index_file = old_repo_index_file; @@ -433,9 +433,9 @@ static const char *prepare_index(const char **argv, const char *prefix, ret = get_lock_file_path(&index_lock); goto out; } else { - if (add_p_opt.context != -1) + if (interactive_opts.context != -1) die(_("the option '%s' requires '%s'"), "--unified", "--interactive/--patch"); - if (add_p_opt.interhunkcontext != -1) + if (interactive_opts.interhunkcontext != -1) die(_("the option '%s' requires '%s'"), "--inter-hunk-context", "--interactive/--patch"); } @@ -1743,8 +1743,8 @@ int cmd_commit(int argc, OPT_BOOL('i', "include", &also, N_("add specified files to index for commit")), OPT_BOOL(0, "interactive", &interactive, N_("interactively add files")), OPT_BOOL('p', "patch", &patch_interactive, N_("interactively add changes")), - OPT_DIFF_UNIFIED(&add_p_opt.context), - OPT_DIFF_INTERHUNK_CONTEXT(&add_p_opt.interhunkcontext), + OPT_DIFF_UNIFIED(&interactive_opts.context), + OPT_DIFF_INTERHUNK_CONTEXT(&interactive_opts.interhunkcontext), OPT_BOOL('o', "only", &only, N_("commit only specified files")), OPT_BOOL('n', "no-verify", &no_verify, N_("bypass pre-commit and commit-msg hooks")), OPT_BOOL(0, "dry-run", &dry_run, N_("show what would be committed")), diff --git a/builtin/reset.c b/builtin/reset.c index 88f95f9fc7aa9e..4a74a82c0a3ae8 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -346,7 +346,7 @@ int cmd_reset(int argc, struct object_id oid; struct pathspec pathspec; int intent_to_add = 0; - struct add_p_opt add_p_opt = ADD_P_OPT_INIT; + struct interactive_options interactive_opts = INTERACTIVE_OPTIONS_INIT; const struct option options[] = { OPT__QUIET(&quiet, N_("be quiet, only report errors")), OPT_BOOL(0, "no-refresh", &no_refresh, @@ -371,10 +371,10 @@ int cmd_reset(int argc, PARSE_OPT_OPTARG, option_parse_recurse_submodules_worktree_updater), OPT_BOOL('p', "patch", &patch_mode, N_("select hunks interactively")), - OPT_BOOL(0, "auto-advance", &add_p_opt.auto_advance, + OPT_BOOL(0, "auto-advance", &interactive_opts.auto_advance, N_("auto advance to the next file when selecting hunks interactively")), - OPT_DIFF_UNIFIED(&add_p_opt.context), - OPT_DIFF_INTERHUNK_CONTEXT(&add_p_opt.interhunkcontext), + OPT_DIFF_UNIFIED(&interactive_opts.context), + OPT_DIFF_INTERHUNK_CONTEXT(&interactive_opts.interhunkcontext), OPT_BOOL('N', "intent-to-add", &intent_to_add, N_("record only the fact that removed paths will be added later")), OPT_PATHSPEC_FROM_FILE(&pathspec_from_file), @@ -425,9 +425,9 @@ int cmd_reset(int argc, oidcpy(&oid, &tree->object.oid); } - if (add_p_opt.context < -1) + if (interactive_opts.context < -1) die(_("'%s' cannot be negative"), "--unified"); - if (add_p_opt.interhunkcontext < -1) + if (interactive_opts.interhunkcontext < -1) die(_("'%s' cannot be negative"), "--inter-hunk-context"); prepare_repo_settings(the_repository); @@ -438,14 +438,14 @@ int cmd_reset(int argc, die(_("options '%s' and '%s' cannot be used together"), "--patch", "--{hard,mixed,soft}"); trace2_cmd_mode("patch-interactive"); update_ref_status = !!run_add_p(the_repository, ADD_P_RESET, - &add_p_opt, rev, &pathspec); + &interactive_opts, rev, &pathspec); goto cleanup; } else { - if (add_p_opt.context != -1) + if (interactive_opts.context != -1) die(_("the option '%s' requires '%s'"), "--unified", "--patch"); - if (add_p_opt.interhunkcontext != -1) + if (interactive_opts.interhunkcontext != -1) die(_("the option '%s' requires '%s'"), "--inter-hunk-context", "--patch"); - if (!add_p_opt.auto_advance) + if (!interactive_opts.auto_advance) die(_("the option '%s' requires '%s'"), "--no-auto-advance", "--patch"); } diff --git a/builtin/stash.c b/builtin/stash.c index e79d612e572e7c..c467c02c7fb2aa 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -1306,7 +1306,7 @@ static int stash_staged(struct stash_info *info, struct strbuf *out_patch, static int stash_patch(struct stash_info *info, const struct pathspec *ps, struct strbuf *out_patch, int quiet, - struct add_p_opt *add_p_opt) + struct interactive_options *interactive_opts) { int ret = 0; struct child_process cp_read_tree = CHILD_PROCESS_INIT; @@ -1331,7 +1331,7 @@ static int stash_patch(struct stash_info *info, const struct pathspec *ps, old_index_env = xstrdup_or_null(getenv(INDEX_ENVIRONMENT)); setenv(INDEX_ENVIRONMENT, the_repository->index_file, 1); - ret = !!run_add_p(the_repository, ADD_P_STASH, add_p_opt, NULL, ps); + ret = !!run_add_p(the_repository, ADD_P_STASH, interactive_opts, NULL, ps); the_repository->index_file = old_repo_index_file; if (old_index_env && *old_index_env) @@ -1427,7 +1427,8 @@ static int stash_working_tree(struct stash_info *info, const struct pathspec *ps } static int do_create_stash(const struct pathspec *ps, struct strbuf *stash_msg_buf, - int include_untracked, int patch_mode, struct add_p_opt *add_p_opt, + int include_untracked, int patch_mode, + struct interactive_options *interactive_opts, int only_staged, struct stash_info *info, struct strbuf *patch, int quiet) { @@ -1509,7 +1510,7 @@ static int do_create_stash(const struct pathspec *ps, struct strbuf *stash_msg_b untracked_commit_option = 1; } if (patch_mode) { - ret = stash_patch(info, ps, patch, quiet, add_p_opt); + ret = stash_patch(info, ps, patch, quiet, interactive_opts); if (ret < 0) { if (!quiet) fprintf_ln(stderr, _("Cannot save the current " @@ -1595,7 +1596,8 @@ static int create_stash(int argc, const char **argv, const char *prefix UNUSED, } static int do_push_stash(const struct pathspec *ps, const char *stash_msg, int quiet, - int keep_index, int patch_mode, struct add_p_opt *add_p_opt, + int keep_index, int patch_mode, + struct interactive_options *interactive_opts, int include_untracked, int only_staged) { int ret = 0; @@ -1667,7 +1669,7 @@ static int do_push_stash(const struct pathspec *ps, const char *stash_msg, int q if (stash_msg) strbuf_addstr(&stash_msg_buf, stash_msg); if (do_create_stash(ps, &stash_msg_buf, include_untracked, patch_mode, - add_p_opt, only_staged, &info, &patch, quiet)) { + interactive_opts, only_staged, &info, &patch, quiet)) { ret = -1; goto done; } @@ -1841,7 +1843,7 @@ static int push_stash(int argc, const char **argv, const char *prefix, const char *stash_msg = NULL; char *pathspec_from_file = NULL; struct pathspec ps; - struct add_p_opt add_p_opt = ADD_P_OPT_INIT; + struct interactive_options interactive_opts = INTERACTIVE_OPTIONS_INIT; struct option options[] = { OPT_BOOL('k', "keep-index", &keep_index, N_("keep index")), @@ -1849,10 +1851,10 @@ static int push_stash(int argc, const char **argv, const char *prefix, N_("stash staged changes only")), OPT_BOOL('p', "patch", &patch_mode, N_("stash in patch mode")), - OPT_BOOL(0, "auto-advance", &add_p_opt.auto_advance, + OPT_BOOL(0, "auto-advance", &interactive_opts.auto_advance, N_("auto advance to the next file when selecting hunks interactively")), - OPT_DIFF_UNIFIED(&add_p_opt.context), - OPT_DIFF_INTERHUNK_CONTEXT(&add_p_opt.interhunkcontext), + OPT_DIFF_UNIFIED(&interactive_opts.context), + OPT_DIFF_INTERHUNK_CONTEXT(&interactive_opts.interhunkcontext), OPT__QUIET(&quiet, N_("quiet mode")), OPT_BOOL('u', "include-untracked", &include_untracked, N_("include untracked files in stash")), @@ -1909,21 +1911,21 @@ static int push_stash(int argc, const char **argv, const char *prefix, } if (!patch_mode) { - if (add_p_opt.context != -1) + if (interactive_opts.context != -1) die(_("the option '%s' requires '%s'"), "--unified", "--patch"); - if (add_p_opt.interhunkcontext != -1) + if (interactive_opts.interhunkcontext != -1) die(_("the option '%s' requires '%s'"), "--inter-hunk-context", "--patch"); - if (!add_p_opt.auto_advance) + if (!interactive_opts.auto_advance) die(_("the option '%s' requires '%s'"), "--no-auto-advance", "--patch"); } - if (add_p_opt.context < -1) + if (interactive_opts.context < -1) die(_("'%s' cannot be negative"), "--unified"); - if (add_p_opt.interhunkcontext < -1) + if (interactive_opts.interhunkcontext < -1) die(_("'%s' cannot be negative"), "--inter-hunk-context"); ret = do_push_stash(&ps, stash_msg, quiet, keep_index, patch_mode, - &add_p_opt, include_untracked, only_staged); + &interactive_opts, include_untracked, only_staged); clear_pathspec(&ps); free(pathspec_from_file); @@ -1948,7 +1950,7 @@ static int save_stash(int argc, const char **argv, const char *prefix, const char *stash_msg = NULL; struct pathspec ps; struct strbuf stash_msg_buf = STRBUF_INIT; - struct add_p_opt add_p_opt = ADD_P_OPT_INIT; + struct interactive_options interactive_opts = INTERACTIVE_OPTIONS_INIT; struct option options[] = { OPT_BOOL('k', "keep-index", &keep_index, N_("keep index")), @@ -1956,10 +1958,10 @@ static int save_stash(int argc, const char **argv, const char *prefix, N_("stash staged changes only")), OPT_BOOL('p', "patch", &patch_mode, N_("stash in patch mode")), - OPT_BOOL(0, "auto-advance", &add_p_opt.auto_advance, + OPT_BOOL(0, "auto-advance", &interactive_opts.auto_advance, N_("auto advance to the next file when selecting hunks interactively")), - OPT_DIFF_UNIFIED(&add_p_opt.context), - OPT_DIFF_INTERHUNK_CONTEXT(&add_p_opt.interhunkcontext), + OPT_DIFF_UNIFIED(&interactive_opts.context), + OPT_DIFF_INTERHUNK_CONTEXT(&interactive_opts.interhunkcontext), OPT__QUIET(&quiet, N_("quiet mode")), OPT_BOOL('u', "include-untracked", &include_untracked, N_("include untracked files in stash")), @@ -1979,22 +1981,22 @@ static int save_stash(int argc, const char **argv, const char *prefix, memset(&ps, 0, sizeof(ps)); - if (add_p_opt.context < -1) + if (interactive_opts.context < -1) die(_("'%s' cannot be negative"), "--unified"); - if (add_p_opt.interhunkcontext < -1) + if (interactive_opts.interhunkcontext < -1) die(_("'%s' cannot be negative"), "--inter-hunk-context"); if (!patch_mode) { - if (add_p_opt.context != -1) + if (interactive_opts.context != -1) die(_("the option '%s' requires '%s'"), "--unified", "--patch"); - if (add_p_opt.interhunkcontext != -1) + if (interactive_opts.interhunkcontext != -1) die(_("the option '%s' requires '%s'"), "--inter-hunk-context", "--patch"); - if (!add_p_opt.auto_advance) + if (!interactive_opts.auto_advance) die(_("the option '%s' requires '%s'"), "--no-auto-advance", "--patch"); } ret = do_push_stash(&ps, stash_msg, quiet, keep_index, - patch_mode, &add_p_opt, include_untracked, + patch_mode, &interactive_opts, include_untracked, only_staged); strbuf_release(&stash_msg_buf); diff --git a/commit.h b/commit.h index f2f39e1a89ef62..94181de9fda903 100644 --- a/commit.h +++ b/commit.h @@ -287,7 +287,7 @@ int for_each_commit_graft(each_commit_graft_fn, void *); int interactive_add(struct repository *repo, const char **argv, const char *prefix, - int patch, struct add_p_opt *add_p_opt); + int patch, struct interactive_options *opts); struct commit_extra_header { struct commit_extra_header *next; From d51b61f5dab9c8e715fa792f31d572bc96fb5687 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 2 Mar 2026 13:13:07 +0100 Subject: [PATCH 021/236] add-patch: remove dependency on "add-interactive" subsystem With the preceding commit we have split out interactive configuration that is used by both "git add -p" and "git add -i". But we still initialize that configuration in the "add -p" subsystem by calling `init_add_i_state()`, even though we only do so to initialize the interactive configuration as well as a repository pointer. Stop doing so and instead store and initialize the interactive configuration in `struct add_p_state` directly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- add-patch.c | 88 ++++++++++++++++++++++++++++------------------------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/add-patch.c b/add-patch.c index 756143eb846dba..4f089c82d0385d 100644 --- a/add-patch.c +++ b/add-patch.c @@ -2,7 +2,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" -#include "add-interactive.h" #include "add-patch.h" #include "advice.h" #include "config.h" @@ -263,7 +262,8 @@ struct hunk { }; struct add_p_state { - struct add_i_state s; + struct repository *r; + struct interactive_config cfg; struct strbuf answer, buf; /* parsed diff */ @@ -409,7 +409,7 @@ static void add_p_state_clear(struct add_p_state *s) for (i = 0; i < s->file_diff_nr; i++) free(s->file_diff[i].hunk); free(s->file_diff); - clear_add_i_state(&s->s); + interactive_config_clear(&s->cfg); } __attribute__((format (printf, 2, 3))) @@ -418,9 +418,9 @@ static void err(struct add_p_state *s, const char *fmt, ...) va_list args; va_start(args, fmt); - fputs(s->s.cfg.error_color, stdout); + fputs(s->cfg.error_color, stdout); vprintf(fmt, args); - puts(s->s.cfg.reset_color_interactive); + puts(s->cfg.reset_color_interactive); va_end(args); } @@ -438,7 +438,7 @@ static void setup_child_process(struct add_p_state *s, cp->git_cmd = 1; strvec_pushf(&cp->env, - INDEX_ENVIRONMENT "=%s", s->s.r->index_file); + INDEX_ENVIRONMENT "=%s", s->r->index_file); } static int parse_range(const char **p, @@ -543,12 +543,12 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) int res; strvec_pushv(&args, s->mode->diff_cmd); - if (s->s.cfg.context != -1) - strvec_pushf(&args, "--unified=%i", s->s.cfg.context); - if (s->s.cfg.interhunkcontext != -1) - strvec_pushf(&args, "--inter-hunk-context=%i", s->s.cfg.interhunkcontext); - if (s->s.cfg.interactive_diff_algorithm) - strvec_pushf(&args, "--diff-algorithm=%s", s->s.cfg.interactive_diff_algorithm); + if (s->cfg.context != -1) + strvec_pushf(&args, "--unified=%i", s->cfg.context); + if (s->cfg.interhunkcontext != -1) + strvec_pushf(&args, "--inter-hunk-context=%i", s->cfg.interhunkcontext); + if (s->cfg.interactive_diff_algorithm) + strvec_pushf(&args, "--diff-algorithm=%s", s->cfg.interactive_diff_algorithm); if (s->revision) { struct object_id oid; strvec_push(&args, @@ -577,9 +577,9 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) } strbuf_complete_line(plain); - if (want_color_fd(1, s->s.cfg.use_color_diff)) { + if (want_color_fd(1, s->cfg.use_color_diff)) { struct child_process colored_cp = CHILD_PROCESS_INIT; - const char *diff_filter = s->s.cfg.interactive_diff_filter; + const char *diff_filter = s->cfg.interactive_diff_filter; setup_child_process(s, &colored_cp, NULL); xsnprintf((char *)args.v[color_arg_index], 8, "--color"); @@ -812,7 +812,7 @@ static void render_hunk(struct add_p_state *s, struct hunk *hunk, hunk->colored_end - hunk->colored_start); return; } else { - strbuf_addstr(out, s->s.cfg.fraginfo_color); + strbuf_addstr(out, s->cfg.fraginfo_color); p = s->colored.buf + header->colored_extra_start; len = header->colored_extra_end - header->colored_extra_start; @@ -834,7 +834,7 @@ static void render_hunk(struct add_p_state *s, struct hunk *hunk, if (len) strbuf_add(out, p, len); else if (colored) - strbuf_addf(out, "%s\n", s->s.cfg.reset_color_diff); + strbuf_addf(out, "%s\n", s->cfg.reset_color_diff); else strbuf_addch(out, '\n'); } @@ -1223,12 +1223,12 @@ static void recolor_hunk(struct add_p_state *s, struct hunk *hunk) strbuf_addstr(&s->colored, plain[current] == '-' ? - s->s.cfg.file_old_color : + s->cfg.file_old_color : plain[current] == '+' ? - s->s.cfg.file_new_color : - s->s.cfg.context_color); + s->cfg.file_new_color : + s->cfg.context_color); strbuf_add(&s->colored, plain + current, eol - current); - strbuf_addstr(&s->colored, s->s.cfg.reset_color_diff); + strbuf_addstr(&s->colored, s->cfg.reset_color_diff); if (next > eol) strbuf_add(&s->colored, plain + eol, next - eol); current = next; @@ -1357,7 +1357,7 @@ static int run_apply_check(struct add_p_state *s, static int read_single_character(struct add_p_state *s) { - if (s->s.cfg.use_single_key) { + if (s->cfg.use_single_key) { int res = read_key_without_echo(&s->answer); printf("%s\n", res == EOF ? "" : s->answer.buf); return res; @@ -1371,7 +1371,7 @@ static int read_single_character(struct add_p_state *s) static int prompt_yesno(struct add_p_state *s, const char *prompt) { for (;;) { - color_fprintf(stdout, s->s.cfg.prompt_color, "%s", _(prompt)); + color_fprintf(stdout, s->cfg.prompt_color, "%s", _(prompt)); fflush(stdout); if (read_single_character(s) == EOF) return -1; @@ -1559,7 +1559,7 @@ static void apply_patch(struct add_p_state *s, struct file_diff *file_diff) strbuf_reset(&s->buf); reassemble_patch(s, file_diff, 0, &s->buf); - discard_index(s->s.r->index); + discard_index(s->r->index); if (s->mode->apply_for_checkout) apply_for_checkout(s, &s->buf, s->mode->is_reverse); @@ -1570,9 +1570,9 @@ static void apply_patch(struct add_p_state *s, struct file_diff *file_diff) NULL, 0, NULL, 0)) error(_("'git apply' failed")); } - if (repo_read_index(s->s.r) >= 0) - repo_refresh_and_write_index(s->s.r, REFRESH_QUIET, 0, - 1, NULL, NULL, NULL); + if (repo_read_index(s->r) >= 0) + repo_refresh_and_write_index(s->r, REFRESH_QUIET, 0, + 1, NULL, NULL, NULL); } } @@ -1660,7 +1660,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) /* Everything decided? */ if (undecided_previous < 0 && undecided_next < 0 && hunk->use != UNDECIDED_HUNK) { - if (!s->s.cfg.auto_advance) + if (!s->cfg.auto_advance) all_decided = 1; else { patch_update_resp++; @@ -1714,11 +1714,11 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) permitted |= ALLOW_EDIT; strbuf_addstr(&s->buf, ",e"); } - if (!s->s.cfg.auto_advance && s->file_diff_nr > 1) { + if (!s->cfg.auto_advance && s->file_diff_nr > 1) { permitted |= ALLOW_GOTO_NEXT_FILE; strbuf_addstr(&s->buf, ",>"); } - if (!s->s.cfg.auto_advance && s->file_diff_nr > 1) { + if (!s->cfg.auto_advance && s->file_diff_nr > 1) { permitted |= ALLOW_GOTO_PREVIOUS_FILE; strbuf_addstr(&s->buf, ",<"); } @@ -1733,7 +1733,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) else prompt_mode_type = PROMPT_HUNK; - printf("%s(%"PRIuMAX"/%"PRIuMAX") ", s->s.cfg.prompt_color, + printf("%s(%"PRIuMAX"/%"PRIuMAX") ", s->cfg.prompt_color, (uintmax_t)hunk_index + 1, (uintmax_t)(file_diff->hunk_nr ? file_diff->hunk_nr @@ -1746,8 +1746,8 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) } printf(_(s->mode->prompt_mode[prompt_mode_type]), hunk_use_decision, s->buf.buf); - if (*s->s.cfg.reset_color_interactive) - fputs(s->s.cfg.reset_color_interactive, stdout); + if (*s->cfg.reset_color_interactive) + fputs(s->cfg.reset_color_interactive, stdout); fflush(stdout); if (read_single_character(s) == EOF) { patch_update_resp = s->file_diff_nr; @@ -1798,7 +1798,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) } else if (ch == 'q') { patch_update_resp = s->file_diff_nr; break; - } else if (!s->s.cfg.auto_advance && s->answer.buf[0] == '>') { + } else if (!s->cfg.auto_advance && s->answer.buf[0] == '>') { if (permitted & ALLOW_GOTO_NEXT_FILE) { if (patch_update_resp == s->file_diff_nr - 1) patch_update_resp = 0; @@ -1809,7 +1809,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) err(s, _("No next file")); continue; } - } else if (!s->s.cfg.auto_advance && s->answer.buf[0] == '<') { + } else if (!s->cfg.auto_advance && s->answer.buf[0] == '<') { if (permitted & ALLOW_GOTO_PREVIOUS_FILE) { if (patch_update_resp == 0) patch_update_resp = s->file_diff_nr - 1; @@ -1932,7 +1932,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) err(s, _("Sorry, cannot split this hunk")); } else if (!split_hunk(s, file_diff, hunk - file_diff->hunk)) { - color_fprintf_ln(stdout, s->s.cfg.header_color, + color_fprintf_ln(stdout, s->cfg.header_color, _("Split into %d hunks."), (int)splittable_into); rendered_hunk_index = -1; @@ -1950,7 +1950,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) } else if (s->answer.buf[0] == '?') { const char *p = _(help_patch_remainder), *eol = p; - color_fprintf(stdout, s->s.cfg.help_color, "%s", + color_fprintf(stdout, s->cfg.help_color, "%s", _(s->mode->help_patch_text)); /* @@ -1974,13 +1974,13 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) if (file_diff->hunk[i].use == SKIP_HUNK) skipped += 1; } - color_fprintf_ln(stdout, s->s.cfg.help_color, _(p), + color_fprintf_ln(stdout, s->cfg.help_color, _(p), total, used, skipped); } if (*p != '?' && !strchr(s->buf.buf, *p)) continue; - color_fprintf_ln(stdout, s->s.cfg.help_color, + color_fprintf_ln(stdout, s->cfg.help_color, "%.*s", (int)(eol - p), p); } } else { @@ -1989,7 +1989,7 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) } } - if (s->s.cfg.auto_advance) + if (s->cfg.auto_advance) apply_patch(s, file_diff); putchar('\n'); @@ -2001,11 +2001,15 @@ int run_add_p(struct repository *r, enum add_p_mode mode, const struct pathspec *ps) { struct add_p_state s = { - { r }, STRBUF_INIT, STRBUF_INIT, STRBUF_INIT, STRBUF_INIT + .r = r, + .answer = STRBUF_INIT, + .buf = STRBUF_INIT, + .plain = STRBUF_INIT, + .colored = STRBUF_INIT, }; size_t i, binary_count = 0; - init_add_i_state(&s.s, r, opts); + interactive_config_init(&s.cfg, r, opts); if (mode == ADD_P_STASH) s.mode = &patch_mode_stash; @@ -2051,7 +2055,7 @@ int run_add_p(struct repository *r, enum add_p_mode mode, if ((i = patch_update_file(&s, i)) == s.file_diff_nr) break; } - if (!s.s.cfg.auto_advance) + if (!s.cfg.auto_advance) for (i = 0; i < s.file_diff_nr; i++) apply_patch(&s, s.file_diff + i); From 0c5583a57d618db191afceeff54e8cafbee89f41 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 2 Mar 2026 13:13:08 +0100 Subject: [PATCH 022/236] add-patch: add support for in-memory index patching With `run_add_p()` callers have the ability to apply changes from a specific revision to a repository's index. This infra supports several different modes, like for example applying changes to the index, working tree or both. One feature that is missing though is the ability to apply changes to an in-memory index different from the repository's index. Add a new function `run_add_p_index()` to plug this gap. This new function will be used in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- add-patch.c | 149 +++++++++++++++++++++++++++++++++++++++++++--------- add-patch.h | 8 +++ 2 files changed, 132 insertions(+), 25 deletions(-) diff --git a/add-patch.c b/add-patch.c index 4f089c82d0385d..b4dc7d2293930c 100644 --- a/add-patch.c +++ b/add-patch.c @@ -4,11 +4,13 @@ #include "git-compat-util.h" #include "add-patch.h" #include "advice.h" +#include "commit.h" #include "config.h" #include "diff.h" #include "editor.h" #include "environment.h" #include "gettext.h" +#include "hex.h" #include "object-name.h" #include "pager.h" #include "read-cache-ll.h" @@ -263,6 +265,8 @@ struct hunk { struct add_p_state { struct repository *r; + struct index_state *index; + const char *index_file; struct interactive_config cfg; struct strbuf answer, buf; @@ -438,7 +442,7 @@ static void setup_child_process(struct add_p_state *s, cp->git_cmd = 1; strvec_pushf(&cp->env, - INDEX_ENVIRONMENT "=%s", s->r->index_file); + INDEX_ENVIRONMENT "=%s", s->index_file); } static int parse_range(const char **p, @@ -1559,7 +1563,7 @@ static void apply_patch(struct add_p_state *s, struct file_diff *file_diff) strbuf_reset(&s->buf); reassemble_patch(s, file_diff, 0, &s->buf); - discard_index(s->r->index); + discard_index(s->index); if (s->mode->apply_for_checkout) apply_for_checkout(s, &s->buf, s->mode->is_reverse); @@ -1570,9 +1574,11 @@ static void apply_patch(struct add_p_state *s, struct file_diff *file_diff) NULL, 0, NULL, 0)) error(_("'git apply' failed")); } - if (repo_read_index(s->r) >= 0) + if (read_index_from(s->index, s->index_file, s->r->gitdir) >= 0 && + s->index == s->r->index) { repo_refresh_and_write_index(s->r, REFRESH_QUIET, 0, 1, NULL, NULL, NULL); + } } } @@ -1996,18 +2002,51 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) return patch_update_resp; } +static int run_add_p_common(struct add_p_state *state, + const struct pathspec *ps) +{ + size_t binary_count = 0; + size_t i; + + if (parse_diff(state, ps) < 0) + return -1; + + for (i = 0; i < state->file_diff_nr;) { + if (state->file_diff[i].binary && !state->file_diff[i].hunk_nr) { + binary_count++; + i++; + continue; + } + if ((i = patch_update_file(state, i)) == state->file_diff_nr) + break; + } + + if (!state->cfg.auto_advance) + for (i = 0; i < state->file_diff_nr; i++) + apply_patch(state, state->file_diff + i); + + if (state->file_diff_nr == 0) + err(state, _("No changes.")); + else if (binary_count == state->file_diff_nr) + err(state, _("Only binary files changed.")); + + return 0; +} + int run_add_p(struct repository *r, enum add_p_mode mode, struct interactive_options *opts, const char *revision, const struct pathspec *ps) { struct add_p_state s = { .r = r, + .index = r->index, + .index_file = r->index_file, .answer = STRBUF_INIT, .buf = STRBUF_INIT, .plain = STRBUF_INIT, .colored = STRBUF_INIT, }; - size_t i, binary_count = 0; + int ret; interactive_config_init(&s.cfg, r, opts); @@ -2040,30 +2079,90 @@ int run_add_p(struct repository *r, enum add_p_mode mode, if (repo_read_index(r) < 0 || (!s.mode->index_only && repo_refresh_and_write_index(r, REFRESH_QUIET, 0, 1, - NULL, NULL, NULL) < 0) || - parse_diff(&s, ps) < 0) { - add_p_state_clear(&s); - return -1; + NULL, NULL, NULL) < 0)) { + ret = -1; + goto out; } - for (i = 0; i < s.file_diff_nr;) { - if (s.file_diff[i].binary && !s.file_diff[i].hunk_nr) { - binary_count++; - i++; - continue; - } - if ((i = patch_update_file(&s, i)) == s.file_diff_nr) - break; - } - if (!s.cfg.auto_advance) - for (i = 0; i < s.file_diff_nr; i++) - apply_patch(&s, s.file_diff + i); + ret = run_add_p_common(&s, ps); + if (ret < 0) + goto out; - if (s.file_diff_nr == 0) - err(&s, _("No changes.")); - else if (binary_count == s.file_diff_nr) - err(&s, _("Only binary files changed.")); + ret = 0; +out: add_p_state_clear(&s); - return 0; + return ret; +} + +int run_add_p_index(struct repository *r, + struct index_state *index, + const char *index_file, + struct interactive_options *opts, + const char *revision, + const struct pathspec *ps) +{ + struct patch_mode mode = { + .apply_args = { "--cached", NULL }, + .apply_check_args = { "--cached", NULL }, + .prompt_mode = { + N_("Stage mode change [y,n,q,a,d%s,?]? "), + N_("Stage deletion [y,n,q,a,d%s,?]? "), + N_("Stage addition [y,n,q,a,d%s,?]? "), + N_("Stage this hunk [y,n,q,a,d%s,?]? ") + }, + .edit_hunk_hint = N_("If the patch applies cleanly, the edited hunk " + "will immediately be marked for staging."), + .help_patch_text = + N_("y - stage this hunk\n" + "n - do not stage this hunk\n" + "q - quit; do not stage this hunk or any of the remaining " + "ones\n" + "a - stage this hunk and all later hunks in the file\n" + "d - do not stage this hunk or any of the later hunks in " + "the file\n"), + .index_only = 1, + }; + struct add_p_state s = { + .r = r, + .index = index, + .index_file = index_file, + .answer = STRBUF_INIT, + .buf = STRBUF_INIT, + .plain = STRBUF_INIT, + .colored = STRBUF_INIT, + .mode = &mode, + .revision = revision, + }; + char parent_tree_oid[GIT_MAX_HEXSZ + 1]; + struct commit *commit; + int ret; + + interactive_config_init(&s.cfg, r, opts); + + commit = lookup_commit_reference_by_name(revision); + if (!commit) { + err(&s, _("Revision does not refer to a commit")); + ret = -1; + goto out; + } + + if (commit->parents) + oid_to_hex_r(parent_tree_oid, get_commit_tree_oid(commit->parents->item)); + else + oid_to_hex_r(parent_tree_oid, r->hash_algo->empty_tree); + + mode.diff_cmd[0] = "diff-tree"; + mode.diff_cmd[1] = "-r"; + mode.diff_cmd[2] = parent_tree_oid; + + ret = run_add_p_common(&s, ps); + if (ret < 0) + goto out; + + ret = 0; + +out: + add_p_state_clear(&s); + return ret; } diff --git a/add-patch.h b/add-patch.h index e6868c60a2f343..cf2a31a40fb918 100644 --- a/add-patch.h +++ b/add-patch.h @@ -3,6 +3,7 @@ #include "color.h" +struct index_state; struct pathspec; struct repository; @@ -56,4 +57,11 @@ int run_add_p(struct repository *r, enum add_p_mode mode, struct interactive_options *opts, const char *revision, const struct pathspec *ps); +int run_add_p_index(struct repository *r, + struct index_state *index, + const char *index_file, + struct interactive_options *opts, + const char *revision, + const struct pathspec *ps); + #endif From 48f6d9232834be661f0d1dc4f187b324124ccbe0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 2 Mar 2026 13:13:09 +0100 Subject: [PATCH 023/236] add-patch: allow disabling editing of hunks The "add-patch" mode allows the user to edit hunks to apply custom changes. This is incompatible with a new `git history split` command that we're about to introduce in a subsequent commit, so we need a way to disable this mode. Add a new flag to disable editing hunks. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- add-interactive.c | 2 +- add-patch.c | 22 ++++++++++++++-------- add-patch.h | 11 +++++++++-- builtin/add.c | 2 +- builtin/checkout.c | 2 +- builtin/reset.c | 2 +- builtin/stash.c | 2 +- 7 files changed, 28 insertions(+), 15 deletions(-) diff --git a/add-interactive.c b/add-interactive.c index 152e2a02979e45..3cf8a1dbf85e3f 100644 --- a/add-interactive.c +++ b/add-interactive.c @@ -927,7 +927,7 @@ static int run_patch(struct add_i_state *s, const struct pathspec *ps, parse_pathspec(&ps_selected, PATHSPEC_ALL_MAGIC & ~PATHSPEC_LITERAL, PATHSPEC_LITERAL_PATH, "", args.v); - res = run_add_p(s->r, ADD_P_ADD, &opts, NULL, &ps_selected); + res = run_add_p(s->r, ADD_P_ADD, &opts, NULL, &ps_selected, 0); strvec_clear(&args); clear_pathspec(&ps_selected); } diff --git a/add-patch.c b/add-patch.c index b4dc7d2293930c..4e28e5c18786e2 100644 --- a/add-patch.c +++ b/add-patch.c @@ -1604,7 +1604,9 @@ static bool get_first_undecided(const struct file_diff *file_diff, size_t *idx) return false; } -static size_t patch_update_file(struct add_p_state *s, size_t idx) +static size_t patch_update_file(struct add_p_state *s, + size_t idx, + unsigned flags) { size_t hunk_index = 0; ssize_t i, undecided_previous, undecided_next, rendered_hunk_index = -1; @@ -1715,7 +1717,8 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) permitted |= ALLOW_SPLIT; strbuf_addstr(&s->buf, ",s"); } - if (hunk_index + 1 > file_diff->mode_change && + if (!(flags & ADD_P_DISALLOW_EDIT) && + hunk_index + 1 > file_diff->mode_change && !file_diff->deleted) { permitted |= ALLOW_EDIT; strbuf_addstr(&s->buf, ",e"); @@ -2003,7 +2006,8 @@ static size_t patch_update_file(struct add_p_state *s, size_t idx) } static int run_add_p_common(struct add_p_state *state, - const struct pathspec *ps) + const struct pathspec *ps, + unsigned flags) { size_t binary_count = 0; size_t i; @@ -2017,7 +2021,7 @@ static int run_add_p_common(struct add_p_state *state, i++; continue; } - if ((i = patch_update_file(state, i)) == state->file_diff_nr) + if ((i = patch_update_file(state, i, flags)) == state->file_diff_nr) break; } @@ -2035,7 +2039,8 @@ static int run_add_p_common(struct add_p_state *state, int run_add_p(struct repository *r, enum add_p_mode mode, struct interactive_options *opts, const char *revision, - const struct pathspec *ps) + const struct pathspec *ps, + unsigned flags) { struct add_p_state s = { .r = r, @@ -2084,7 +2089,7 @@ int run_add_p(struct repository *r, enum add_p_mode mode, goto out; } - ret = run_add_p_common(&s, ps); + ret = run_add_p_common(&s, ps, flags); if (ret < 0) goto out; @@ -2100,7 +2105,8 @@ int run_add_p_index(struct repository *r, const char *index_file, struct interactive_options *opts, const char *revision, - const struct pathspec *ps) + const struct pathspec *ps, + unsigned flags) { struct patch_mode mode = { .apply_args = { "--cached", NULL }, @@ -2156,7 +2162,7 @@ int run_add_p_index(struct repository *r, mode.diff_cmd[1] = "-r"; mode.diff_cmd[2] = parent_tree_oid; - ret = run_add_p_common(&s, ps); + ret = run_add_p_common(&s, ps, flags); if (ret < 0) goto out; diff --git a/add-patch.h b/add-patch.h index cf2a31a40fb918..fb6d975b68cc4d 100644 --- a/add-patch.h +++ b/add-patch.h @@ -53,15 +53,22 @@ enum add_p_mode { ADD_P_WORKTREE, }; +enum add_p_flags { + /* Disallow "editing" hunks. */ + ADD_P_DISALLOW_EDIT = (1 << 0), +}; + int run_add_p(struct repository *r, enum add_p_mode mode, struct interactive_options *opts, const char *revision, - const struct pathspec *ps); + const struct pathspec *ps, + unsigned flags); int run_add_p_index(struct repository *r, struct index_state *index, const char *index_file, struct interactive_options *opts, const char *revision, - const struct pathspec *ps); + const struct pathspec *ps, + unsigned flags); #endif diff --git a/builtin/add.c b/builtin/add.c index 84f9bcb789fc7b..eeab779328e42f 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -172,7 +172,7 @@ int interactive_add(struct repository *repo, prefix, argv); if (patch) - ret = !!run_add_p(repo, ADD_P_ADD, interactive_opts, NULL, &pathspec); + ret = !!run_add_p(repo, ADD_P_ADD, interactive_opts, NULL, &pathspec, 0); else ret = !!run_add_i(repo, &pathspec, interactive_opts); diff --git a/builtin/checkout.c b/builtin/checkout.c index bebe18c1d9029a..a8863277f225c4 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -563,7 +563,7 @@ static int checkout_paths(const struct checkout_opts *opts, BUG("either flag must have been set, worktree=%d, index=%d", opts->checkout_worktree, opts->checkout_index); return !!run_add_p(the_repository, patch_mode, &interactive_opts, - rev, &opts->pathspec); + rev, &opts->pathspec, 0); } repo_hold_locked_index(the_repository, &lock_file, LOCK_DIE_ON_ERROR); diff --git a/builtin/reset.c b/builtin/reset.c index 4a74a82c0a3ae8..3590be57a5f03c 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -438,7 +438,7 @@ int cmd_reset(int argc, die(_("options '%s' and '%s' cannot be used together"), "--patch", "--{hard,mixed,soft}"); trace2_cmd_mode("patch-interactive"); update_ref_status = !!run_add_p(the_repository, ADD_P_RESET, - &interactive_opts, rev, &pathspec); + &interactive_opts, rev, &pathspec, 0); goto cleanup; } else { if (interactive_opts.context != -1) diff --git a/builtin/stash.c b/builtin/stash.c index c467c02c7fb2aa..7c68a1d7f9a85b 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -1331,7 +1331,7 @@ static int stash_patch(struct stash_info *info, const struct pathspec *ps, old_index_env = xstrdup_or_null(getenv(INDEX_ENVIRONMENT)); setenv(INDEX_ENVIRONMENT, the_repository->index_file, 1); - ret = !!run_add_p(the_repository, ADD_P_STASH, interactive_opts, NULL, ps); + ret = !!run_add_p(the_repository, ADD_P_STASH, interactive_opts, NULL, ps, 0); the_repository->index_file = old_repo_index_file; if (old_index_env && *old_index_env) From a021e4f92cbacdb028b3efa49f619b076e72c9a6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 2 Mar 2026 13:13:10 +0100 Subject: [PATCH 024/236] cache-tree: allow writing in-memory index as tree The function `write_in_core_index_as_tree()` takes a repository and writes its index into a tree object. What this function cannot do though is to take an _arbitrary_ in-memory index. Introduce a new `struct index_state` parameter so that the caller can pass a different index than the one belonging to the repository. This will be used in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/checkout.c | 3 ++- cache-tree.c | 4 ++-- cache-tree.h | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index a8863277f225c4..f8b3a7b08ce120 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -891,7 +891,8 @@ static int merge_working_tree(const struct checkout_opts *opts, 0); init_ui_merge_options(&o, the_repository); o.verbosity = 0; - work = write_in_core_index_as_tree(the_repository); + work = write_in_core_index_as_tree(the_repository, + the_repository->index); ret = reset_tree(new_tree, opts, 1, diff --git a/cache-tree.c b/cache-tree.c index 16c3a36b48267b..60bcc07c3b8357 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -723,11 +723,11 @@ static int write_index_as_tree_internal(struct object_id *oid, return 0; } -struct tree* write_in_core_index_as_tree(struct repository *repo) { +struct tree *write_in_core_index_as_tree(struct repository *repo, + struct index_state *index_state) { struct object_id o; int was_valid, ret; - struct index_state *index_state = repo->index; was_valid = index_state->cache_tree && cache_tree_fully_valid(index_state->cache_tree); diff --git a/cache-tree.h b/cache-tree.h index b82c4963e7c805..f8bddae5235f54 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -47,7 +47,8 @@ int cache_tree_verify(struct repository *, struct index_state *); #define WRITE_TREE_UNMERGED_INDEX (-2) #define WRITE_TREE_PREFIX_ERROR (-3) -struct tree* write_in_core_index_as_tree(struct repository *repo); +struct tree *write_in_core_index_as_tree(struct repository *repo, + struct index_state *index_state); int write_index_as_tree(struct object_id *oid, struct index_state *index_state, const char *index_path, int flags, const char *prefix); void prime_cache_tree(struct repository *, struct index_state *, struct tree *); From 98f839425db94eb8d4c1f773e923ba1cff622d66 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 2 Mar 2026 13:13:11 +0100 Subject: [PATCH 025/236] builtin/history: split out extended function to create commits In the next commit we're about to introduce a new command that splits up a commit into two. Most of the logic will be shared with rewording commits, except that we also need to have control over the parents and the old/new trees. Extract a new function `commit_tree_with_edited_message_ext()` to prepare for this commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/history.c | 67 ++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/builtin/history.c b/builtin/history.c index 1cf6c668cfd814..80726ce14b5779 100644 --- a/builtin/history.c +++ b/builtin/history.c @@ -83,10 +83,13 @@ static int fill_commit_message(struct repository *repo, return 0; } -static int commit_tree_with_edited_message(struct repository *repo, - const char *action, - struct commit *original, - struct commit **out) +static int commit_tree_with_edited_message_ext(struct repository *repo, + const char *action, + struct commit *commit_with_message, + const struct commit_list *parents, + const struct object_id *old_tree, + const struct object_id *new_tree, + struct commit **out) { const char *exclude_gpgsig[] = { /* We reencode the message, so the encoding needs to be stripped. */ @@ -100,44 +103,27 @@ static int commit_tree_with_edited_message(struct repository *repo, struct commit_extra_header *original_extra_headers = NULL; struct strbuf commit_message = STRBUF_INIT; struct object_id rewritten_commit_oid; - struct object_id original_tree_oid; - struct object_id parent_tree_oid; char *original_author = NULL; - struct commit *parent; size_t len; int ret; - original_tree_oid = repo_get_commit_tree(repo, original)->object.oid; - - parent = original->parents ? original->parents->item : NULL; - if (parent) { - if (repo_parse_commit(repo, parent)) { - ret = error(_("unable to parse parent commit %s"), - oid_to_hex(&parent->object.oid)); - goto out; - } - - parent_tree_oid = repo_get_commit_tree(repo, parent)->object.oid; - } else { - oidcpy(&parent_tree_oid, repo->hash_algo->empty_tree); - } - /* We retain authorship of the original commit. */ - original_message = repo_logmsg_reencode(repo, original, NULL, NULL); + original_message = repo_logmsg_reencode(repo, commit_with_message, NULL, NULL); ptr = find_commit_header(original_message, "author", &len); if (ptr) original_author = xmemdupz(ptr, len); find_commit_subject(original_message, &original_body); - ret = fill_commit_message(repo, &parent_tree_oid, &original_tree_oid, + ret = fill_commit_message(repo, old_tree, new_tree, original_body, action, &commit_message); if (ret < 0) goto out; - original_extra_headers = read_commit_extra_headers(original, exclude_gpgsig); + original_extra_headers = read_commit_extra_headers(commit_with_message, + exclude_gpgsig); - ret = commit_tree_extended(commit_message.buf, commit_message.len, &original_tree_oid, - original->parents, &rewritten_commit_oid, original_author, + ret = commit_tree_extended(commit_message.buf, commit_message.len, new_tree, + parents, &rewritten_commit_oid, original_author, NULL, NULL, original_extra_headers); if (ret < 0) goto out; @@ -151,6 +137,33 @@ static int commit_tree_with_edited_message(struct repository *repo, return ret; } +static int commit_tree_with_edited_message(struct repository *repo, + const char *action, + struct commit *original, + struct commit **out) +{ + struct object_id parent_tree_oid; + const struct object_id *tree_oid; + struct commit *parent; + + tree_oid = &repo_get_commit_tree(repo, original)->object.oid; + + parent = original->parents ? original->parents->item : NULL; + if (parent) { + if (repo_parse_commit(repo, parent)) { + return error(_("unable to parse parent commit %s"), + oid_to_hex(&parent->object.oid)); + } + + parent_tree_oid = repo_get_commit_tree(repo, parent)->object.oid; + } else { + oidcpy(&parent_tree_oid, repo->hash_algo->empty_tree); + } + + return commit_tree_with_edited_message_ext(repo, action, original, original->parents, + &parent_tree_oid, tree_oid, out); +} + enum ref_action { REF_ACTION_DEFAULT, REF_ACTION_BRANCHES, From d563ecec2845467880f5742e178a9723afef495a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 2 Mar 2026 13:13:12 +0100 Subject: [PATCH 026/236] builtin/history: implement "split" subcommand It is quite a common use case that one wants to split up one commit into multiple commits by moving parts of the changes of the original commit out into a separate commit. This is quite an involved operation though: 1. Identify the commit in question that is to be dropped. 2. Perform an interactive rebase on top of that commit's parent. 3. Modify the instruction sheet to "edit" the commit that is to be split up. 4. Drop the commit via "git reset HEAD~". 5. Stage changes that should go into the first commit and commit it. 6. Stage changes that should go into the second commit and commit it. 7. Finalize the rebase. This is quite complex, and overall I would claim that most people who are not experts in Git would struggle with this flow. Introduce a new "split" subcommand for git-history(1) to make this way easier. All the user needs to do is to say `git history split $COMMIT`. From hereon, Git asks the user which parts of the commit shall be moved out into a separate commit and, once done, asks the user for the commit message. Git then creates that split-out commit and applies the original commit on top of it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/git-history.adoc | 62 +++ builtin/history.c | 250 +++++++++++ t/meson.build | 1 + t/t3452-history-split.sh | 757 +++++++++++++++++++++++++++++++++ 4 files changed, 1070 insertions(+) create mode 100755 t/t3452-history-split.sh diff --git a/Documentation/git-history.adoc b/Documentation/git-history.adoc index cc019de69764d2..24dc907033b469 100644 --- a/Documentation/git-history.adoc +++ b/Documentation/git-history.adoc @@ -9,6 +9,7 @@ SYNOPSIS -------- [synopsis] git history reword [--dry-run] [--update-refs=(branches|head)] +git history split [--dry-run] [--update-refs=(branches|head)] [--] [...] DESCRIPTION ----------- @@ -57,6 +58,26 @@ The following commands are available to rewrite history in different ways: details of this commit remain unchanged. This command will spawn an editor with the current message of that commit. +`split [--] [...]`:: + Interactively split up into two commits by choosing + hunks introduced by it that will be moved into the new split-out + commit. These hunks will then be written into a new commit that + becomes the parent of the previous commit. The original commit + stays intact, except that its parent will be the newly split-out + commit. ++ +The commit messages of the split-up commits will be asked for by launching +the configured editor. Authorship of the commit will be the same as for the +original commit. ++ +If passed, __ can be used to limit which changes shall be split out +of the original commit. Files not matching any of the pathspecs will remain +part of the original commit. For more details, see the 'pathspec' entry in +linkgit:gitglossary[7]. ++ +It is invalid to select either all or no hunks, as that would lead to +one of the commits becoming empty. + OPTIONS ------- @@ -72,6 +93,47 @@ OPTIONS descendants of the original commit will be rewritten. With `head`, only the current `HEAD` reference will be rewritten. Defaults to `branches`. +EXAMPLES +-------- + +Split a commit +~~~~~~~~~~~~~~ + +---------- +$ git log --stat --oneline +3f81232 (HEAD -> main) original + bar | 1 + + foo | 1 + + 2 files changed, 2 insertions(+) + +$ git history split HEAD +diff --git a/bar b/bar +new file mode 100644 +index 0000000..5716ca5 +--- /dev/null ++++ b/bar +@@ -0,0 +1 @@ ++bar +(1/1) Stage addition [y,n,q,a,d,p,?]? y + +diff --git a/foo b/foo +new file mode 100644 +index 0000000..257cc56 +--- /dev/null ++++ b/foo +@@ -0,0 +1 @@ ++foo +(1/1) Stage addition [y,n,q,a,d,p,?]? n + +$ git log --stat --oneline +7cebe64 (HEAD -> main) original + foo | 1 + + 1 file changed, 1 insertion(+) +d1582f3 split-out commit + bar | 1 + + 1 file changed, 1 insertion(+) +---------- + GIT --- Part of the linkgit:git[1] suite diff --git a/builtin/history.c b/builtin/history.c index 80726ce14b5779..dfb9d3f1802733 100644 --- a/builtin/history.c +++ b/builtin/history.c @@ -1,6 +1,7 @@ #define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" +#include "cache-tree.h" #include "commit.h" #include "commit-reach.h" #include "config.h" @@ -8,17 +9,24 @@ #include "environment.h" #include "gettext.h" #include "hex.h" +#include "lockfile.h" +#include "oidmap.h" #include "parse-options.h" +#include "path.h" +#include "read-cache.h" #include "refs.h" #include "replay.h" #include "revision.h" #include "sequencer.h" #include "strvec.h" #include "tree.h" +#include "unpack-trees.h" #include "wt-status.h" #define GIT_HISTORY_REWORD_USAGE \ N_("git history reword [--dry-run] [--update-refs=(branches|head)]") +#define GIT_HISTORY_SPLIT_USAGE \ + N_("git history split [--dry-run] [--update-refs=(branches|head)] [--] [...]") static void change_data_free(void *util, const char *str UNUSED) { @@ -484,6 +492,246 @@ static int cmd_history_reword(int argc, return ret; } +static int write_ondisk_index(struct repository *repo, + struct object_id *oid, + const char *path) +{ + struct unpack_trees_options opts = { 0 }; + struct lock_file lock = LOCK_INIT; + struct tree_desc tree_desc; + struct index_state index; + struct tree *tree; + int ret; + + index_state_init(&index, repo); + + opts.head_idx = -1; + opts.src_index = &index; + opts.dst_index = &index; + + tree = repo_parse_tree_indirect(repo, oid); + init_tree_desc(&tree_desc, &tree->object.oid, tree->buffer, tree->size); + + if (unpack_trees(1, &tree_desc, &opts)) { + ret = error(_("unable to populate index with tree")); + goto out; + } + + prime_cache_tree(repo, &index, tree); + + if (hold_lock_file_for_update(&lock, path, 0) < 0) { + ret = error_errno(_("unable to acquire index lock")); + goto out; + } + + if (write_locked_index(&index, &lock, COMMIT_LOCK)) { + ret = error(_("unable to write new index file")); + goto out; + } + + ret = 0; + +out: + rollback_lock_file(&lock); + release_index(&index); + return ret; +} + +static int split_commit(struct repository *repo, + struct commit *original, + struct pathspec *pathspec, + struct commit **out) +{ + struct interactive_options interactive_opts = INTERACTIVE_OPTIONS_INIT; + struct strbuf index_file = STRBUF_INIT; + struct index_state index = INDEX_STATE_INIT(repo); + const struct object_id *original_commit_tree_oid; + const struct object_id *old_tree_oid, *new_tree_oid; + struct object_id parent_tree_oid; + char original_commit_oid[GIT_MAX_HEXSZ + 1]; + struct commit *first_commit, *second_commit; + struct commit_list *parents = NULL; + struct tree *split_tree; + int ret; + + if (original->parents) { + if (repo_parse_commit(repo, original->parents->item)) { + ret = error(_("unable to parse parent commit %s"), + oid_to_hex(&original->parents->item->object.oid)); + goto out; + } + + parent_tree_oid = *get_commit_tree_oid(original->parents->item); + } else { + oidcpy(&parent_tree_oid, repo->hash_algo->empty_tree); + } + original_commit_tree_oid = get_commit_tree_oid(original); + + /* + * Construct the first commit. This is done by taking the original + * commit parent's tree and selectively patching changes from the diff + * between that parent and its child. + */ + repo_git_path_replace(repo, &index_file, "%s", "history-split.index"); + + ret = write_ondisk_index(repo, &parent_tree_oid, index_file.buf); + if (ret < 0) + goto out; + + ret = read_index_from(&index, index_file.buf, repo->gitdir); + if (ret < 0) { + ret = error(_("failed reading temporary index")); + goto out; + } + + oid_to_hex_r(original_commit_oid, &original->object.oid); + ret = run_add_p_index(repo, &index, index_file.buf, &interactive_opts, + original_commit_oid, pathspec, ADD_P_DISALLOW_EDIT); + if (ret < 0) + goto out; + + split_tree = write_in_core_index_as_tree(repo, &index); + if (!split_tree) { + ret = error(_("failed split tree")); + goto out; + } + + unlink(index_file.buf); + strbuf_release(&index_file); + + /* + * We disallow the cases where either the split-out commit or the + * original commit would become empty. Consequently, if we see that the + * new tree ID matches either of those trees we abort. + */ + if (oideq(&split_tree->object.oid, &parent_tree_oid)) { + ret = error(_("split commit is empty")); + goto out; + } else if (oideq(&split_tree->object.oid, original_commit_tree_oid)) { + ret = error(_("split commit tree matches original commit")); + goto out; + } + + /* + * The first commit is constructed from the split-out tree. The base + * that shall be diffed against is the parent of the original commit. + */ + ret = commit_tree_with_edited_message_ext(repo, "split-out", original, + original->parents, &parent_tree_oid, + &split_tree->object.oid, &first_commit); + if (ret < 0) { + ret = error(_("failed writing first commit")); + goto out; + } + + /* + * The second commit is constructed from the original tree. The base to + * diff against and the parent in this case is the first split-out + * commit. + */ + commit_list_append(first_commit, &parents); + + old_tree_oid = &repo_get_commit_tree(repo, first_commit)->object.oid; + new_tree_oid = &repo_get_commit_tree(repo, original)->object.oid; + + ret = commit_tree_with_edited_message_ext(repo, "split-out", original, + parents, old_tree_oid, + new_tree_oid, &second_commit); + if (ret < 0) { + ret = error(_("failed writing second commit")); + goto out; + } + + *out = second_commit; + ret = 0; + +out: + if (index_file.len) + unlink(index_file.buf); + strbuf_release(&index_file); + free_commit_list(parents); + release_index(&index); + return ret; +} + +static int cmd_history_split(int argc, + const char **argv, + const char *prefix, + struct repository *repo) +{ + const char * const usage[] = { + GIT_HISTORY_SPLIT_USAGE, + NULL, + }; + enum ref_action action = REF_ACTION_DEFAULT; + int dry_run = 0; + struct option options[] = { + OPT_CALLBACK_F(0, "update-refs", &action, N_(""), + N_("control ref update behavior (branches|head|print)"), + PARSE_OPT_NONEG, parse_ref_action), + OPT_BOOL('n', "dry-run", &dry_run, + N_("perform a dry-run without updating any refs")), + OPT_END(), + }; + struct commit *original, *rewritten = NULL; + struct strbuf reflog_msg = STRBUF_INIT; + struct pathspec pathspec = { 0 }; + struct rev_info revs = { 0 }; + int ret; + + argc = parse_options(argc, argv, prefix, options, usage, 0); + if (argc < 1) { + ret = error(_("command expects a committish")); + goto out; + } + repo_config(repo, git_default_config, NULL); + + if (action == REF_ACTION_DEFAULT) + action = REF_ACTION_BRANCHES; + + parse_pathspec(&pathspec, 0, + PATHSPEC_PREFER_FULL | + PATHSPEC_SYMLINK_LEADING_PATH | + PATHSPEC_PREFIX_ORIGIN, + prefix, argv + 1); + + original = lookup_commit_reference_by_name(argv[0]); + if (!original) { + ret = error(_("commit cannot be found: %s"), argv[0]); + goto out; + } + + ret = setup_revwalk(repo, action, original, &revs); + if (ret < 0) + goto out; + + if (original->parents && original->parents->next) { + ret = error(_("cannot split up merge commit")); + goto out; + } + + ret = split_commit(repo, original, &pathspec, &rewritten); + if (ret < 0) + goto out; + + strbuf_addf(&reflog_msg, "split: updating %s", argv[0]); + + ret = handle_reference_updates(&revs, action, original, rewritten, + reflog_msg.buf, dry_run); + if (ret < 0) { + ret = error(_("failed replaying descendants")); + goto out; + } + + ret = 0; + +out: + strbuf_release(&reflog_msg); + clear_pathspec(&pathspec); + release_revisions(&revs); + return ret; +} + int cmd_history(int argc, const char **argv, const char *prefix, @@ -491,11 +739,13 @@ int cmd_history(int argc, { const char * const usage[] = { GIT_HISTORY_REWORD_USAGE, + GIT_HISTORY_SPLIT_USAGE, NULL, }; parse_opt_subcommand_fn *fn = NULL; struct option options[] = { OPT_SUBCOMMAND("reword", &fn, cmd_history_reword), + OPT_SUBCOMMAND("split", &fn, cmd_history_split), OPT_END(), }; diff --git a/t/meson.build b/t/meson.build index 6d91470ebc1cf7..2d578ef58b5169 100644 --- a/t/meson.build +++ b/t/meson.build @@ -392,6 +392,7 @@ integration_tests = [ 't3438-rebase-broken-files.sh', 't3450-history.sh', 't3451-history-reword.sh', + 't3452-history-split.sh', 't3500-cherry.sh', 't3501-revert-cherry-pick.sh', 't3502-cherry-pick-merge.sh', diff --git a/t/t3452-history-split.sh b/t/t3452-history-split.sh new file mode 100755 index 00000000000000..8ed0cebb500296 --- /dev/null +++ b/t/t3452-history-split.sh @@ -0,0 +1,757 @@ +#!/bin/sh + +test_description='tests for git-history split subcommand' + +. ./test-lib.sh +. "$TEST_DIRECTORY/lib-log-graph.sh" + +# The fake editor takes multiple arguments, each of which represents a commit +# message. Subsequent invocations of the editor will then yield those messages +# in order. +# +set_fake_editor () { + printf "%s\n" "$@" >fake-input && + write_script fake-editor.sh <<-\EOF && + head -n1 fake-input >"$1" + sed 1d fake-input >fake-input.trimmed && + mv fake-input.trimmed fake-input + EOF + test_set_editor "$(pwd)"/fake-editor.sh +} + +expect_graph () { + cat >expect && + lib_test_cmp_graph --graph --format=%s "$@" +} + +expect_log () { + git log --format="%s" >actual && + cat >expect && + test_cmp expect actual +} + +expect_tree_entries () { + git ls-tree --name-only "$1" >actual && + cat >expect && + test_cmp expect actual +} + +test_expect_success 'refuses to work with merge commits' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit base && + git branch branch && + test_commit ours && + git switch branch && + test_commit theirs && + git switch - && + git merge theirs && + test_must_fail git history split HEAD 2>err && + test_grep "cannot split up merge commit" err && + test_must_fail git history split HEAD~ 2>err && + test_grep "replaying merge commits is not supported yet" err + ) +' + +test_expect_success 'errors on missing commit argument' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + test_must_fail git history split 2>err && + test_grep "command expects a committish" err + ) +' + +test_expect_success 'errors on unknown revision' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + test_must_fail git history split does-not-exist 2>err && + test_grep "commit cannot be found" err + ) +' + +test_expect_success '--dry-run does not modify any refs' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit base && + touch bar foo && + git add . && + git commit -m split-me && + + git refs list --include-root-refs >before && + + set_fake_editor "first" "second" && + git history split --dry-run HEAD <<-EOF && + y + n + EOF + + git refs list --include-root-refs >after && + test_cmp before after + ) +' + +test_expect_success 'can split up tip commit' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + touch bar foo && + git add . && + git commit -m split-me && + + git symbolic-ref HEAD >expect && + set_fake_editor "first" "second" && + git history split HEAD <<-EOF && + y + n + EOF + git symbolic-ref HEAD >actual && + test_cmp expect actual && + + expect_log <<-EOF && + second + first + initial + EOF + + expect_tree_entries HEAD~ <<-EOF && + bar + initial.t + EOF + + expect_tree_entries HEAD <<-EOF && + bar + foo + initial.t + EOF + + git reflog >reflog && + test_grep "split: updating HEAD" reflog + ) +' + +test_expect_success 'can split up root commit' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + touch bar foo && + git add . && + git commit -m root && + test_commit tip && + + set_fake_editor "first" "second" && + git history split HEAD~ <<-EOF && + y + n + EOF + + expect_log <<-EOF && + tip + second + first + EOF + + expect_tree_entries HEAD~2 <<-EOF && + bar + EOF + + expect_tree_entries HEAD~ <<-EOF && + bar + foo + EOF + + expect_tree_entries HEAD <<-EOF + bar + foo + tip.t + EOF + ) +' + +test_expect_success 'can split up in-between commit' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + touch bar foo && + git add . && + git commit -m split-me && + test_commit tip && + + set_fake_editor "first" "second" && + git history split HEAD~ <<-EOF && + y + n + EOF + + expect_log <<-EOF && + tip + second + first + initial + EOF + + expect_tree_entries HEAD~2 <<-EOF && + bar + initial.t + EOF + + expect_tree_entries HEAD~ <<-EOF && + bar + foo + initial.t + EOF + + expect_tree_entries HEAD <<-EOF + bar + foo + initial.t + tip.t + EOF + ) +' + +test_expect_success 'can split HEAD only' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit base && + touch a b && + git add . && + git commit -m split-me && + git branch unrelated && + + set_fake_editor "ours-a" "ours-b" && + git history split --update-refs=head HEAD <<-EOF && + y + n + EOF + expect_graph --branches <<-EOF + * ours-b + * ours-a + | * split-me + |/ + * base + EOF + ) +' + +test_expect_success 'can split detached HEAD' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + touch bar foo && + git add . && + git commit -m split-me && + git checkout --detach HEAD && + + set_fake_editor "first" "second" && + git history split --update-refs=head HEAD <<-EOF && + y + n + EOF + + # HEAD should be detached and updated. + test_must_fail git symbolic-ref HEAD && + + expect_log <<-EOF + second + first + initial + EOF + ) +' + +test_expect_success 'can split commit in unrelated branch' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit base && + git branch ours && + git switch --create theirs && + touch theirs-a theirs-b && + git add . && + git commit -m theirs && + git switch ours && + test_commit ours && + + # With --update-refs=head it is not possible to split up a + # commit that is unrelated to HEAD. + test_must_fail git history split --update-refs=head theirs 2>err && + test_grep "rewritten commit must be an ancestor of HEAD" err && + + set_fake_editor "theirs-rewritten-a" "theirs-rewritten-b" && + git history split theirs <<-EOF && + y + n + EOF + expect_graph --branches <<-EOF && + * ours + | * theirs-rewritten-b + | * theirs-rewritten-a + |/ + * base + EOF + + expect_tree_entries theirs~ <<-EOF && + base.t + theirs-a + EOF + + expect_tree_entries theirs <<-EOF + base.t + theirs-a + theirs-b + EOF + ) +' + +test_expect_success 'updates multiple descendant branches' ' + test_when_finished "rm -rf repo" && + git init repo --initial-branch=main && + ( + cd repo && + test_commit base && + touch file-a file-b && + git add . && + git commit -m split-me && + git branch branch && + test_commit on-main && + git switch branch && + test_commit on-branch && + git switch main && + + set_fake_editor "split-a" "split-b" && + git history split HEAD~ <<-EOF && + y + n + EOF + + # Both branches should now descend from the split commits. + expect_graph --branches <<-EOF + * on-branch + | * on-main + |/ + * split-b + * split-a + * base + EOF + ) +' + +test_expect_success 'can pick multiple hunks' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + touch bar baz foo qux && + git add . && + git commit -m split-me && + + set_fake_editor "first" "second" && + git history split HEAD <<-EOF && + y + n + y + n + EOF + + expect_tree_entries HEAD~ <<-EOF && + bar + foo + EOF + + expect_tree_entries HEAD <<-EOF + bar + baz + foo + qux + EOF + ) +' + +test_expect_success 'can use only last hunk' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + touch bar foo && + git add . && + git commit -m split-me && + + set_fake_editor "first" "second" && + git history split HEAD <<-EOF && + n + y + EOF + + expect_log <<-EOF && + second + first + EOF + + expect_tree_entries HEAD~ <<-EOF && + foo + EOF + + expect_tree_entries HEAD <<-EOF + bar + foo + EOF + ) +' + +test_expect_success 'can split commit with file deletions' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + echo a >a && + echo b >b && + echo c >c && + git add . && + git commit -m base && + git rm a b && + git commit -m delete-both && + + set_fake_editor "delete-a" "delete-b" && + git history split HEAD <<-EOF && + y + n + EOF + + expect_log <<-EOF && + delete-b + delete-a + base + EOF + + expect_tree_entries HEAD~ <<-EOF && + b + c + EOF + + expect_tree_entries HEAD <<-EOF + c + EOF + ) +' + +test_expect_success 'preserves original authorship' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + touch bar foo && + git add . && + GIT_AUTHOR_NAME="Other Author" \ + GIT_AUTHOR_EMAIL="other@example.com" \ + git commit -m split-me && + + set_fake_editor "first" "second" && + git history split HEAD <<-EOF && + y + n + EOF + + git log -1 --format="%an <%ae>" HEAD~ >actual && + echo "Other Author " >expect && + test_cmp expect actual && + + git log -1 --format="%an <%ae>" HEAD >actual && + test_cmp expect actual + ) +' + +test_expect_success 'aborts with empty commit message' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + touch bar foo && + git add . && + git commit -m split-me && + + set_fake_editor "" && + test_must_fail git history split HEAD <<-EOF 2>err && + y + n + EOF + test_grep "Aborting commit due to empty commit message." err + ) +' + +test_expect_success 'commit message editor sees split-out changes' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + touch bar foo && + git add . && + git commit -m split-me && + + write_script fake-editor.sh <<-\EOF && + cat "$1" >>MESSAGES && + echo "some commit message" >"$1" + EOF + test_set_editor "$(pwd)"/fake-editor.sh && + + git history split HEAD <<-EOF && + y + n + EOF + + # Note that we expect to see the messages twice, once for each + # of the commits. The committed files are different though. + cat >expect <<-EOF && + split-me + + # Please enter the commit message for the split-out changes. Lines starting + # with ${SQ}#${SQ} will be ignored, and an empty message aborts the commit. + # Changes to be committed: + # new file: bar + # + split-me + + # Please enter the commit message for the split-out changes. Lines starting + # with ${SQ}#${SQ} will be ignored, and an empty message aborts the commit. + # Changes to be committed: + # new file: foo + # + EOF + test_cmp expect MESSAGES && + + expect_log <<-EOF + some commit message + some commit message + EOF + ) +' + +test_expect_success 'can use pathspec to limit what gets split' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + touch bar foo && + git add . && + git commit -m split-me && + + set_fake_editor "first" "second" && + git history split HEAD -- foo <<-EOF && + y + EOF + + expect_tree_entries HEAD~ <<-EOF && + foo + EOF + + expect_tree_entries HEAD <<-EOF + bar + foo + EOF + ) +' + +test_expect_success 'pathspec matching no files produces empty split error' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + touch bar foo && + git add . && + git commit -m split-me && + + set_fake_editor "first" "second" && + test_must_fail git history split HEAD -- nonexistent 2>err && + test_grep "split commit is empty" err + ) +' + +test_expect_success 'split with multiple pathspecs' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + touch a b c d && + git add . && + git commit -m split-me && + + # Only a and c should be offered for splitting. + set_fake_editor "split-ac" "remainder" && + git history split HEAD -- a c <<-EOF && + y + y + EOF + + expect_tree_entries HEAD~ <<-EOF && + a + c + initial.t + EOF + + expect_tree_entries HEAD <<-EOF + a + b + c + d + initial.t + EOF + ) +' + +test_expect_success 'split with file mode change' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + echo content >script && + git add . && + git commit -m base && + test_chmod +x script && + echo change >script && + git commit -a -m "mode and content change" && + + set_fake_editor "mode-change" "content-change" && + git history split HEAD <<-EOF && + y + n + EOF + + expect_log <<-EOF + content-change + mode-change + base + EOF + ) +' + +test_expect_success 'refuses to create empty split-out commit' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit base && + touch bar foo && + git add . && + git commit -m split-me && + + test_must_fail git history split HEAD 2>err <<-EOF && + n + n + EOF + test_grep "split commit is empty" err + ) +' + +test_expect_success 'hooks are not executed for rewritten commits' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + touch bar foo && + git add . && + git commit -m split-me && + old_head=$(git rev-parse HEAD) && + + ORIG_PATH="$(pwd)" && + export ORIG_PATH && + for hook in prepare-commit-msg pre-commit post-commit post-rewrite commit-msg + do + write_script .git/hooks/$hook <<-\EOF || exit 1 + touch "$ORIG_PATH"/hooks.log + EOF + done && + + set_fake_editor "first" "second" && + git history split HEAD <<-EOF && + y + n + EOF + + expect_log <<-EOF && + second + first + EOF + + test_path_is_missing hooks.log + ) +' + +test_expect_success 'refuses to create empty original commit' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + touch bar foo && + git add . && + git commit -m split-me && + + test_must_fail git history split HEAD 2>err <<-EOF && + y + y + EOF + test_grep "split commit tree matches original commit" err + ) +' + +test_expect_success 'retains changes in the worktree and index' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + echo a >a && + echo b >b && + git add . && + git commit -m "initial commit" && + echo a-modified >a && + echo b-modified >b && + git add b && + set_fake_editor "a-only" "remainder" && + git history split HEAD <<-EOF && + y + n + EOF + + expect_tree_entries HEAD~ <<-EOF && + a + EOF + expect_tree_entries HEAD <<-EOF && + a + b + EOF + + cat >expect <<-\EOF && + M a + M b + ?? actual + ?? expect + ?? fake-editor.sh + ?? fake-input + EOF + git status --porcelain >actual && + test_cmp expect actual + ) +' + +test_done From 3b5fb32da836f5aead1cef319bc3e0a9b975ea35 Mon Sep 17 00:00:00 2001 From: Nasser Grainawi Date: Tue, 3 Mar 2026 15:40:44 -0800 Subject: [PATCH 027/236] submodule: fetch missing objects from default remote When be76c21282 (fetch: ensure submodule objects fetched, 2018-12-06) added support for fetching a missing submodule object by id, it hardcoded the remote name as "origin" and deferred anything more complicated for a later patch. Implement the NEEDSWORK item to remove the hardcoded assumption by adding and using a submodule helper subcmd 'get-default-remote'. Fixing this lets 'git fetch --recurse-submodules' succeed when the fetched commit(s) in the superproject trigger a submodule fetch, and that submodule's default remote name is not "origin". Add non-"origin" remote tests to t5526-fetch-submodules.sh and t5572-pull-submodule.sh demonstrating this works as expected and add dedicated tests for get-default-remote. Signed-off-by: Nasser Grainawi Reviewed-by: Jacob Keller Signed-off-by: Junio C Hamano --- builtin/submodule--helper.c | 38 +++++ submodule.c | 17 ++- t/meson.build | 1 + t/t5526-fetch-submodules.sh | 71 ++++++++- t/t5572-pull-submodule.sh | 21 ++- t/t7426-submodule-get-default-remote.sh | 186 ++++++++++++++++++++++++ 6 files changed, 330 insertions(+), 4 deletions(-) create mode 100755 t/t7426-submodule-get-default-remote.sh diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index d537ab087a02e9..b180a240910024 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -112,6 +112,43 @@ static int get_default_remote_submodule(const char *module_path, char **default_ return 0; } +static int module_get_default_remote(int argc, const char **argv, const char *prefix, + struct repository *repo UNUSED) +{ + const char *path; + char *resolved_path = NULL; + char *default_remote = NULL; + int code; + struct option options[] = { + OPT_END() + }; + const char *const usage[] = { + N_("git submodule--helper get-default-remote "), + NULL + }; + + argc = parse_options(argc, argv, prefix, options, usage, 0); + if (argc != 1) + usage_with_options(usage, options); + + path = argv[0]; + if (prefix && *prefix && !is_absolute_path(path)) { + resolved_path = xstrfmt("%s%s", prefix, path); + path = resolved_path; + } + + code = get_default_remote_submodule(path, &default_remote); + if (code) { + free(resolved_path); + return code; + } + + printf("%s\n", default_remote); + free(default_remote); + free(resolved_path); + return 0; +} + /* the result should be freed by the caller. */ static char *get_submodule_displaypath(const char *path, const char *prefix, const char *super_prefix) @@ -3608,6 +3645,7 @@ int cmd_submodule__helper(int argc, OPT_SUBCOMMAND("set-url", &fn, module_set_url), OPT_SUBCOMMAND("set-branch", &fn, module_set_branch), OPT_SUBCOMMAND("create-branch", &fn, module_create_branch), + OPT_SUBCOMMAND("get-default-remote", &fn, module_get_default_remote), OPT_END() }; argc = parse_options(argc, argv, prefix, options, usage, 0); diff --git a/submodule.c b/submodule.c index 40a5c6fb9d1545..6599657f341e10 100644 --- a/submodule.c +++ b/submodule.c @@ -1706,6 +1706,8 @@ static int get_next_submodule(struct child_process *cp, struct strbuf *err, if (spf->oid_fetch_tasks_nr) { struct fetch_task *task = spf->oid_fetch_tasks[spf->oid_fetch_tasks_nr - 1]; + struct child_process cp_remote = CHILD_PROCESS_INIT; + struct strbuf remote_name = STRBUF_INIT; spf->oid_fetch_tasks_nr--; child_process_init(cp); @@ -1719,8 +1721,19 @@ static int get_next_submodule(struct child_process *cp, struct strbuf *err, strvec_pushf(&cp->args, "--submodule-prefix=%s%s/", spf->prefix, task->sub->path); - /* NEEDSWORK: have get_default_remote from submodule--helper */ - strvec_push(&cp->args, "origin"); + cp_remote.git_cmd = 1; + strvec_pushl(&cp_remote.args, "submodule--helper", + "get-default-remote", task->sub->path, NULL); + + if (!capture_command(&cp_remote, &remote_name, 0)) { + strbuf_trim_trailing_newline(&remote_name); + strvec_push(&cp->args, remote_name.buf); + } else { + /* Fallback to "origin" if the helper fails */ + strvec_push(&cp->args, "origin"); + } + strbuf_release(&remote_name); + oid_array_for_each_unique(task->commits, append_oid_to_argv, &cp->args); diff --git a/t/meson.build b/t/meson.build index 459c52a48972e4..6ff54cf2769038 100644 --- a/t/meson.build +++ b/t/meson.build @@ -887,6 +887,7 @@ integration_tests = [ 't7422-submodule-output.sh', 't7423-submodule-symlinks.sh', 't7424-submodule-mixed-ref-formats.sh', + 't7426-submodule-get-default-remote.sh', 't7450-bad-git-dotfiles.sh', 't7500-commit-template-squash-signoff.sh', 't7501-commit-basic-functionality.sh', diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh index 5e566205ba4b95..1242ee918526ea 100755 --- a/t/t5526-fetch-submodules.sh +++ b/t/t5526-fetch-submodules.sh @@ -834,11 +834,18 @@ test_expect_success "fetch new submodule commits on-demand outside standard refs git commit -m "updated submodules outside of refs/heads" && E=$(git rev-parse HEAD) && git update-ref refs/changes/3 $E && + FETCH_TRACE="$(pwd)/trace.out" && + test_when_finished "rm -f \"$FETCH_TRACE\"" && ( cd downstream && - git fetch --recurse-submodules origin refs/changes/3:refs/heads/my_branch && + GIT_TRACE="$FETCH_TRACE" git fetch --recurse-submodules origin \ + refs/changes/3:refs/heads/my_branch && git -C submodule cat-file -t $C && git -C sub1 cat-file -t $D && + test_grep "trace: built-in: git submodule--helper get-default-remote sub1" \ + "$FETCH_TRACE" && + test_grep "trace: built-in: git fetch .* --submodule-prefix=sub1/ origin" \ + "$FETCH_TRACE" && git checkout --recurse-submodules FETCH_HEAD ) ' @@ -929,6 +936,68 @@ test_expect_success 'fetch new submodule commit intermittently referenced by sup ) ' +test_expect_success 'fetch new submodule commits on-demand outside standard refspec with custom remote name' ' + # depends on the previous test for setup + + # Rename the remote in sub1 from "origin" to "custom_remote" + git -C downstream/sub1 remote rename origin custom_remote && + + # Create new commits in the original submodules + C=$(git -C submodule commit-tree \ + -m "change outside refs/heads for custom remote" HEAD^{tree}) && + git -C submodule update-ref refs/changes/custom1 $C && + git update-index --cacheinfo 160000 $C submodule && + test_tick && + + D=$(git -C sub1 commit-tree \ + -m "change outside refs/heads for custom remote" HEAD^{tree}) && + git -C sub1 update-ref refs/changes/custom2 $D && + git update-index --cacheinfo 160000 $D sub1 && + + git commit \ + -m "updated submodules outside of refs/heads for custom remote" && + E=$(git rev-parse HEAD) && + git update-ref refs/changes/custom3 $E && + FETCH_TRACE="$(pwd)/trace.out" && + test_when_finished "rm -f \"$FETCH_TRACE\"" && + ( + cd downstream && + GIT_TRACE="$FETCH_TRACE" git fetch --recurse-submodules origin \ + refs/changes/custom3:refs/heads/my_other_branch && + git -C submodule cat-file -t $C && + git -C sub1 cat-file -t $D && + test_grep "trace: built-in: git submodule--helper get-default-remote sub1" \ + "$FETCH_TRACE" && + test_grep "trace: built-in: git fetch .* --submodule-prefix=sub1/ custom_remote $D" \ + "$FETCH_TRACE" && + git checkout --recurse-submodules FETCH_HEAD + ) +' + +test_expect_success 'fetch new submodule commit on-demand in FETCH_HEAD from custom remote' ' + # depends on the previous test for setup + + C=$(git -C submodule commit-tree -m "another change outside refs/heads for custom remote" HEAD^{tree}) && + git -C submodule update-ref refs/changes/custom4 $C && + git update-index --cacheinfo 160000 $C submodule && + test_tick && + + D=$(git -C sub1 commit-tree -m "another change outside refs/heads for custom remote" HEAD^{tree}) && + git -C sub1 update-ref refs/changes/custom5 $D && + git update-index --cacheinfo 160000 $D sub1 && + + git commit -m "updated submodules outside of refs/heads" && + E=$(git rev-parse HEAD) && + git update-ref refs/changes/custom6 $E && + ( + cd downstream && + git fetch --recurse-submodules origin refs/changes/custom6 && + git -C submodule cat-file -t $C && + git -C sub1 cat-file -t $D && + git checkout --recurse-submodules FETCH_HEAD + ) +' + add_commit_push () { dir="$1" && msg="$2" && diff --git a/t/t5572-pull-submodule.sh b/t/t5572-pull-submodule.sh index 45f384dd328054..42d14328b6b42a 100755 --- a/t/t5572-pull-submodule.sh +++ b/t/t5572-pull-submodule.sh @@ -257,7 +257,26 @@ test_expect_success 'fetch submodule remote of different name from superproject' git -C a-submodule reset --hard HEAD^^ && git -C child pull --no-recurse-submodules && - git -C child submodule update + git -C child submodule update && + test_path_is_file child/a-submodule/moreecho.t +' + +test_expect_success 'fetch non-origin submodule remote named different from superproject' ' + git -C child/a-submodule remote rename origin o2 && + + # Create commit that is unreachable from current master branch + # newmain is already reset in the previous test + test_commit -C a-submodule echo_o2 && + test_commit -C a-submodule moreecho_o2 && + subc=$(git -C a-submodule rev-parse --short HEAD) && + + git -C parent/a-submodule fetch && + git -C parent/a-submodule checkout "$subc" && + git -C parent commit -m "update submodule o2" a-submodule && + git -C a-submodule reset --hard HEAD^^ && + + git -C child pull --recurse-submodules && + test_path_is_file child/a-submodule/moreecho_o2.t ' test_done diff --git a/t/t7426-submodule-get-default-remote.sh b/t/t7426-submodule-get-default-remote.sh new file mode 100755 index 00000000000000..b842af9a2d26ff --- /dev/null +++ b/t/t7426-submodule-get-default-remote.sh @@ -0,0 +1,186 @@ +#!/bin/sh + +test_description='git submodule--helper get-default-remote' + +TEST_NO_CREATE_REPO=1 +. ./test-lib.sh + +test_expect_success 'setup' ' + git config --global protocol.file.allow always +' + +test_expect_success 'setup repositories' ' + # Create a repository to be used as submodule + git init sub && + test_commit --no-tag -C sub "initial commit in sub" file.txt "sub content" && + + # Create main repository + git init super && + ( + cd super && + mkdir subdir && + test_commit --no-tag -C subdir "initial commit in super" main.txt "super content" && + git submodule add ../sub subpath && + git commit -m "add submodule 'sub' at subpath" + ) +' + +test_expect_success 'get-default-remote returns origin for initialized submodule' ' + ( + cd super && + git submodule update --init && + echo "origin" >expect && + git submodule--helper get-default-remote subpath >actual && + test_cmp expect actual + ) +' + +test_expect_success 'get-default-remote works from subdirectory' ' + ( + cd super/subdir && + echo "origin" >expect && + git submodule--helper get-default-remote ../subpath >actual && + test_cmp expect actual + ) +' + +test_expect_success 'get-default-remote fails with non-existent path' ' + ( + cd super && + test_must_fail git submodule--helper get-default-remote nonexistent 2>err && + test_grep "could not get a repository handle" err + ) +' + +test_expect_success 'get-default-remote fails with non-submodule path' ' + ( + cd super && + test_must_fail git submodule--helper get-default-remote subdir 2>err && + test_grep "could not get a repository handle" err + ) +' + +test_expect_success 'get-default-remote fails without path argument' ' + ( + cd super && + test_must_fail git submodule--helper get-default-remote 2>err && + test_grep "usage:" err + ) +' + +test_expect_success 'get-default-remote fails with too many arguments' ' + ( + cd super && + test_must_fail git submodule--helper get-default-remote subpath subdir 2>err && + test_grep "usage:" err + ) +' + +test_expect_success 'setup submodule with non-origin default remote name' ' + # Create another submodule path with a different remote name + ( + cd super && + git submodule add ../sub upstream-subpath && + git commit -m "add second submodule in upstream-subpath" && + git submodule update --init upstream-subpath && + + # Change the remote name in the submodule + cd upstream-subpath && + git remote rename origin upstream + ) +' + +test_expect_success 'get-default-remote returns non-origin remote name' ' + ( + cd super && + echo "upstream" >expect && + git submodule--helper get-default-remote upstream-subpath >actual && + test_cmp expect actual + ) +' + +test_expect_success 'get-default-remote handles submodule with multiple remotes' ' + ( + cd super/subpath && + git remote add other-upstream ../../sub && + git remote add myfork ../../sub + ) && + + ( + cd super && + echo "origin" >expect && + git submodule--helper get-default-remote subpath >actual && + test_cmp expect actual + ) +' + +test_expect_success 'get-default-remote handles submodule with multiple remotes and none are origin' ' + ( + cd super/upstream-subpath && + git remote add yet-another-upstream ../../sub && + git remote add yourfork ../../sub + ) && + + ( + cd super && + echo "upstream" >expect && + git submodule--helper get-default-remote upstream-subpath >actual && + test_cmp expect actual + ) +' + +test_expect_success 'setup nested submodule with non-origin remote' ' + git init innersub && + test_commit --no-tag -C innersub "initial commit in innersub" inner.txt "innersub content" && + + ( + cd sub && + git submodule add ../innersub innersubpath && + git commit -m "add nested submodule at innersubpath" + ) && + + ( + cd super/upstream-subpath && + git pull upstream && + git submodule update --init --recursive . && + ( + cd innersubpath && + git remote rename origin another_upstream + ) + ) +' + +test_expect_success 'get-default-remote works with nested submodule' ' + ( + cd super && + echo "another_upstream" >expect && + git submodule--helper get-default-remote upstream-subpath/innersubpath >actual && + test_cmp expect actual + ) +' + +test_expect_success 'get-default-remote works with submodule that has no remotes' ' + # Create a submodule directory manually without remotes + ( + cd super && + git init no-remote-sub && + test_commit --no-tag -C no-remote-sub "local commit" local.txt "local content" + ) && + + # Add it as a submodule + ( + cd super && + git submodule add ./no-remote-sub && + git commit -m "add local submodule 'no-remote-sub'" + ) && + + ( + cd super && + # Should fall back to "origin" remote name when no remotes exist + echo "origin" >expect && + git submodule--helper get-default-remote no-remote-sub >actual && + test_cmp expect actual + ) +' + +test_done From 6b837b4c3c455cf06c030fa86135b6ee818ae27e Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:01 +0000 Subject: [PATCH 028/236] fsmonitor: fix khash memory leak in do_handle_client The `shown` kh_str_t was freed with kh_release_str() at a point in the code only reachable in the non-trivial response path. When the client receives a trivial response, the code jumps to the `cleanup` label, skipping the kh_release_str() call entirely and leaking the hash table. Fix this by initializing `shown` to NULL and moving the cleanup to the `cleanup` label using kh_destroy_str(), which is safe to call on NULL. This ensures the hash table is freed regardless of which code path is taken. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 242c594646d1f5..bc4571938cc1db 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -671,7 +671,7 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, const struct fsmonitor_batch *batch; struct fsmonitor_batch *remainder = NULL; intmax_t count = 0, duplicates = 0; - kh_str_t *shown; + kh_str_t *shown = NULL; int hash_ret; int do_trivial = 0; int do_flush = 0; @@ -909,8 +909,6 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, total_response_len += payload.len; } - kh_release_str(shown); - pthread_mutex_lock(&state->main_lock); if (token_data->client_ref_count > 0) @@ -954,6 +952,7 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates); cleanup: + kh_destroy_str(shown); strbuf_release(&response_token); strbuf_release(&requested_token_id); strbuf_release(&payload); From d55ffd0ee4f7e4da95019fe78c8d795496c8dce3 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:02 +0000 Subject: [PATCH 029/236] fsmonitor: fix hashmap memory leak in fsmonitor_run_daemon The `state.cookies` hashmap is initialized during daemon startup but never freed during cleanup in the `done:` label of fsmonitor_run_daemon(). The cookie entries also have names allocated via strbuf_detach() that must be freed individually. Iterate the hashmap to free each cookie name, then call hashmap_clear_and_free() to release the entries and table. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index bc4571938cc1db..d8d32b01ef2859 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -1404,6 +1404,15 @@ static int fsmonitor_run_daemon(void) done: pthread_cond_destroy(&state.cookies_cond); pthread_mutex_destroy(&state.main_lock); + { + struct hashmap_iter iter; + struct fsmonitor_cookie_item *cookie; + + hashmap_for_each_entry(&state.cookies, &iter, cookie, entry) + free(cookie->name); + hashmap_clear_and_free(&state.cookies, + struct fsmonitor_cookie_item, entry); + } fsm_listen__dtor(&state); fsm_health__dtor(&state); From ed08395f34f8f432c035bec870e94f0ffd9b59cb Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:03 +0000 Subject: [PATCH 030/236] compat/win32: add pthread_cond_timedwait Add a pthread_cond_timedwait() implementation to the Windows pthread compatibility layer using SleepConditionVariableCS() with a millisecond timeout computed from the absolute deadline. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- compat/win32/pthread.c | 26 ++++++++++++++++++++++++++ compat/win32/pthread.h | 2 ++ 2 files changed, 28 insertions(+) diff --git a/compat/win32/pthread.c b/compat/win32/pthread.c index 7e93146963ec56..398caa96029718 100644 --- a/compat/win32/pthread.c +++ b/compat/win32/pthread.c @@ -66,3 +66,29 @@ int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) return err_win_to_posix(GetLastError()); return 0; } + +int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime) +{ + struct timeval now; + long long now_ms, deadline_ms; + DWORD timeout_ms; + + gettimeofday(&now, NULL); + now_ms = (long long)now.tv_sec * 1000 + now.tv_usec / 1000; + deadline_ms = (long long)abstime->tv_sec * 1000 + + abstime->tv_nsec / 1000000; + + if (deadline_ms <= now_ms) + return ETIMEDOUT; + else + timeout_ms = (DWORD)(deadline_ms - now_ms); + + if (SleepConditionVariableCS(cond, mutex, timeout_ms) == 0) { + DWORD err = GetLastError(); + if (err == ERROR_TIMEOUT) + return ETIMEDOUT; + return err_win_to_posix(err); + } + return 0; +} diff --git a/compat/win32/pthread.h b/compat/win32/pthread.h index ccacc5a53ba976..d80df8d12af2dc 100644 --- a/compat/win32/pthread.h +++ b/compat/win32/pthread.h @@ -64,6 +64,8 @@ int win32_pthread_join(pthread_t *thread, void **value_ptr); pthread_t pthread_self(void); int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex); +int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime); static inline void NORETURN pthread_exit(void *ret) { From f9cc37dc05c137ce1d4ee6d6d5d9d930345ea620 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:04 +0000 Subject: [PATCH 031/236] fsmonitor: use pthread_cond_timedwait for cookie wait The cookie wait in with_lock__wait_for_cookie() uses an infinite pthread_cond_wait() loop. The existing comment notes the desire to switch to pthread_cond_timedwait(), but the routine was not available in git thread-utils. On certain container or overlay filesystems, inotify watches may succeed but events are never delivered. In this case the daemon would hang indefinitely waiting for the cookie event, which in turn causes the client to hang. Replace the infinite wait with a one-second timeout using pthread_cond_timedwait(). If the timeout fires, report an error and let the client proceed with a trivial (full-scan) response rather than blocking forever. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index d8d32b01ef2859..c8ec7b722e953e 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -197,20 +197,31 @@ static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie( unlink(cookie_pathname.buf); /* - * Technically, this is an infinite wait (well, unless another - * thread sends us an abort). I'd like to change this to - * use `pthread_cond_timedwait()` and return an error/timeout - * and let the caller do the trivial response thing, but we - * don't have that routine in our thread-utils. - * - * After extensive beta testing I'm not really worried about - * this. Also note that the above open() and unlink() calls - * will cause at least two FS events on that path, so the odds - * of getting stuck are pretty slim. + * Wait for the listener thread to observe the cookie file. + * Time out after a short interval so that the client + * does not hang forever if the filesystem does not deliver + * events (e.g., on certain container/overlay filesystems + * where inotify watches succeed but events never arrive). */ - while (cookie->result == FCIR_INIT) - pthread_cond_wait(&state->cookies_cond, - &state->main_lock); + { + struct timeval now; + struct timespec ts; + int err = 0; + + gettimeofday(&now, NULL); + ts.tv_sec = now.tv_sec + 1; + ts.tv_nsec = now.tv_usec * 1000; + + while (cookie->result == FCIR_INIT && !err) + err = pthread_cond_timedwait(&state->cookies_cond, + &state->main_lock, + &ts); + if (err == ETIMEDOUT && cookie->result == FCIR_INIT) { + trace_printf_key(&trace_fsmonitor, + "cookie_wait timed out"); + cookie->result = FCIR_ERROR; + } + } done: hashmap_remove(&state->cookies, &cookie->entry, NULL); From 9aaed64aeeceba04ea4de825f3dabc954bf4cec6 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:05 +0000 Subject: [PATCH 032/236] fsmonitor: rename fsm-ipc-darwin.c to fsm-ipc-unix.c The fsmonitor IPC path logic in fsm-ipc-darwin.c is not Darwin-specific and will be reused by the upcoming Linux implementation. Rename it to fsm-ipc-unix.c to reflect that it is shared by all Unix platforms. Introduce FSMONITOR_OS_SETTINGS (set to "unix" for non-Windows, "win32" for Windows) as a separate variable from FSMONITOR_DAEMON_BACKEND so that the build files can distinguish between platform-specific files (listen, health, path-utils) and shared Unix files (ipc, settings). Move fsm-ipc to the FSMONITOR_OS_SETTINGS section in the Makefile, and switch fsm-path-utils to use FSMONITOR_DAEMON_BACKEND since path-utils is platform-specific (there will be separate darwin and linux versions). Based-on-patch-by: Eric DeCosta Based-on-patch-by: Marziyeh Esipreh Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- Makefile | 6 ++--- .../{fsm-ipc-darwin.c => fsm-ipc-unix.c} | 0 config.mak.uname | 2 +- contrib/buildsystems/CMakeLists.txt | 25 +++++++++---------- meson.build | 7 ++++-- 5 files changed, 21 insertions(+), 19 deletions(-) rename compat/fsmonitor/{fsm-ipc-darwin.c => fsm-ipc-unix.c} (100%) diff --git a/Makefile b/Makefile index 89d8d73ec0a21b..c04e747af8d463 100644 --- a/Makefile +++ b/Makefile @@ -408,7 +408,7 @@ include shared.mak # If your platform has OS-specific ways to tell if a repo is incompatible with # fsmonitor (whether the hook or IPC daemon version), set FSMONITOR_OS_SETTINGS # to the "" of the corresponding `compat/fsmonitor/fsm-settings-.c` -# that implements the `fsm_os_settings__*()` routines. +# and `compat/fsmonitor/fsm-ipc-.c` files. # # Define LINK_FUZZ_PROGRAMS if you want `make all` to also build the fuzz test # programs in oss-fuzz/. @@ -2323,13 +2323,13 @@ ifdef FSMONITOR_DAEMON_BACKEND COMPAT_CFLAGS += -DHAVE_FSMONITOR_DAEMON_BACKEND COMPAT_OBJS += compat/fsmonitor/fsm-listen-$(FSMONITOR_DAEMON_BACKEND).o COMPAT_OBJS += compat/fsmonitor/fsm-health-$(FSMONITOR_DAEMON_BACKEND).o - COMPAT_OBJS += compat/fsmonitor/fsm-ipc-$(FSMONITOR_DAEMON_BACKEND).o endif ifdef FSMONITOR_OS_SETTINGS COMPAT_CFLAGS += -DHAVE_FSMONITOR_OS_SETTINGS + COMPAT_OBJS += compat/fsmonitor/fsm-ipc-$(FSMONITOR_OS_SETTINGS).o COMPAT_OBJS += compat/fsmonitor/fsm-settings-$(FSMONITOR_OS_SETTINGS).o - COMPAT_OBJS += compat/fsmonitor/fsm-path-utils-$(FSMONITOR_OS_SETTINGS).o + COMPAT_OBJS += compat/fsmonitor/fsm-path-utils-$(FSMONITOR_DAEMON_BACKEND).o endif ifdef WITH_BREAKING_CHANGES diff --git a/compat/fsmonitor/fsm-ipc-darwin.c b/compat/fsmonitor/fsm-ipc-unix.c similarity index 100% rename from compat/fsmonitor/fsm-ipc-darwin.c rename to compat/fsmonitor/fsm-ipc-unix.c diff --git a/config.mak.uname b/config.mak.uname index 1691c6ae6e01e3..00bcb84cee15c3 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -178,7 +178,7 @@ ifeq ($(uname_S),Darwin) ifndef NO_PTHREADS ifndef NO_UNIX_SOCKETS FSMONITOR_DAEMON_BACKEND = darwin - FSMONITOR_OS_SETTINGS = darwin + FSMONITOR_OS_SETTINGS = unix endif endif diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 28877feb9d1707..6197d5729cbfe4 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -291,23 +291,22 @@ endif() if(SUPPORTS_SIMPLE_IPC) if(CMAKE_SYSTEM_NAME STREQUAL "Windows") - add_compile_definitions(HAVE_FSMONITOR_DAEMON_BACKEND) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-listen-win32.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-health-win32.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-ipc-win32.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-path-utils-win32.c) - - add_compile_definitions(HAVE_FSMONITOR_OS_SETTINGS) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-settings-win32.c) + set(FSMONITOR_DAEMON_BACKEND "win32") + set(FSMONITOR_OS_SETTINGS "win32") elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(FSMONITOR_DAEMON_BACKEND "darwin") + set(FSMONITOR_OS_SETTINGS "unix") + endif() + + if(FSMONITOR_DAEMON_BACKEND) add_compile_definitions(HAVE_FSMONITOR_DAEMON_BACKEND) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-listen-darwin.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-health-darwin.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-ipc-darwin.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-path-utils-darwin.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-listen-${FSMONITOR_DAEMON_BACKEND}.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-health-${FSMONITOR_DAEMON_BACKEND}.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-ipc-${FSMONITOR_OS_SETTINGS}.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-path-utils-${FSMONITOR_DAEMON_BACKEND}.c) add_compile_definitions(HAVE_FSMONITOR_OS_SETTINGS) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-settings-darwin.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-settings-${FSMONITOR_DAEMON_BACKEND}.c) endif() endif() diff --git a/meson.build b/meson.build index dd52efd1c87574..86a68365a99099 100644 --- a/meson.build +++ b/meson.build @@ -1320,10 +1320,13 @@ else endif fsmonitor_backend = '' +fsmonitor_os = '' if host_machine.system() == 'windows' fsmonitor_backend = 'win32' + fsmonitor_os = 'win32' elif host_machine.system() == 'darwin' fsmonitor_backend = 'darwin' + fsmonitor_os = 'unix' libgit_dependencies += dependency('CoreServices') endif if fsmonitor_backend != '' @@ -1332,14 +1335,14 @@ if fsmonitor_backend != '' libgit_sources += [ 'compat/fsmonitor/fsm-health-' + fsmonitor_backend + '.c', - 'compat/fsmonitor/fsm-ipc-' + fsmonitor_backend + '.c', + 'compat/fsmonitor/fsm-ipc-' + fsmonitor_os + '.c', 'compat/fsmonitor/fsm-listen-' + fsmonitor_backend + '.c', 'compat/fsmonitor/fsm-path-utils-' + fsmonitor_backend + '.c', 'compat/fsmonitor/fsm-settings-' + fsmonitor_backend + '.c', ] endif build_options_config.set_quoted('FSMONITOR_DAEMON_BACKEND', fsmonitor_backend) -build_options_config.set_quoted('FSMONITOR_OS_SETTINGS', fsmonitor_backend) +build_options_config.set_quoted('FSMONITOR_OS_SETTINGS', fsmonitor_os) if not get_option('b_sanitize').contains('address') and get_option('regex').allowed() and compiler.has_header('regex.h') and compiler.get_define('REG_STARTEND', prefix: '#include ') != '' build_options_config.set('NO_REGEX', '') From a713ceab8e4cb3e04c768b52d5b50f4b89cd9011 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:06 +0000 Subject: [PATCH 033/236] fsmonitor: rename fsm-settings-darwin.c to fsm-settings-unix.c The fsmonitor settings logic in fsm-settings-darwin.c is not Darwin-specific and will be reused by the upcoming Linux implementation. Rename it to fsm-settings-unix.c to reflect that it is shared by all Unix platforms. Update the build files (meson.build and CMakeLists.txt) to use FSMONITOR_OS_SETTINGS for fsm-settings, matching the approach already used for fsm-ipc. Based-on-patch-by: Eric DeCosta Based-on-patch-by: Marziyeh Esipreh Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- compat/fsmonitor/{fsm-settings-darwin.c => fsm-settings-unix.c} | 0 contrib/buildsystems/CMakeLists.txt | 2 +- meson.build | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename compat/fsmonitor/{fsm-settings-darwin.c => fsm-settings-unix.c} (100%) diff --git a/compat/fsmonitor/fsm-settings-darwin.c b/compat/fsmonitor/fsm-settings-unix.c similarity index 100% rename from compat/fsmonitor/fsm-settings-darwin.c rename to compat/fsmonitor/fsm-settings-unix.c diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 6197d5729cbfe4..d613809e26fd20 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -306,7 +306,7 @@ if(SUPPORTS_SIMPLE_IPC) list(APPEND compat_SOURCES compat/fsmonitor/fsm-path-utils-${FSMONITOR_DAEMON_BACKEND}.c) add_compile_definitions(HAVE_FSMONITOR_OS_SETTINGS) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-settings-${FSMONITOR_DAEMON_BACKEND}.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-settings-${FSMONITOR_OS_SETTINGS}.c) endif() endif() diff --git a/meson.build b/meson.build index 86a68365a99099..4f0c0a33b85c7d 100644 --- a/meson.build +++ b/meson.build @@ -1338,7 +1338,7 @@ if fsmonitor_backend != '' 'compat/fsmonitor/fsm-ipc-' + fsmonitor_os + '.c', 'compat/fsmonitor/fsm-listen-' + fsmonitor_backend + '.c', 'compat/fsmonitor/fsm-path-utils-' + fsmonitor_backend + '.c', - 'compat/fsmonitor/fsm-settings-' + fsmonitor_backend + '.c', + 'compat/fsmonitor/fsm-settings-' + fsmonitor_os + '.c', ] endif build_options_config.set_quoted('FSMONITOR_DAEMON_BACKEND', fsmonitor_backend) From 87d6a750103441ed354e7b007f0157b26f7a2e8b Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:07 +0000 Subject: [PATCH 034/236] fsmonitor: implement filesystem change listener for Linux Implement the built-in fsmonitor daemon for Linux using the inotify API, bringing it to feature parity with the existing Windows and macOS implementations. The implementation uses inotify rather than fanotify because fanotify requires either CAP_SYS_ADMIN or CAP_PERFMON capabilities, making it unsuitable for an unprivileged user-space daemon. While inotify has the limitation of requiring a separate watch on every directory (unlike macOS's FSEvents, which can monitor an entire directory tree with a single watch), it operates without elevated privileges and provides the per-file event granularity needed for fsmonitor. The listener uses inotify_init1(O_NONBLOCK) with a poll loop that checks for events with a 50-millisecond timeout, keeping the inotify queue well-drained to minimize the risk of overflows. Bidirectional hashmaps map between watch descriptors and directory paths for efficient event resolution. Directory renames are tracked using inotify's cookie mechanism to correlate IN_MOVED_FROM and IN_MOVED_TO event pairs; a periodic check detects stale renames where the matching IN_MOVED_TO never arrived, forcing a resync. New directory creation triggers recursive watch registration to ensure all subdirectories are monitored. The IN_MASK_CREATE flag is used where available to prevent modifying existing watches, with a fallback for older kernels. When IN_MASK_CREATE is available and inotify_add_watch returns EEXIST, it means another thread or recursive scan has already registered the watch, so it is safe to ignore. Remote filesystem detection uses statfs() to identify network-mounted filesystems (NFS, CIFS, SMB, FUSE, etc.) via their magic numbers. Mount point information is read from /proc/mounts and matched against the statfs f_fsid to get accurate, human-readable filesystem type names for logging. When the .git directory is on a remote filesystem, the IPC socket falls back to $HOME or a user-configured directory via the fsmonitor.socketDir setting. Based-on-patch-by: Eric DeCosta Based-on-patch-by: Marziyeh Esipreh Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- Documentation/config/fsmonitor--daemon.adoc | 4 +- Documentation/git-fsmonitor--daemon.adoc | 28 +- compat/fsmonitor/fsm-health-linux.c | 33 + compat/fsmonitor/fsm-listen-linux.c | 746 ++++++++++++++++++++ compat/fsmonitor/fsm-path-utils-linux.c | 217 ++++++ config.mak.uname | 10 + contrib/buildsystems/CMakeLists.txt | 8 +- meson.build | 4 + 8 files changed, 1042 insertions(+), 8 deletions(-) create mode 100644 compat/fsmonitor/fsm-health-linux.c create mode 100644 compat/fsmonitor/fsm-listen-linux.c create mode 100644 compat/fsmonitor/fsm-path-utils-linux.c diff --git a/Documentation/config/fsmonitor--daemon.adoc b/Documentation/config/fsmonitor--daemon.adoc index 671f9b94628446..6f8386e29150ff 100644 --- a/Documentation/config/fsmonitor--daemon.adoc +++ b/Documentation/config/fsmonitor--daemon.adoc @@ -4,8 +4,8 @@ fsmonitor.allowRemote:: behavior. Only respected when `core.fsmonitor` is set to `true`. fsmonitor.socketDir:: - This Mac OS-specific option, if set, specifies the directory in + This Mac OS and Linux-specific option, if set, specifies the directory in which to create the Unix domain socket used for communication between the fsmonitor daemon and various Git commands. The directory must - reside on a native Mac OS filesystem. Only respected when `core.fsmonitor` + reside on a native filesystem. Only respected when `core.fsmonitor` is set to `true`. diff --git a/Documentation/git-fsmonitor--daemon.adoc b/Documentation/git-fsmonitor--daemon.adoc index 8fe5241b08b007..12fa866a64ecc9 100644 --- a/Documentation/git-fsmonitor--daemon.adoc +++ b/Documentation/git-fsmonitor--daemon.adoc @@ -76,9 +76,9 @@ repositories; this may be overridden by setting `fsmonitor.allowRemote` to correctly with all network-mounted repositories, so such use is considered experimental. -On Mac OS, the inter-process communication (IPC) between various Git +On Mac OS and Linux, the inter-process communication (IPC) between various Git commands and the fsmonitor daemon is done via a Unix domain socket (UDS) -- a -special type of file -- which is supported by native Mac OS filesystems, +special type of file -- which is supported by native Mac OS and Linux filesystems, but not on network-mounted filesystems, NTFS, or FAT32. Other filesystems may or may not have the needed support; the fsmonitor daemon is not guaranteed to work with these filesystems and such use is considered experimental. @@ -87,13 +87,33 @@ By default, the socket is created in the `.git` directory. However, if the `.git` directory is on a network-mounted filesystem, it will instead be created at `$HOME/.git-fsmonitor-*` unless `$HOME` itself is on a network-mounted filesystem, in which case you must set the configuration -variable `fsmonitor.socketDir` to the path of a directory on a Mac OS native +variable `fsmonitor.socketDir` to the path of a directory on a native filesystem in which to create the socket file. If none of the above directories (`.git`, `$HOME`, or `fsmonitor.socketDir`) -is on a native Mac OS file filesystem the fsmonitor daemon will report an +is on a native filesystem the fsmonitor daemon will report an error that will cause the daemon and the currently running command to exit. +LINUX CAVEATS +~~~~~~~~~~~~~ + +On Linux, the fsmonitor daemon uses inotify to monitor filesystem events. +The inotify system has per-user limits on the number of watches that can +be created. The default limit is typically 8192 watches per user. + +For large repositories with many directories, you may need to increase +this limit. Check the current limit with: + + cat /proc/sys/fs/inotify/max_user_watches + +To temporarily increase the limit: + + sudo sysctl fs.inotify.max_user_watches=65536 + +To make the change permanent, add to `/etc/sysctl.conf`: + + fs.inotify.max_user_watches=65536 + CONFIGURATION ------------- diff --git a/compat/fsmonitor/fsm-health-linux.c b/compat/fsmonitor/fsm-health-linux.c new file mode 100644 index 00000000000000..43d67c4b8b9efa --- /dev/null +++ b/compat/fsmonitor/fsm-health-linux.c @@ -0,0 +1,33 @@ +#include "git-compat-util.h" +#include "config.h" +#include "fsmonitor-ll.h" +#include "fsm-health.h" +#include "fsmonitor--daemon.h" + +/* + * The Linux fsmonitor implementation uses inotify which has its own + * mechanisms for detecting filesystem unmount and other events that + * would require the daemon to shutdown. Therefore, we don't need + * a separate health thread like Windows does. + * + * These stub functions satisfy the interface requirements. + */ + +int fsm_health__ctor(struct fsmonitor_daemon_state *state UNUSED) +{ + return 0; +} + +void fsm_health__dtor(struct fsmonitor_daemon_state *state UNUSED) +{ + return; +} + +void fsm_health__loop(struct fsmonitor_daemon_state *state UNUSED) +{ + return; +} + +void fsm_health__stop_async(struct fsmonitor_daemon_state *state UNUSED) +{ +} diff --git a/compat/fsmonitor/fsm-listen-linux.c b/compat/fsmonitor/fsm-listen-linux.c new file mode 100644 index 00000000000000..e3dca14b620ee3 --- /dev/null +++ b/compat/fsmonitor/fsm-listen-linux.c @@ -0,0 +1,746 @@ +#include "git-compat-util.h" +#include "dir.h" +#include "fsmonitor-ll.h" +#include "fsm-listen.h" +#include "fsmonitor--daemon.h" +#include "fsmonitor-path-utils.h" +#include "gettext.h" +#include "simple-ipc.h" +#include "string-list.h" +#include "trace.h" + +#include + +/* + * Safe value to bitwise OR with rest of mask for + * kernels that do not support IN_MASK_CREATE + */ +#ifndef IN_MASK_CREATE +#define IN_MASK_CREATE 0x00000000 +#endif + +enum shutdown_reason { + SHUTDOWN_CONTINUE = 0, + SHUTDOWN_STOP, + SHUTDOWN_ERROR, + SHUTDOWN_FORCE +}; + +struct watch_entry { + struct hashmap_entry ent; + int wd; + uint32_t cookie; + const char *dir; +}; + +struct rename_entry { + struct hashmap_entry ent; + time_t whence; + uint32_t cookie; + const char *dir; +}; + +struct fsm_listen_data { + int fd_inotify; + enum shutdown_reason shutdown; + struct hashmap watches; + struct hashmap renames; + struct hashmap revwatches; +}; + +static int watch_entry_cmp(const void *cmp_data UNUSED, + const struct hashmap_entry *eptr, + const struct hashmap_entry *entry_or_key, + const void *keydata UNUSED) +{ + const struct watch_entry *e1, *e2; + + e1 = container_of(eptr, const struct watch_entry, ent); + e2 = container_of(entry_or_key, const struct watch_entry, ent); + return e1->wd != e2->wd; +} + +static int revwatches_entry_cmp(const void *cmp_data UNUSED, + const struct hashmap_entry *eptr, + const struct hashmap_entry *entry_or_key, + const void *keydata UNUSED) +{ + const struct watch_entry *e1, *e2; + + e1 = container_of(eptr, const struct watch_entry, ent); + e2 = container_of(entry_or_key, const struct watch_entry, ent); + return strcmp(e1->dir, e2->dir); +} + +static int rename_entry_cmp(const void *cmp_data UNUSED, + const struct hashmap_entry *eptr, + const struct hashmap_entry *entry_or_key, + const void *keydata UNUSED) +{ + const struct rename_entry *e1, *e2; + + e1 = container_of(eptr, const struct rename_entry, ent); + e2 = container_of(entry_or_key, const struct rename_entry, ent); + return e1->cookie != e2->cookie; +} + +/* + * Register an inotify watch, add watch descriptor to path mapping + * and the reverse mapping. + */ +static int add_watch(const char *path, struct fsm_listen_data *data) +{ + const char *interned = strintern(path); + struct watch_entry *w1, *w2; + + /* add the inotify watch, don't allow watches to be modified */ + int wd = inotify_add_watch(data->fd_inotify, interned, + (IN_ALL_EVENTS | IN_ONLYDIR | IN_MASK_CREATE) + ^ IN_ACCESS ^ IN_CLOSE ^ IN_OPEN); + if (wd < 0) { + if (errno == ENOENT || errno == ENOTDIR) + return 0; /* directory was deleted or is not a directory */ + if (errno == EEXIST) + return 0; /* watch already exists, no action needed */ + if (errno == ENOSPC) + return error(_("inotify watch limit reached; " + "increase fs.inotify.max_user_watches")); + return error_errno(_("inotify_add_watch('%s') failed"), interned); + } + + /* add watch descriptor -> directory mapping */ + CALLOC_ARRAY(w1, 1); + w1->wd = wd; + w1->dir = interned; + hashmap_entry_init(&w1->ent, memhash(&w1->wd, sizeof(int))); + hashmap_add(&data->watches, &w1->ent); + + /* add directory -> watch descriptor mapping */ + CALLOC_ARRAY(w2, 1); + w2->wd = wd; + w2->dir = interned; + hashmap_entry_init(&w2->ent, strhash(w2->dir)); + hashmap_add(&data->revwatches, &w2->ent); + + return 0; +} + +/* + * Remove the inotify watch, the watch descriptor to path mapping + * and the reverse mapping. + */ +static void remove_watch(struct watch_entry *w, struct fsm_listen_data *data) +{ + struct watch_entry k1, k2, *w1, *w2; + + /* remove watch, ignore error if kernel already did it */ + if (inotify_rm_watch(data->fd_inotify, w->wd) && errno != EINVAL) + error_errno(_("inotify_rm_watch() failed")); + + k1.wd = w->wd; + hashmap_entry_init(&k1.ent, memhash(&k1.wd, sizeof(int))); + w1 = hashmap_remove_entry(&data->watches, &k1, ent, NULL); + if (!w1) + BUG("double remove of watch for '%s'", w->dir); + + if (w1->cookie) + BUG("removing watch for '%s' which has a pending rename", w1->dir); + + k2.dir = w->dir; + hashmap_entry_init(&k2.ent, strhash(k2.dir)); + w2 = hashmap_remove_entry(&data->revwatches, &k2, ent, NULL); + if (!w2) + BUG("double remove of reverse watch for '%s'", w->dir); + + /* w1->dir and w2->dir are interned strings, we don't own them */ + free(w1); + free(w2); +} + +/* + * Check for stale directory renames. + * + * https://man7.org/linux/man-pages/man7/inotify.7.html + * + * Allow for some small timeout to account for the fact that insertion of the + * IN_MOVED_FROM+IN_MOVED_TO event pair is not atomic, and the possibility that + * there may not be any IN_MOVED_TO event. + * + * If the IN_MOVED_TO event is not received within the timeout then events have + * been missed and the monitor is in an inconsistent state with respect to the + * filesystem. + */ +static int check_stale_dir_renames(struct hashmap *renames, time_t max_age) +{ + struct rename_entry *re; + struct hashmap_iter iter; + + hashmap_for_each_entry(renames, &iter, re, ent) { + if (re->whence <= max_age) + return -1; + } + return 0; +} + +/* + * Track pending renames. + * + * Tracking is done via an event cookie to watch descriptor mapping. + * + * A rename is not complete until matching an IN_MOVED_TO event is received + * for a corresponding IN_MOVED_FROM event. + */ +static void add_dir_rename(uint32_t cookie, const char *path, + struct fsm_listen_data *data) +{ + struct watch_entry k, *w; + struct rename_entry *re; + + /* lookup the watch descriptor for the given path */ + k.dir = path; + hashmap_entry_init(&k.ent, strhash(path)); + w = hashmap_get_entry(&data->revwatches, &k, ent, NULL); + if (!w) { + /* + * This can happen in rare cases where the directory was + * moved before we had a chance to add a watch on it. + * Just ignore this rename. + */ + trace_printf_key(&trace_fsmonitor, + "no watch found for rename from '%s'", path); + return; + } + w->cookie = cookie; + + /* add the pending rename to match against later */ + CALLOC_ARRAY(re, 1); + re->dir = w->dir; + re->cookie = w->cookie; + re->whence = time(NULL); + hashmap_entry_init(&re->ent, memhash(&re->cookie, sizeof(uint32_t))); + hashmap_add(&data->renames, &re->ent); +} + +/* + * Handle directory renames + * + * Once an IN_MOVED_TO event is received, lookup the rename tracking information + * via the event cookie and use this information to update the watch. + */ +static void rename_dir(uint32_t cookie, const char *path, + struct fsm_listen_data *data) +{ + struct rename_entry rek, *re; + struct watch_entry k, *w; + + /* lookup a pending rename to match */ + rek.cookie = cookie; + hashmap_entry_init(&rek.ent, memhash(&rek.cookie, sizeof(uint32_t))); + re = hashmap_get_entry(&data->renames, &rek, ent, NULL); + if (re) { + k.dir = re->dir; + hashmap_entry_init(&k.ent, strhash(k.dir)); + w = hashmap_get_entry(&data->revwatches, &k, ent, NULL); + if (w) { + w->cookie = 0; /* rename handled */ + remove_watch(w, data); + if (add_watch(path, data)) + trace_printf_key(&trace_fsmonitor, + "failed to add watch for renamed dir '%s'", + path); + } else { + /* Directory was moved out of watch tree */ + trace_printf_key(&trace_fsmonitor, + "no matching watch for rename to '%s'", path); + } + hashmap_remove_entry(&data->renames, &rek, ent, NULL); + free(re); + } else { + /* Directory was moved from outside the watch tree */ + trace_printf_key(&trace_fsmonitor, + "no matching cookie for rename to '%s'", path); + } +} + +/* + * Recursively add watches to every directory under path + */ +static int register_inotify(const char *path, + struct fsmonitor_daemon_state *state, + struct fsmonitor_batch *batch) +{ + DIR *dir; + const char *rel; + struct strbuf current = STRBUF_INIT; + struct dirent *de; + struct stat fs; + int ret = -1; + + dir = opendir(path); + if (!dir) { + if (errno == ENOENT || errno == ENOTDIR) + return 0; /* directory was deleted */ + return error_errno(_("opendir('%s') failed"), path); + } + + while ((de = readdir_skip_dot_and_dotdot(dir)) != NULL) { + strbuf_reset(¤t); + strbuf_addf(¤t, "%s/%s", path, de->d_name); + if (lstat(current.buf, &fs)) { + if (errno == ENOENT) + continue; /* file was deleted */ + error_errno(_("lstat('%s') failed"), current.buf); + goto failed; + } + + /* recurse into directory */ + if (S_ISDIR(fs.st_mode)) { + if (add_watch(current.buf, state->listen_data)) + goto failed; + if (register_inotify(current.buf, state, batch)) + goto failed; + } else if (batch) { + rel = current.buf + state->path_worktree_watch.len + 1; + trace_printf_key(&trace_fsmonitor, "explicitly adding '%s'", rel); + fsmonitor_batch__add_path(batch, rel); + } + } + ret = 0; + +failed: + strbuf_release(¤t); + if (closedir(dir) < 0) + return error_errno(_("closedir('%s') failed"), path); + return ret; +} + +static int em_rename_dir_from(uint32_t mask) +{ + return ((mask & IN_ISDIR) && (mask & IN_MOVED_FROM)); +} + +static int em_rename_dir_to(uint32_t mask) +{ + return ((mask & IN_ISDIR) && (mask & IN_MOVED_TO)); +} + +static int em_remove_watch(uint32_t mask) +{ + return (mask & IN_DELETE_SELF); +} + +static int em_dir_renamed(uint32_t mask) +{ + return ((mask & IN_ISDIR) && (mask & IN_MOVE)); +} + +static int em_dir_created(uint32_t mask) +{ + return ((mask & IN_ISDIR) && (mask & IN_CREATE)); +} + +static int em_dir_deleted(uint32_t mask) +{ + return ((mask & IN_ISDIR) && (mask & IN_DELETE)); +} + +static int em_force_shutdown(uint32_t mask) +{ + return (mask & IN_UNMOUNT) || (mask & IN_Q_OVERFLOW); +} + +static int em_ignore(uint32_t mask) +{ + return (mask & IN_IGNORED) || (mask & IN_MOVE_SELF); +} + +static void log_mask_set(const char *path, uint32_t mask) +{ + struct strbuf msg = STRBUF_INIT; + + if (mask & IN_ACCESS) + strbuf_addstr(&msg, "IN_ACCESS|"); + if (mask & IN_MODIFY) + strbuf_addstr(&msg, "IN_MODIFY|"); + if (mask & IN_ATTRIB) + strbuf_addstr(&msg, "IN_ATTRIB|"); + if (mask & IN_CLOSE_WRITE) + strbuf_addstr(&msg, "IN_CLOSE_WRITE|"); + if (mask & IN_CLOSE_NOWRITE) + strbuf_addstr(&msg, "IN_CLOSE_NOWRITE|"); + if (mask & IN_OPEN) + strbuf_addstr(&msg, "IN_OPEN|"); + if (mask & IN_MOVED_FROM) + strbuf_addstr(&msg, "IN_MOVED_FROM|"); + if (mask & IN_MOVED_TO) + strbuf_addstr(&msg, "IN_MOVED_TO|"); + if (mask & IN_CREATE) + strbuf_addstr(&msg, "IN_CREATE|"); + if (mask & IN_DELETE) + strbuf_addstr(&msg, "IN_DELETE|"); + if (mask & IN_DELETE_SELF) + strbuf_addstr(&msg, "IN_DELETE_SELF|"); + if (mask & IN_MOVE_SELF) + strbuf_addstr(&msg, "IN_MOVE_SELF|"); + if (mask & IN_UNMOUNT) + strbuf_addstr(&msg, "IN_UNMOUNT|"); + if (mask & IN_Q_OVERFLOW) + strbuf_addstr(&msg, "IN_Q_OVERFLOW|"); + if (mask & IN_IGNORED) + strbuf_addstr(&msg, "IN_IGNORED|"); + if (mask & IN_ISDIR) + strbuf_addstr(&msg, "IN_ISDIR|"); + + strbuf_strip_suffix(&msg, "|"); + + trace_printf_key(&trace_fsmonitor, "inotify_event: '%s', mask=%#8.8x %s", + path, mask, msg.buf); + + strbuf_release(&msg); +} + +int fsm_listen__ctor(struct fsmonitor_daemon_state *state) +{ + int fd; + int ret = 0; + struct fsm_listen_data *data; + + CALLOC_ARRAY(data, 1); + state->listen_data = data; + state->listen_error_code = -1; + data->fd_inotify = -1; + data->shutdown = SHUTDOWN_ERROR; + + fd = inotify_init1(O_NONBLOCK); + if (fd < 0) { + FREE_AND_NULL(state->listen_data); + return error_errno(_("inotify_init1() failed")); + } + + data->fd_inotify = fd; + + hashmap_init(&data->watches, watch_entry_cmp, NULL, 0); + hashmap_init(&data->renames, rename_entry_cmp, NULL, 0); + hashmap_init(&data->revwatches, revwatches_entry_cmp, NULL, 0); + + if (add_watch(state->path_worktree_watch.buf, data)) + ret = -1; + else if (register_inotify(state->path_worktree_watch.buf, state, NULL)) + ret = -1; + else if (state->nr_paths_watching > 1) { + if (add_watch(state->path_gitdir_watch.buf, data)) + ret = -1; + else if (register_inotify(state->path_gitdir_watch.buf, state, NULL)) + ret = -1; + } + + if (!ret) { + state->listen_error_code = 0; + data->shutdown = SHUTDOWN_CONTINUE; + } + + return ret; +} + +void fsm_listen__dtor(struct fsmonitor_daemon_state *state) +{ + struct fsm_listen_data *data; + struct hashmap_iter iter; + struct watch_entry *w; + struct watch_entry **to_remove; + size_t nr_to_remove = 0, alloc_to_remove = 0; + size_t i; + int fd; + + if (!state || !state->listen_data) + return; + + data = state->listen_data; + fd = data->fd_inotify; + + /* + * Collect all entries first, then remove them. + * We can't modify the hashmap while iterating over it. + */ + to_remove = NULL; + hashmap_for_each_entry(&data->watches, &iter, w, ent) { + ALLOC_GROW(to_remove, nr_to_remove + 1, alloc_to_remove); + to_remove[nr_to_remove++] = w; + } + + for (i = 0; i < nr_to_remove; i++) { + to_remove[i]->cookie = 0; /* ignore any pending renames */ + remove_watch(to_remove[i], data); + } + free(to_remove); + + hashmap_clear(&data->watches); + + hashmap_clear(&data->revwatches); /* remove_watch freed the entries */ + + hashmap_clear_and_free(&data->renames, struct rename_entry, ent); + + FREE_AND_NULL(state->listen_data); + + if (fd >= 0 && (close(fd) < 0)) + error_errno(_("closing inotify file descriptor failed")); +} + +void fsm_listen__stop_async(struct fsmonitor_daemon_state *state) +{ + if (state && state->listen_data && + state->listen_data->shutdown == SHUTDOWN_CONTINUE) + state->listen_data->shutdown = SHUTDOWN_STOP; +} + +/* + * Process a single inotify event and queue for publication. + */ +static int process_event(const char *path, + const struct inotify_event *event, + struct fsmonitor_batch **batch, + struct string_list *cookie_list, + struct fsmonitor_daemon_state *state) +{ + const char *rel; + const char *last_sep; + + switch (fsmonitor_classify_path_absolute(state, path)) { + case IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX: + case IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX: + /* Use just the filename of the cookie file. */ + last_sep = find_last_dir_sep(path); + string_list_append(cookie_list, + last_sep ? last_sep + 1 : path); + break; + case IS_INSIDE_DOT_GIT: + case IS_INSIDE_GITDIR: + break; + case IS_DOT_GIT: + case IS_GITDIR: + /* + * If .git directory is deleted or renamed away, + * we have to quit. + */ + if (em_dir_deleted(event->mask)) { + trace_printf_key(&trace_fsmonitor, + "event: gitdir removed"); + state->listen_data->shutdown = SHUTDOWN_FORCE; + goto done; + } + + if (em_dir_renamed(event->mask)) { + trace_printf_key(&trace_fsmonitor, + "event: gitdir renamed"); + state->listen_data->shutdown = SHUTDOWN_FORCE; + goto done; + } + break; + case IS_WORKDIR_PATH: + /* normal events in the working directory */ + if (trace_pass_fl(&trace_fsmonitor)) + log_mask_set(path, event->mask); + + if (!*batch) + *batch = fsmonitor_batch__new(); + + rel = path + state->path_worktree_watch.len + 1; + fsmonitor_batch__add_path(*batch, rel); + + if (em_dir_deleted(event->mask)) + break; + + /* received IN_MOVE_FROM, add tracking for expected IN_MOVE_TO */ + if (em_rename_dir_from(event->mask)) + add_dir_rename(event->cookie, path, state->listen_data); + + /* received IN_MOVE_TO, update watch to reflect new path */ + if (em_rename_dir_to(event->mask)) { + rename_dir(event->cookie, path, state->listen_data); + if (register_inotify(path, state, *batch)) { + state->listen_data->shutdown = SHUTDOWN_ERROR; + goto done; + } + } + + if (em_dir_created(event->mask)) { + if (add_watch(path, state->listen_data)) { + state->listen_data->shutdown = SHUTDOWN_ERROR; + goto done; + } + if (register_inotify(path, state, *batch)) { + state->listen_data->shutdown = SHUTDOWN_ERROR; + goto done; + } + } + break; + case IS_OUTSIDE_CONE: + default: + trace_printf_key(&trace_fsmonitor, + "ignoring '%s'", path); + break; + } + return 0; +done: + return -1; +} + +/* + * Read the inotify event stream and pre-process events before further + * processing and eventual publishing. + */ +static void handle_events(struct fsmonitor_daemon_state *state) +{ + /* See https://man7.org/linux/man-pages/man7/inotify.7.html */ + char buf[4096] + __attribute__ ((aligned(__alignof__(struct inotify_event)))); + + struct hashmap *watches = &state->listen_data->watches; + struct fsmonitor_batch *batch = NULL; + struct string_list cookie_list = STRING_LIST_INIT_DUP; + struct watch_entry k, *w; + struct strbuf path = STRBUF_INIT; + const struct inotify_event *event; + int fd = state->listen_data->fd_inotify; + ssize_t len; + char *ptr, *p; + + for (;;) { + len = read(fd, buf, sizeof(buf)); + if (len == -1) { + if (errno == EAGAIN || errno == EINTR) + goto done; + error_errno(_("reading inotify message stream failed")); + state->listen_data->shutdown = SHUTDOWN_ERROR; + goto done; + } + + /* nothing to read */ + if (len == 0) + goto done; + + /* Loop over all events in the buffer. */ + for (ptr = buf; ptr < buf + len; + ptr += sizeof(struct inotify_event) + event->len) { + + event = (const struct inotify_event *)ptr; + + if (em_ignore(event->mask)) + continue; + + /* File system was unmounted or event queue overflowed */ + if (em_force_shutdown(event->mask)) { + if (trace_pass_fl(&trace_fsmonitor)) + log_mask_set("forcing shutdown", event->mask); + state->listen_data->shutdown = SHUTDOWN_FORCE; + goto done; + } + + k.wd = event->wd; + hashmap_entry_init(&k.ent, memhash(&k.wd, sizeof(int))); + + w = hashmap_get_entry(watches, &k, ent, NULL); + if (!w) { + /* Watch was removed, skip event */ + continue; + } + + /* directory watch was removed */ + if (em_remove_watch(event->mask)) { + remove_watch(w, state->listen_data); + continue; + } + + strbuf_reset(&path); + strbuf_addf(&path, "%s/%s", w->dir, event->name); + + p = fsmonitor__resolve_alias(path.buf, &state->alias); + if (!p) + p = strbuf_detach(&path, NULL); + + if (process_event(p, event, &batch, &cookie_list, state)) { + free(p); + goto done; + } + free(p); + } + strbuf_reset(&path); + fsmonitor_publish(state, batch, &cookie_list); + string_list_clear(&cookie_list, 0); + batch = NULL; + } +done: + strbuf_release(&path); + fsmonitor_batch__free_list(batch); + string_list_clear(&cookie_list, 0); +} + +/* + * Non-blocking read of the inotify events stream. The inotify fd is polled + * frequently to help minimize the number of queue overflows. + */ +void fsm_listen__loop(struct fsmonitor_daemon_state *state) +{ + int poll_num; + /* + * Interval in seconds between checks for stale directory renames. + * A directory rename that is not completed within this window + * (i.e. no matching IN_MOVED_TO for an IN_MOVED_FROM) indicates + * missed events, forcing a shutdown. + */ + const int interval = 1; + time_t checked = time(NULL); + struct pollfd fds[1]; + + fds[0].fd = state->listen_data->fd_inotify; + fds[0].events = POLLIN; + + /* + * Our fs event listener is now running, so it's safe to start + * serving client requests. + */ + ipc_server_start_async(state->ipc_server_data); + + for (;;) { + switch (state->listen_data->shutdown) { + case SHUTDOWN_CONTINUE: + poll_num = poll(fds, 1, 50); + if (poll_num == -1) { + if (errno == EINTR) + continue; + error_errno(_("polling inotify message stream failed")); + state->listen_data->shutdown = SHUTDOWN_ERROR; + continue; + } + + if ((time(NULL) - checked) >= interval) { + checked = time(NULL); + if (check_stale_dir_renames(&state->listen_data->renames, + checked - interval)) { + trace_printf_key(&trace_fsmonitor, + "missed IN_MOVED_TO events, forcing shutdown"); + state->listen_data->shutdown = SHUTDOWN_FORCE; + continue; + } + } + + if (poll_num > 0 && (fds[0].revents & POLLIN)) + handle_events(state); + + continue; + case SHUTDOWN_ERROR: + state->listen_error_code = -1; + ipc_server_stop_async(state->ipc_server_data); + break; + case SHUTDOWN_FORCE: + state->listen_error_code = 0; + ipc_server_stop_async(state->ipc_server_data); + break; + case SHUTDOWN_STOP: + default: + state->listen_error_code = 0; + break; + } + return; + } +} diff --git a/compat/fsmonitor/fsm-path-utils-linux.c b/compat/fsmonitor/fsm-path-utils-linux.c new file mode 100644 index 00000000000000..c9866b1b24ca8e --- /dev/null +++ b/compat/fsmonitor/fsm-path-utils-linux.c @@ -0,0 +1,217 @@ +#include "git-compat-util.h" +#include "fsmonitor-ll.h" +#include "fsmonitor-path-utils.h" +#include "gettext.h" +#include "trace.h" + +#include + +#ifdef HAVE_LINUX_MAGIC_H +#include +#endif + +/* + * Filesystem magic numbers for remote filesystems. + * Defined here if not available in linux/magic.h. + */ +#ifndef CIFS_SUPER_MAGIC +#define CIFS_SUPER_MAGIC 0xff534d42 +#endif +#ifndef SMB_SUPER_MAGIC +#define SMB_SUPER_MAGIC 0x517b +#endif +#ifndef SMB2_SUPER_MAGIC +#define SMB2_SUPER_MAGIC 0xfe534d42 +#endif +#ifndef NFS_SUPER_MAGIC +#define NFS_SUPER_MAGIC 0x6969 +#endif +#ifndef AFS_SUPER_MAGIC +#define AFS_SUPER_MAGIC 0x5346414f +#endif +#ifndef CODA_SUPER_MAGIC +#define CODA_SUPER_MAGIC 0x73757245 +#endif +#ifndef FUSE_SUPER_MAGIC +#define FUSE_SUPER_MAGIC 0x65735546 +#endif + +/* + * Check if filesystem type is a remote filesystem. + */ +static int is_remote_fs(unsigned long f_type) +{ + switch (f_type) { + case CIFS_SUPER_MAGIC: + case SMB_SUPER_MAGIC: + case SMB2_SUPER_MAGIC: + case NFS_SUPER_MAGIC: + case AFS_SUPER_MAGIC: + case CODA_SUPER_MAGIC: + case FUSE_SUPER_MAGIC: + return 1; + default: + return 0; + } +} + +/* + * Map filesystem magic numbers to human-readable names as a fallback + * when /proc/mounts is unavailable. This only covers the remote and + * special filesystems in is_remote_fs() above; local filesystems are + * never flagged as incompatible, so we do not need their names here. + */ +static const char *get_fs_typename(unsigned long f_type) +{ + switch (f_type) { + case CIFS_SUPER_MAGIC: + return "cifs"; + case SMB_SUPER_MAGIC: + return "smb"; + case SMB2_SUPER_MAGIC: + return "smb2"; + case NFS_SUPER_MAGIC: + return "nfs"; + case AFS_SUPER_MAGIC: + return "afs"; + case CODA_SUPER_MAGIC: + return "coda"; + case FUSE_SUPER_MAGIC: + return "fuse"; + default: + return "unknown"; + } +} + +/* + * Find the mount point for a given path by reading /proc/mounts. + * + * statfs(2) gives us f_type (the magic number) but not the human-readable + * filesystem type string. We scan /proc/mounts to find the mount entry + * whose path is the longest prefix of ours and whose f_fsid matches, + * which gives us the fstype string (e.g. "nfs", "ext4") for logging. + */ +static char *find_mount(const char *path, const struct statfs *path_fs) +{ + FILE *fp; + struct strbuf line = STRBUF_INIT; + struct strbuf match = STRBUF_INIT; + struct strbuf fstype = STRBUF_INIT; + char *result = NULL; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return NULL; + + while (strbuf_getline(&line, fp) != EOF) { + char *fields[6]; + char *p = line.buf; + int i; + + /* Parse mount entry: device mountpoint fstype options dump pass */ + for (i = 0; i < 6 && p; i++) { + fields[i] = p; + p = strchr(p, ' '); + if (p) + *p++ = '\0'; + } + + if (i >= 3) { + const char *mountpoint = fields[1]; + const char *type = fields[2]; + struct statfs mount_fs; + + /* Check if this mount point is a prefix of our path */ + if (starts_with(path, mountpoint) && + (path[strlen(mountpoint)] == '/' || + path[strlen(mountpoint)] == '\0')) { + /* Check if filesystem ID matches */ + if (statfs(mountpoint, &mount_fs) == 0 && + !memcmp(&mount_fs.f_fsid, &path_fs->f_fsid, + sizeof(mount_fs.f_fsid))) { + /* Keep the longest matching mount point */ + if (strlen(mountpoint) > match.len) { + strbuf_reset(&match); + strbuf_addstr(&match, mountpoint); + strbuf_reset(&fstype); + strbuf_addstr(&fstype, type); + } + } + } + } + } + + fclose(fp); + strbuf_release(&line); + strbuf_release(&match); + + if (fstype.len) + result = strbuf_detach(&fstype, NULL); + else + strbuf_release(&fstype); + + return result; +} + +int fsmonitor__get_fs_info(const char *path, struct fs_info *fs_info) +{ + struct statfs fs; + + if (statfs(path, &fs) == -1) { + int saved_errno = errno; + trace_printf_key(&trace_fsmonitor, "statfs('%s') failed: %s", + path, strerror(saved_errno)); + errno = saved_errno; + return -1; + } + + trace_printf_key(&trace_fsmonitor, + "statfs('%s') [type 0x%08lx]", + path, (unsigned long)fs.f_type); + + fs_info->is_remote = is_remote_fs(fs.f_type); + + /* + * Try to get filesystem type from /proc/mounts for a more + * descriptive name. + */ + fs_info->typename = find_mount(path, &fs); + if (!fs_info->typename) + fs_info->typename = xstrdup(get_fs_typename(fs.f_type)); + + trace_printf_key(&trace_fsmonitor, + "'%s' is_remote: %d, typename: %s", + path, fs_info->is_remote, fs_info->typename); + + return 0; +} + +int fsmonitor__is_fs_remote(const char *path) +{ + struct fs_info fs; + + if (fsmonitor__get_fs_info(path, &fs)) + return -1; + + free(fs.typename); + + return fs.is_remote; +} + +/* + * No-op for Linux - we don't have firmlinks like macOS. + */ +int fsmonitor__get_alias(const char *path UNUSED, + struct alias_info *info UNUSED) +{ + return 0; +} + +/* + * No-op for Linux - we don't have firmlinks like macOS. + */ +char *fsmonitor__resolve_alias(const char *path UNUSED, + const struct alias_info *info UNUSED) +{ + return NULL; +} diff --git a/config.mak.uname b/config.mak.uname index 00bcb84cee15c3..fd91729dd2b80f 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -68,6 +68,16 @@ ifeq ($(uname_S),Linux) BASIC_CFLAGS += -std=c99 endif LINK_FUZZ_PROGRAMS = YesPlease + + # The builtin FSMonitor on Linux builds upon Simple-IPC. Both require + # Unix domain sockets and PThreads. + ifndef NO_PTHREADS + ifndef NO_UNIX_SOCKETS + FSMONITOR_DAEMON_BACKEND = linux + FSMONITOR_OS_SETTINGS = unix + BASIC_CFLAGS += -DHAVE_LINUX_MAGIC_H + endif + endif endif ifeq ($(uname_S),GNU/kFreeBSD) HAVE_ALLOCA_H = YesPlease diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index d613809e26fd20..b7da108f298dc3 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -296,6 +296,10 @@ if(SUPPORTS_SIMPLE_IPC) elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") set(FSMONITOR_DAEMON_BACKEND "darwin") set(FSMONITOR_OS_SETTINGS "unix") + elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(FSMONITOR_DAEMON_BACKEND "linux") + set(FSMONITOR_OS_SETTINGS "unix") + add_compile_definitions(HAVE_LINUX_MAGIC_H) endif() if(FSMONITOR_DAEMON_BACKEND) @@ -1149,8 +1153,8 @@ endif() file(STRINGS ${CMAKE_SOURCE_DIR}/GIT-BUILD-OPTIONS.in git_build_options NEWLINE_CONSUME) string(REPLACE "@BROKEN_PATH_FIX@" "" git_build_options "${git_build_options}") string(REPLACE "@DIFF@" "'${DIFF}'" git_build_options "${git_build_options}") -string(REPLACE "@FSMONITOR_DAEMON_BACKEND@" "win32" git_build_options "${git_build_options}") -string(REPLACE "@FSMONITOR_OS_SETTINGS@" "win32" git_build_options "${git_build_options}") +string(REPLACE "@FSMONITOR_DAEMON_BACKEND@" "${FSMONITOR_DAEMON_BACKEND}" git_build_options "${git_build_options}") +string(REPLACE "@FSMONITOR_OS_SETTINGS@" "${FSMONITOR_OS_SETTINGS}" git_build_options "${git_build_options}") string(REPLACE "@GITWEBDIR@" "'${GITWEBDIR}'" git_build_options "${git_build_options}") string(REPLACE "@GIT_INTEROP_MAKE_OPTS@" "" git_build_options "${git_build_options}") string(REPLACE "@GIT_PERF_LARGE_REPO@" "" git_build_options "${git_build_options}") diff --git a/meson.build b/meson.build index 4f0c0a33b85c7d..123d2184602aa9 100644 --- a/meson.build +++ b/meson.build @@ -1324,6 +1324,10 @@ fsmonitor_os = '' if host_machine.system() == 'windows' fsmonitor_backend = 'win32' fsmonitor_os = 'win32' +elif host_machine.system() == 'linux' and threads.found() and compiler.has_header('linux/magic.h') + fsmonitor_backend = 'linux' + fsmonitor_os = 'unix' + libgit_c_args += '-DHAVE_LINUX_MAGIC_H' elif host_machine.system() == 'darwin' fsmonitor_backend = 'darwin' fsmonitor_os = 'unix' From 89e7939fc9e5cc936ec5780b809a82447e441a8c Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:08 +0000 Subject: [PATCH 035/236] run-command: add close_fd_above_stderr option Add a close_fd_above_stderr flag to struct child_process. When set, the child closes file descriptors 3 and above between fork and exec (skipping the child-notifier pipe), capped at sysconf(_SC_OPEN_MAX) or 4096, whichever is smaller. This prevents the child from inheriting pipe endpoints or other descriptors from the parent environment (e.g., the test harness). Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- run-command.c | 12 ++++++++++++ run-command.h | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/run-command.c b/run-command.c index e3e02475ccec50..f4361906c9b0e5 100644 --- a/run-command.c +++ b/run-command.c @@ -546,6 +546,7 @@ static void atfork_parent(struct atfork_state *as) "restoring signal mask"); #endif } + #endif /* GIT_WINDOWS_NATIVE */ static inline void set_cloexec(int fd) @@ -832,6 +833,17 @@ int start_command(struct child_process *cmd) child_close(cmd->out); } + if (cmd->close_fd_above_stderr) { + long max_fd = sysconf(_SC_OPEN_MAX); + int fd; + if (max_fd < 0 || max_fd > 4096) + max_fd = 4096; + for (fd = 3; fd < max_fd; fd++) { + if (fd != child_notifier) + close(fd); + } + } + if (cmd->dir && chdir(cmd->dir)) child_die(CHILD_ERR_CHDIR); diff --git a/run-command.h b/run-command.h index 0df25e445f001c..fdaa01e140705f 100644 --- a/run-command.h +++ b/run-command.h @@ -141,6 +141,15 @@ struct child_process { unsigned stdout_to_stderr:1; unsigned clean_on_exit:1; unsigned wait_after_clean:1; + + /** + * Close file descriptors 3 and above in the child after forking + * but before exec. This prevents the child from inheriting + * pipe endpoints or other descriptors from the parent + * environment (e.g., the test harness). + */ + unsigned close_fd_above_stderr:1; + void (*clean_on_exit_handler)(struct child_process *process); }; From 30f05c1b6ccda73593bbf8490ac2f77804867111 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:09 +0000 Subject: [PATCH 036/236] fsmonitor: close inherited file descriptors and detach in daemon When the fsmonitor daemon is spawned as a background process, it may inherit file descriptors from its parent that it does not need. In particular, when the test harness or a CI system captures output through pipes, the daemon can inherit duplicated pipe endpoints. If the daemon holds these open, the parent process never sees EOF and may appear to hang. Set close_fd_above_stderr on the child process at both daemon startup paths: the explicit "fsmonitor--daemon start" command and the implicit spawn triggered by fsmonitor-ipc when a client finds no running daemon. Also suppress stdout and stderr on the implicit spawn path to prevent the background daemon from writing to the client's terminal. Additionally, call setsid() when the daemon starts with --detach to create a new session and process group. This prevents the daemon from being part of the spawning shell's process group, which could cause the shell's "wait" to block until the daemon exits. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 16 ++++++++++++++-- fsmonitor-ipc.c | 3 +++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index c8ec7b722e953e..b2a816dc3fea5e 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -1439,7 +1439,7 @@ static int fsmonitor_run_daemon(void) return err; } -static int try_to_run_foreground_daemon(int detach_console MAYBE_UNUSED) +static int try_to_run_foreground_daemon(int detach_console) { /* * Technically, we don't need to probe for an existing daemon @@ -1459,10 +1459,21 @@ static int try_to_run_foreground_daemon(int detach_console MAYBE_UNUSED) fflush(stderr); } + if (detach_console) { #ifdef GIT_WINDOWS_NATIVE - if (detach_console) FreeConsole(); +#else + /* + * Create a new session so that the daemon is detached + * from the parent's process group. This prevents + * shells with job control (e.g. bash with "set -m") + * from waiting on the daemon when they wait for a + * foreground command that implicitly spawned it. + */ + if (setsid() == -1) + warning_errno(_("setsid failed")); #endif + } return !!fsmonitor_run_daemon(); } @@ -1525,6 +1536,7 @@ static int try_to_start_background_daemon(void) cp.no_stdin = 1; cp.no_stdout = 1; cp.no_stderr = 1; + cp.close_fd_above_stderr = 1; sbgr = start_bg_command(&cp, bg_wait_cb, NULL, fsmonitor__start_timeout_sec); diff --git a/fsmonitor-ipc.c b/fsmonitor-ipc.c index f1b163111194fb..6112d130644f04 100644 --- a/fsmonitor-ipc.c +++ b/fsmonitor-ipc.c @@ -61,6 +61,9 @@ static int spawn_daemon(void) cmd.git_cmd = 1; cmd.no_stdin = 1; + cmd.no_stdout = 1; + cmd.no_stderr = 1; + cmd.close_fd_above_stderr = 1; cmd.trace2_child_class = "fsmonitor"; strvec_pushl(&cmd.args, "fsmonitor--daemon", "start", NULL); From 9f79264a5e07ca4c012bc433169b0d699cefccdb Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:10 +0000 Subject: [PATCH 037/236] fsmonitor: add timeout to daemon stop command The "fsmonitor--daemon stop" command polls in a loop waiting for the daemon to exit after sending a "quit" command over IPC. If the daemon fails to shut down (e.g. it is stuck or wedged), this loop spins forever. Add a 30-second timeout so the stop command returns an error instead of blocking indefinitely. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index b2a816dc3fea5e..53d8ad1f0d2a17 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -86,6 +86,8 @@ static int do_as_client__send_stop(void) { struct strbuf answer = STRBUF_INIT; int ret; + int max_wait_ms = 30000; + int elapsed_ms = 0; ret = fsmonitor_ipc__send_command("quit", &answer); @@ -96,8 +98,16 @@ static int do_as_client__send_stop(void) return ret; trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL); - while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING) + while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING) { + if (elapsed_ms >= max_wait_ms) { + trace2_region_leave("fsm_client", + "polling-for-daemon-exit", NULL); + return error(_("daemon did not stop within %d seconds"), + max_wait_ms / 1000); + } sleep_millisec(50); + elapsed_ms += 50; + } trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL); return 0; From 81769b81506d454c05b86831a889355a778fa290 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:11 +0000 Subject: [PATCH 038/236] fsmonitor: add tests for Linux Add a smoke test that verifies the filesystem actually delivers inotify events to the daemon. On some configurations (e.g., overlayfs with older kernels), inotify watches succeed but events are never delivered. The daemon cookie wait will time out, but every subsequent test would fail. Skip the entire test file early when this is detected. Add a test that exercises rapid nested directory creation to verify the daemon correctly handles the EEXIST race between recursive scan and queued inotify events. When IN_MASK_CREATE is available and a directory watch is added during recursive registration, the kernel may also deliver a queued IN_CREATE event for the same directory. The second inotify_add_watch() returns EEXIST, which must be treated as harmless. An earlier version of the listener crashed in this scenario. Reduce --start-timeout from the default 60 seconds to 10 seconds so that tests fail promptly when the daemon cannot start. Harden the test helpers to work in environments without procps (e.g., Fedora CI): fall back to reading /proc/$pid/stat for the process group ID when ps is unavailable, guard stop_git() against an empty pgid, and redirect stderr from kill to /dev/null to avoid noise when processes have already exited. Use set -m to enable job control in the submodule-pull test so that the background git pull gets its own process group, preventing the shell wait from blocking on the daemon. setsid() in the previous commit detaches the daemon itself, but the intermediate git pull process still needs its own process group for the test shell to manage it correctly. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- t/t7527-builtin-fsmonitor.sh | 89 +++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 7 deletions(-) diff --git a/t/t7527-builtin-fsmonitor.sh b/t/t7527-builtin-fsmonitor.sh index 409cd0cd121695..774da5ac60924d 100755 --- a/t/t7527-builtin-fsmonitor.sh +++ b/t/t7527-builtin-fsmonitor.sh @@ -10,9 +10,58 @@ then test_done fi +# Verify that the filesystem delivers events to the daemon. +# On some configurations (e.g., overlayfs with older kernels), +# inotify watches succeed but events are never delivered. The +# cookie wait will time out and the daemon logs a trace message. +# +# Use "timeout" (if available) to guard each step against hangs. +maybe_timeout () { + if type timeout >/dev/null 2>&1 + then + timeout "$@" + else + shift + "$@" + fi +} +verify_fsmonitor_works () { + git init test_fsmonitor_smoke || return 1 + + GIT_TRACE_FSMONITOR="$PWD/smoke.trace" && + export GIT_TRACE_FSMONITOR && + maybe_timeout 30 \ + git -C test_fsmonitor_smoke fsmonitor--daemon start \ + --start-timeout=10 + ret=$? + unset GIT_TRACE_FSMONITOR + if test $ret -ne 0 + then + rm -rf test_fsmonitor_smoke smoke.trace + return 1 + fi + + maybe_timeout 10 \ + test-tool -C test_fsmonitor_smoke fsmonitor-client query \ + --token 0 >/dev/null 2>&1 + maybe_timeout 5 \ + git -C test_fsmonitor_smoke fsmonitor--daemon stop 2>/dev/null + ! grep -q "cookie_wait timed out" "$PWD/smoke.trace" 2>/dev/null + ret=$? + rm -rf test_fsmonitor_smoke smoke.trace + return $ret +} + +if ! verify_fsmonitor_works +then + skip_all="filesystem does not deliver fsmonitor events (container/overlayfs?)" + test_done +fi + stop_daemon_delete_repo () { r=$1 && - test_might_fail git -C $r fsmonitor--daemon stop && + test_might_fail maybe_timeout 30 \ + git -C $r fsmonitor--daemon stop 2>/dev/null rm -rf $1 } @@ -67,7 +116,7 @@ start_daemon () { export GIT_TEST_FSMONITOR_TOKEN fi && - git $r fsmonitor--daemon start && + git $r fsmonitor--daemon start --start-timeout=10 && git $r fsmonitor--daemon status ) } @@ -520,6 +569,28 @@ test_expect_success 'directory changes to a file' ' grep "^event: dir1$" .git/trace ' +test_expect_success 'rapid nested directory creation' ' + test_when_finished "git fsmonitor--daemon stop; rm -rf rapid" && + + start_daemon --tf "$PWD/.git/trace" && + + # Rapidly create nested directories to exercise race conditions + # where directory watches may be added concurrently during + # event processing and recursive scanning. + for i in $(test_seq 1 20) + do + mkdir -p "rapid/nested/dir$i/subdir/deep" || return 1 + done && + + # Give the daemon time to process all events + sleep 1 && + + test-tool fsmonitor-client query --token 0 && + + # Verify daemon is still running (did not crash) + git fsmonitor--daemon status +' + # The next few test cases exercise the token-resync code. When filesystem # drops events (because of filesystem velocity or because the daemon isn't # polling fast enough), we need to discard the cached data (relative to the @@ -910,7 +981,10 @@ test_expect_success "submodule absorbgitdirs implicitly starts daemon" ' start_git_in_background () { git "$@" & git_pid=$! - git_pgid=$(ps -o pgid= -p $git_pid) + git_pgid=$(ps -o pgid= -p $git_pid 2>/dev/null || + awk '{print $5}' /proc/$git_pid/stat 2>/dev/null) && + git_pgid="${git_pgid## }" && + git_pgid="${git_pgid%% }" nr_tries_left=10 while true do @@ -921,15 +995,16 @@ start_git_in_background () { fi sleep 1 nr_tries_left=$(($nr_tries_left - 1)) - done >/dev/null 2>&1 & + done >/dev/null 2>&1 3>&- 4>&- 5>&- 6>&- 7>&- & watchdog_pid=$! wait $git_pid } stop_git () { - while kill -0 -- -$git_pgid + test -n "$git_pgid" || return 0 + while kill -0 -- -$git_pgid 2>/dev/null do - kill -- -$git_pgid + kill -- -$git_pgid 2>/dev/null sleep 1 done } @@ -944,7 +1019,7 @@ stop_watchdog () { test_expect_success !MINGW "submodule implicitly starts daemon by pull" ' test_atexit "stop_watchdog" && - test_when_finished "stop_git; rm -rf cloned super sub" && + test_when_finished "set +m; stop_git; rm -rf cloned super sub" && create_super super && create_sub sub && From 1defaf5665cf963c5bfac2d7fbaf1bcea887cc19 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Thu, 5 Mar 2026 06:55:12 +0000 Subject: [PATCH 039/236] fsmonitor: convert shown khash to strset in do_handle_client Replace the khash-based string set used for deduplicating pathnames in do_handle_client() with a strset, which provides a cleaner interface for the same purpose. Since the paths are interned strings from the batch data, use strdup_strings=0 to avoid unnecessary copies. Suggested-by: Patrick Steinhardt Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 53d8ad1f0d2a17..f920cf3a8202f6 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -16,7 +16,7 @@ #include "fsmonitor--daemon.h" #include "simple-ipc.h" -#include "khash.h" +#include "strmap.h" #include "run-command.h" #include "trace.h" #include "trace2.h" @@ -674,8 +674,6 @@ static int fsmonitor_parse_client_token(const char *buf_token, return 0; } -KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal) - static int do_handle_client(struct fsmonitor_daemon_state *state, const char *command, ipc_server_reply_cb *reply, @@ -692,8 +690,7 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, const struct fsmonitor_batch *batch; struct fsmonitor_batch *remainder = NULL; intmax_t count = 0, duplicates = 0; - kh_str_t *shown = NULL; - int hash_ret; + struct strset shown = STRSET_INIT; int do_trivial = 0; int do_flush = 0; int do_cookie = 0; @@ -882,14 +879,14 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, * so walk the batch list backwards from the current head back * to the batch (sequence number) they named. * - * We use khash to de-dup the list of pathnames. + * We use a strset to de-dup the list of pathnames. * * NEEDSWORK: each batch contains a list of interned strings, * so we only need to do pointer comparisons here to build the * hash table. Currently, we're still comparing the string * values. */ - shown = kh_init_str(); + strset_init_with_options(&shown, NULL, 0); for (batch = batch_head; batch && batch->batch_seq_nr > requested_oldest_seq_nr; batch = batch->next) { @@ -899,11 +896,9 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, const char *s = batch->interned_paths[k]; size_t s_len; - if (kh_get_str(shown, s) != kh_end(shown)) + if (!strset_add(&shown, s)) duplicates++; else { - kh_put_str(shown, s, &hash_ret); - trace_printf_key(&trace_fsmonitor, "send[%"PRIuMAX"]: %s", count, s); @@ -973,7 +968,7 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates); cleanup: - kh_destroy_str(shown); + strset_clear(&shown); strbuf_release(&response_token); strbuf_release(&requested_token_id); strbuf_release(&payload); From 04949538203ba7aeb298a607280715d4f26dbe0a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:46 -0800 Subject: [PATCH 040/236] sideband: mask control characters The output of `git clone` is a vital component for understanding what has happened when things go wrong. However, these logs are partially under the control of the remote server (via the "sideband", which typically contains what the remote `git pack-objects` process sends to `stderr`), and is currently not sanitized by Git. This makes Git susceptible to ANSI escape sequence injection (see CWE-150, https://cwe.mitre.org/data/definitions/150.html), which allows attackers to corrupt terminal state, to hide information, and even to insert characters into the input buffer (i.e. as if the user had typed those characters). To plug this vulnerability, disallow any control character in the sideband, replacing them instead with the common `^` (e.g. `^[` for `\x1b`, `^A` for `\x01`). There is likely a need for more fine-grained controls instead of using a "heavy hammer" like this, which will be introduced subsequently. Helped-by: Phillip Wood Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- sideband.c | 17 +++++++++++++++-- t/t5409-colorize-remote-messages.sh | 12 ++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/sideband.c b/sideband.c index ea7c25211ef7e1..c1bbadccac682b 100644 --- a/sideband.c +++ b/sideband.c @@ -66,6 +66,19 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref list_config_item(list, prefix, keywords[i].keyword); } +static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) +{ + strbuf_grow(dest, n); + for (; n && *src; src++, n--) { + if (!iscntrl(*src) || *src == '\t' || *src == '\n') { + strbuf_addch(dest, *src); + } else { + strbuf_addch(dest, '^'); + strbuf_addch(dest, *src == 0x7f ? '?' : 0x40 + *src); + } + } +} + /* * Optionally highlight one keyword in remote output if it appears at the start * of the line. This should be called for a single line only, which is @@ -81,7 +94,7 @@ static void maybe_colorize_sideband(struct strbuf *dest, const char *src, int n) int i; if (!want_color_stderr(use_sideband_colors())) { - strbuf_add(dest, src, n); + strbuf_add_sanitized(dest, src, n); return; } @@ -114,7 +127,7 @@ static void maybe_colorize_sideband(struct strbuf *dest, const char *src, int n) } } - strbuf_add(dest, src, n); + strbuf_add_sanitized(dest, src, n); } diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index fa5de4500a4f50..aa5b57057148e0 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -98,4 +98,16 @@ test_expect_success 'fallback to color.ui' ' grep "error: error" decoded ' +test_expect_success 'disallow (color) control sequences in sideband' ' + write_script .git/color-me-surprised <<-\EOF && + printf "error: Have you \\033[31mread\\033[m this?\\n" >&2 + exec "$@" + EOF + test_config_global uploadPack.packObjectsHook ./color-me-surprised && + test_commit need-at-least-one-commit && + git clone --no-local . throw-away 2>stderr && + test_decode_color decoded && + test_grep ! RED decoded +' + test_done From 9ed1625a581a35d7ec2d851258cf4c7fc08c1ed7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:47 -0800 Subject: [PATCH 041/236] sideband: introduce an "escape hatch" to allow control characters The preceding commit fixed the vulnerability whereas sideband messages (that are under the control of the remote server) could contain ANSI escape sequences that would be sent to the terminal verbatim. However, this fix may not be desirable under all circumstances, e.g. when remote servers deliberately add coloring to their messages to increase their urgency. To help with those use cases, give users a way to opt-out of the protections: `sideband.allowControlCharacters`. Suggested-by: brian m. carlson Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config.adoc | 2 ++ Documentation/config/sideband.adoc | 5 +++++ sideband.c | 10 ++++++++++ t/t5409-colorize-remote-messages.sh | 8 +++++++- 4 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 Documentation/config/sideband.adoc diff --git a/Documentation/config.adoc b/Documentation/config.adoc index 62eebe7c54501c..dcea3c0c15e2a9 100644 --- a/Documentation/config.adoc +++ b/Documentation/config.adoc @@ -523,6 +523,8 @@ include::config/sequencer.adoc[] include::config/showbranch.adoc[] +include::config/sideband.adoc[] + include::config/sparse.adoc[] include::config/splitindex.adoc[] diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc new file mode 100644 index 00000000000000..3fb5045cd79581 --- /dev/null +++ b/Documentation/config/sideband.adoc @@ -0,0 +1,5 @@ +sideband.allowControlCharacters:: + By default, control characters that are delivered via the sideband + are masked, to prevent potentially unwanted ANSI escape sequences + from being sent to the terminal. Use this config setting to override + this behavior. diff --git a/sideband.c b/sideband.c index c1bbadccac682b..682f1cbbedb9b8 100644 --- a/sideband.c +++ b/sideband.c @@ -26,6 +26,8 @@ static struct keyword_entry keywords[] = { { "error", GIT_COLOR_BOLD_RED }, }; +static int allow_control_characters; + /* Returns a color setting (GIT_COLOR_NEVER, etc). */ static enum git_colorbool use_sideband_colors(void) { @@ -39,6 +41,9 @@ static enum git_colorbool use_sideband_colors(void) if (use_sideband_colors_cached != GIT_COLOR_UNKNOWN) return use_sideband_colors_cached; + repo_config_get_bool(the_repository, "sideband.allowcontrolcharacters", + &allow_control_characters); + if (!repo_config_get_string_tmp(the_repository, key, &value)) use_sideband_colors_cached = git_config_colorbool(key, value); else if (!repo_config_get_string_tmp(the_repository, "color.ui", &value)) @@ -68,6 +73,11 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) { + if (allow_control_characters) { + strbuf_add(dest, src, n); + return; + } + strbuf_grow(dest, n); for (; n && *src; src++, n--) { if (!iscntrl(*src) || *src == '\t' || *src == '\n') { diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index aa5b57057148e0..9caee9a07f1556 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -105,9 +105,15 @@ test_expect_success 'disallow (color) control sequences in sideband' ' EOF test_config_global uploadPack.packObjectsHook ./color-me-surprised && test_commit need-at-least-one-commit && + git clone --no-local . throw-away 2>stderr && test_decode_color decoded && - test_grep ! RED decoded + test_grep ! RED decoded && + + rm -rf throw-away && + git -c sideband.allowControlCharacters clone --no-local . throw-away 2>stderr && + test_decode_color decoded && + test_grep RED decoded ' test_done From 12f0fda905b4af3a15c125f96808e49ddbe39742 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:48 -0800 Subject: [PATCH 042/236] sideband: do allow ANSI color sequences by default The preceding two commits introduced special handling of the sideband channel to neutralize ANSI escape sequences before sending the payload to the terminal, and `sideband.allowControlCharacters` to override that behavior. However, as reported by brian m. carlson, some `pre-receive` hooks that are actively used in practice want to color their messages and therefore rely on the fact that Git passes them through to the terminal, even though they have no way to determine whether the receiving side can actually handle Escape sequences (think e.g. about the practice recommended by Git that third-party applications wishing to use Git functionality parse the output of Git commands). In contrast to other ANSI escape sequences, it is highly unlikely that coloring sequences can be essential tools in attack vectors that mislead Git users e.g. by hiding crucial information. Therefore we can have both: Continue to allow ANSI coloring sequences to be passed to the terminal by default, and neutralize all other ANSI Escape sequences. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 18 ++++++-- sideband.c | 66 +++++++++++++++++++++++++++-- t/t5409-colorize-remote-messages.sh | 16 ++++++- 3 files changed, 91 insertions(+), 9 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 3fb5045cd79581..b55c73726fe2c7 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -1,5 +1,17 @@ sideband.allowControlCharacters:: By default, control characters that are delivered via the sideband - are masked, to prevent potentially unwanted ANSI escape sequences - from being sent to the terminal. Use this config setting to override - this behavior. + are masked, except ANSI color sequences. This prevents potentially + unwanted ANSI escape sequences from being sent to the terminal. Use + this config setting to override this behavior: ++ +-- + `default`:: + `color`:: + Allow ANSI color sequences, line feeds and horizontal tabs, + but mask all other control characters. This is the default. + `false`:: + Mask all control characters other than line feeds and + horizontal tabs. + `true`:: + Allow all control characters to be sent to the terminal. +-- diff --git a/sideband.c b/sideband.c index 682f1cbbedb9b8..eeba6fa2ca8dd6 100644 --- a/sideband.c +++ b/sideband.c @@ -26,7 +26,12 @@ static struct keyword_entry keywords[] = { { "error", GIT_COLOR_BOLD_RED }, }; -static int allow_control_characters; +static enum { + ALLOW_NO_CONTROL_CHARACTERS = 0, + ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, + ALLOW_ALL_CONTROL_CHARACTERS = 1<<1, +} allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; /* Returns a color setting (GIT_COLOR_NEVER, etc). */ static enum git_colorbool use_sideband_colors(void) @@ -41,8 +46,26 @@ static enum git_colorbool use_sideband_colors(void) if (use_sideband_colors_cached != GIT_COLOR_UNKNOWN) return use_sideband_colors_cached; - repo_config_get_bool(the_repository, "sideband.allowcontrolcharacters", - &allow_control_characters); + switch (repo_config_get_maybe_bool(the_repository, "sideband.allowcontrolcharacters", &i)) { + case 0: /* Boolean value */ + allow_control_characters = i ? ALLOW_ALL_CONTROL_CHARACTERS : + ALLOW_NO_CONTROL_CHARACTERS; + break; + case -1: /* non-Boolean value */ + if (repo_config_get_string_tmp(the_repository, "sideband.allowcontrolcharacters", + &value)) + ; /* huh? `get_maybe_bool()` returned -1 */ + else if (!strcmp(value, "default")) + allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; + else if (!strcmp(value, "color")) + allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; + else + warning(_("unrecognized value for `sideband." + "allowControlCharacters`: '%s'"), value); + break; + default: + break; /* not configured */ + } if (!repo_config_get_string_tmp(the_repository, key, &value)) use_sideband_colors_cached = git_config_colorbool(key, value); @@ -71,9 +94,41 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref list_config_item(list, prefix, keywords[i].keyword); } +static int handle_ansi_color_sequence(struct strbuf *dest, const char *src, int n) +{ + int i; + + /* + * Valid ANSI color sequences are of the form + * + * ESC [ [ [; ]*] m + * + * These are part of the Select Graphic Rendition sequences which + * contain more than just color sequences, for more details see + * https://en.wikipedia.org/wiki/ANSI_escape_code#SGR. + */ + + if (allow_control_characters != ALLOW_ANSI_COLOR_SEQUENCES || + n < 3 || src[0] != '\x1b' || src[1] != '[') + return 0; + + for (i = 2; i < n; i++) { + if (src[i] == 'm') { + strbuf_add(dest, src, i + 1); + return i; + } + if (!isdigit(src[i]) && src[i] != ';') + break; + } + + return 0; +} + static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) { - if (allow_control_characters) { + int i; + + if (allow_control_characters == ALLOW_ALL_CONTROL_CHARACTERS) { strbuf_add(dest, src, n); return; } @@ -82,6 +137,9 @@ static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) for (; n && *src; src++, n--) { if (!iscntrl(*src) || *src == '\t' || *src == '\n') { strbuf_addch(dest, *src); + } else if ((i = handle_ansi_color_sequence(dest, src, n))) { + src += i; + n -= i; } else { strbuf_addch(dest, '^'); strbuf_addch(dest, *src == 0x7f ? '?' : 0x40 + *src); diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index 9caee9a07f1556..e5092d3b426cd3 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -100,7 +100,7 @@ test_expect_success 'fallback to color.ui' ' test_expect_success 'disallow (color) control sequences in sideband' ' write_script .git/color-me-surprised <<-\EOF && - printf "error: Have you \\033[31mread\\033[m this?\\n" >&2 + printf "error: Have you \\033[31mread\\033[m this?\\a\\n" >&2 exec "$@" EOF test_config_global uploadPack.packObjectsHook ./color-me-surprised && @@ -108,12 +108,24 @@ test_expect_success 'disallow (color) control sequences in sideband' ' git clone --no-local . throw-away 2>stderr && test_decode_color decoded && + test_grep RED decoded && + test_grep "\\^G" stderr && + tr -dc "\\007" actual && + test_must_be_empty actual && + + rm -rf throw-away && + git -c sideband.allowControlCharacters=false \ + clone --no-local . throw-away 2>stderr && + test_decode_color decoded && test_grep ! RED decoded && + test_grep "\\^G" stderr && rm -rf throw-away && git -c sideband.allowControlCharacters clone --no-local . throw-away 2>stderr && test_decode_color decoded && - test_grep RED decoded + test_grep RED decoded && + tr -dc "\\007" actual && + test_file_not_empty actual ' test_done From 128914438a0d2d55ae34314a0881f55a797024d5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:49 -0800 Subject: [PATCH 043/236] sideband: add options to allow more control sequences to be passed through Even though control sequences that erase characters are quite juicy for attack scenarios, where attackers are eager to hide traces of suspicious activities, during the review of the side band sanitizing patch series concerns were raised that there might be some legimitate scenarios where Git server's `pre-receive` hooks use those sequences in a benign way. Control sequences to move the cursor can likewise be used to hide tracks by overwriting characters, and have been equally pointed out as having legitimate users. Let's add options to let users opt into passing through those ANSI Escape sequences: `sideband.allowControlCharacters` now supports also `cursor` and `erase`, and it parses the value as a comma-separated list. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 9 ++- sideband.c | 91 ++++++++++++++++++++++++----- t/t5409-colorize-remote-messages.sh | 38 ++++++++++++ 3 files changed, 123 insertions(+), 15 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index b55c73726fe2c7..2bf04262840b02 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -2,13 +2,20 @@ sideband.allowControlCharacters:: By default, control characters that are delivered via the sideband are masked, except ANSI color sequences. This prevents potentially unwanted ANSI escape sequences from being sent to the terminal. Use - this config setting to override this behavior: + this config setting to override this behavior (the value can be + a comma-separated list of the following keywords): + -- `default`:: `color`:: Allow ANSI color sequences, line feeds and horizontal tabs, but mask all other control characters. This is the default. + `cursor:`: + Allow control sequences that move the cursor. This is + disabled by default. + `erase`:: + Allow control sequences that erase charactrs. This is + disabled by default. `false`:: Mask all control characters other than line feeds and horizontal tabs. diff --git a/sideband.c b/sideband.c index eeba6fa2ca8dd6..0b420ca3193888 100644 --- a/sideband.c +++ b/sideband.c @@ -29,9 +29,43 @@ static struct keyword_entry keywords[] = { static enum { ALLOW_NO_CONTROL_CHARACTERS = 0, ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, + ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, + ALLOW_ANSI_ERASE = 1<<2, ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, - ALLOW_ALL_CONTROL_CHARACTERS = 1<<1, -} allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; + ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, +} allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; + +static inline int skip_prefix_in_csv(const char *value, const char *prefix, + const char **out) +{ + if (!skip_prefix(value, prefix, &value) || + (*value && *value != ',')) + return 0; + *out = value + !!*value; + return 1; +} + +static void parse_allow_control_characters(const char *value) +{ + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; + while (*value) { + if (skip_prefix_in_csv(value, "default", &value)) + allow_control_characters |= ALLOW_DEFAULT_ANSI_SEQUENCES; + else if (skip_prefix_in_csv(value, "color", &value)) + allow_control_characters |= ALLOW_ANSI_COLOR_SEQUENCES; + else if (skip_prefix_in_csv(value, "cursor", &value)) + allow_control_characters |= ALLOW_ANSI_CURSOR_MOVEMENTS; + else if (skip_prefix_in_csv(value, "erase", &value)) + allow_control_characters |= ALLOW_ANSI_ERASE; + else if (skip_prefix_in_csv(value, "true", &value)) + allow_control_characters = ALLOW_ALL_CONTROL_CHARACTERS; + else if (skip_prefix_in_csv(value, "false", &value)) + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; + else + warning(_("unrecognized value for `sideband." + "allowControlCharacters`: '%s'"), value); + } +} /* Returns a color setting (GIT_COLOR_NEVER, etc). */ static enum git_colorbool use_sideband_colors(void) @@ -55,13 +89,8 @@ static enum git_colorbool use_sideband_colors(void) if (repo_config_get_string_tmp(the_repository, "sideband.allowcontrolcharacters", &value)) ; /* huh? `get_maybe_bool()` returned -1 */ - else if (!strcmp(value, "default")) - allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; - else if (!strcmp(value, "color")) - allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; else - warning(_("unrecognized value for `sideband." - "allowControlCharacters`: '%s'"), value); + parse_allow_control_characters(value); break; default: break; /* not configured */ @@ -94,7 +123,7 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref list_config_item(list, prefix, keywords[i].keyword); } -static int handle_ansi_color_sequence(struct strbuf *dest, const char *src, int n) +static int handle_ansi_sequence(struct strbuf *dest, const char *src, int n) { int i; @@ -106,14 +135,47 @@ static int handle_ansi_color_sequence(struct strbuf *dest, const char *src, int * These are part of the Select Graphic Rendition sequences which * contain more than just color sequences, for more details see * https://en.wikipedia.org/wiki/ANSI_escape_code#SGR. + * + * The cursor movement sequences are: + * + * ESC [ n A - Cursor up n lines (CUU) + * ESC [ n B - Cursor down n lines (CUD) + * ESC [ n C - Cursor forward n columns (CUF) + * ESC [ n D - Cursor back n columns (CUB) + * ESC [ n E - Cursor next line, beginning (CNL) + * ESC [ n F - Cursor previous line, beginning (CPL) + * ESC [ n G - Cursor to column n (CHA) + * ESC [ n ; m H - Cursor position (row n, col m) (CUP) + * ESC [ n ; m f - Same as H (HVP) + * + * The sequences to erase characters are: + * + * + * ESC [ 0 J - Clear from cursor to end of screen (ED) + * ESC [ 1 J - Clear from cursor to beginning of screen (ED) + * ESC [ 2 J - Clear entire screen (ED) + * ESC [ 3 J - Clear entire screen + scrollback (ED) - xterm extension + * ESC [ 0 K - Clear from cursor to end of line (EL) + * ESC [ 1 K - Clear from cursor to beginning of line (EL) + * ESC [ 2 K - Clear entire line (EL) + * ESC [ n M - Delete n lines (DL) + * ESC [ n P - Delete n characters (DCH) + * ESC [ n X - Erase n characters (ECH) + * + * For a comprehensive list of common ANSI Escape sequences, see + * https://www.xfree86.org/current/ctlseqs.html */ - if (allow_control_characters != ALLOW_ANSI_COLOR_SEQUENCES || - n < 3 || src[0] != '\x1b' || src[1] != '[') + if (n < 3 || src[0] != '\x1b' || src[1] != '[') return 0; for (i = 2; i < n; i++) { - if (src[i] == 'm') { + if (((allow_control_characters & ALLOW_ANSI_COLOR_SEQUENCES) && + src[i] == 'm') || + ((allow_control_characters & ALLOW_ANSI_CURSOR_MOVEMENTS) && + strchr("ABCDEFGHf", src[i])) || + ((allow_control_characters & ALLOW_ANSI_ERASE) && + strchr("JKMPX", src[i]))) { strbuf_add(dest, src, i + 1); return i; } @@ -128,7 +190,7 @@ static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) { int i; - if (allow_control_characters == ALLOW_ALL_CONTROL_CHARACTERS) { + if ((allow_control_characters & ALLOW_ALL_CONTROL_CHARACTERS)) { strbuf_add(dest, src, n); return; } @@ -137,7 +199,8 @@ static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) for (; n && *src; src++, n--) { if (!iscntrl(*src) || *src == '\t' || *src == '\n') { strbuf_addch(dest, *src); - } else if ((i = handle_ansi_color_sequence(dest, src, n))) { + } else if (allow_control_characters != ALLOW_NO_CONTROL_CHARACTERS && + (i = handle_ansi_sequence(dest, src, n))) { src += i; n -= i; } else { diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index e5092d3b426cd3..896e790bf955cd 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -128,4 +128,42 @@ test_expect_success 'disallow (color) control sequences in sideband' ' test_file_not_empty actual ' +test_decode_csi() { + awk '{ + while (match($0, /\033/) != 0) { + printf "%sCSI ", substr($0, 1, RSTART-1); + $0 = substr($0, RSTART + RLENGTH, length($0) - RSTART - RLENGTH + 1); + } + print + }' +} + +test_expect_success 'control sequences in sideband allowed by default' ' + write_script .git/color-me-surprised <<-\EOF && + printf "error: \\033[31mcolor\\033[m\\033[Goverwrite\\033[Gerase\\033[K\\033?25l\\n" >&2 + exec "$@" + EOF + test_config_global uploadPack.packObjectsHook ./color-me-surprised && + test_commit need-at-least-one-commit-at-least && + + rm -rf throw-away && + git clone --no-local . throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep ! "CSI \\[K" decoded && + test_grep ! "CSI \\[G" decoded && + test_grep "\\^\\[?25l" decoded && + + rm -rf throw-away && + git -c sideband.allowControlCharacters=erase,cursor,color \ + clone --no-local . throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep "RED" decoded && + test_grep "CSI \\[K" decoded && + test_grep "CSI \\[G" decoded && + test_grep ! "\\^\\[\\[K" decoded && + test_grep ! "\\^\\[\\[G" decoded +' + test_done From 602c83f0efed46c2e86a36273673bf8776ded04e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:50 -0800 Subject: [PATCH 044/236] sideband: offer to configure sanitizing on a per-URL basis The main objection against sanitizing the sideband that was raised during the review of the sideband sanitizing patches, first on the git-security mailing list, then on the public mailing list, was that there are some setups where server-side `pre-receive` hooks want to error out, giving colorful messages to the users on the client side (if they are not redirecting the output into a file, that is). To avoid breaking such setups, the default chosen by the sideband sanitizing patches is to pass through ANSI color sequences. Still, there might be some use case out there where that is not enough. Therefore the `sideband.allowControlCharacters` config setting allows for configuring levels of sanitizing. As Junio Hamano pointed out, to keep users safe by default, we need to be able to scope this to some servers because while a user may trust their company's Git server, the same might not apply to other Git servers. To allow for this, let's imitate the way `http..*` offers to scope config settings to certain URLs, by letting users override the `sideband.allowControlCharacters` setting via `sideband..allowControlCharacters`. Suggested-by: Junio Hamano Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 4 ++ sideband.c | 81 ++++++++++++++++++++--------- sideband.h | 14 +++++ t/t5409-colorize-remote-messages.sh | 24 +++++++++ transport.c | 3 ++ 5 files changed, 102 insertions(+), 24 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 2bf04262840b02..32088bbf2f0a40 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -22,3 +22,7 @@ sideband.allowControlCharacters:: `true`:: Allow all control characters to be sent to the terminal. -- + +sideband..*:: + Apply the `sideband.*` option selectively to specific URLs. The + same URL matching logic applies as for `http..*` settings. diff --git a/sideband.c b/sideband.c index 0b420ca3193888..a90db9e2880cba 100644 --- a/sideband.c +++ b/sideband.c @@ -10,6 +10,7 @@ #include "help.h" #include "pkt-line.h" #include "write-or-die.h" +#include "urlmatch.h" struct keyword_entry { /* @@ -27,13 +28,14 @@ static struct keyword_entry keywords[] = { }; static enum { - ALLOW_NO_CONTROL_CHARACTERS = 0, - ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, - ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, - ALLOW_ANSI_ERASE = 1<<2, - ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, - ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, -} allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; + ALLOW_CONTROL_SEQUENCES_UNSET = -1, + ALLOW_NO_CONTROL_CHARACTERS = 0, + ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, + ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, + ALLOW_ANSI_ERASE = 1<<2, + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, + ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, +} allow_control_characters = ALLOW_CONTROL_SEQUENCES_UNSET; static inline int skip_prefix_in_csv(const char *value, const char *prefix, const char **out) @@ -45,8 +47,19 @@ static inline int skip_prefix_in_csv(const char *value, const char *prefix, return 1; } -static void parse_allow_control_characters(const char *value) +int sideband_allow_control_characters_config(const char *var, const char *value) { + switch (git_parse_maybe_bool(value)) { + case 0: + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; + return 0; + case 1: + allow_control_characters = ALLOW_ALL_CONTROL_CHARACTERS; + return 0; + default: + break; + } + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; while (*value) { if (skip_prefix_in_csv(value, "default", &value)) @@ -62,9 +75,37 @@ static void parse_allow_control_characters(const char *value) else if (skip_prefix_in_csv(value, "false", &value)) allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; else - warning(_("unrecognized value for `sideband." - "allowControlCharacters`: '%s'"), value); + warning(_("unrecognized value for '%s': '%s'"), var, value); } + return 0; +} + +static int sideband_config_callback(const char *var, const char *value, + const struct config_context *ctx UNUSED, + void *data UNUSED) +{ + if (!strcmp(var, "sideband.allowcontrolcharacters")) + return sideband_allow_control_characters_config(var, value); + + return 0; +} + +void sideband_apply_url_config(const char *url) +{ + struct urlmatch_config config = URLMATCH_CONFIG_INIT; + char *normalized_url; + + if (!url) + BUG("must not call sideband_apply_url_config(NULL)"); + + config.section = "sideband"; + config.collect_fn = sideband_config_callback; + + normalized_url = url_normalize(url, &config.url); + repo_config(the_repository, urlmatch_config_entry, &config); + free(normalized_url); + string_list_clear(&config.vars, 1); + urlmatch_config_release(&config); } /* Returns a color setting (GIT_COLOR_NEVER, etc). */ @@ -80,20 +121,12 @@ static enum git_colorbool use_sideband_colors(void) if (use_sideband_colors_cached != GIT_COLOR_UNKNOWN) return use_sideband_colors_cached; - switch (repo_config_get_maybe_bool(the_repository, "sideband.allowcontrolcharacters", &i)) { - case 0: /* Boolean value */ - allow_control_characters = i ? ALLOW_ALL_CONTROL_CHARACTERS : - ALLOW_NO_CONTROL_CHARACTERS; - break; - case -1: /* non-Boolean value */ - if (repo_config_get_string_tmp(the_repository, "sideband.allowcontrolcharacters", - &value)) - ; /* huh? `get_maybe_bool()` returned -1 */ - else - parse_allow_control_characters(value); - break; - default: - break; /* not configured */ + if (allow_control_characters == ALLOW_CONTROL_SEQUENCES_UNSET) { + if (!repo_config_get_value(the_repository, "sideband.allowcontrolcharacters", &value)) + sideband_allow_control_characters_config("sideband.allowcontrolcharacters", value); + + if (allow_control_characters == ALLOW_CONTROL_SEQUENCES_UNSET) + allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; } if (!repo_config_get_string_tmp(the_repository, key, &value)) diff --git a/sideband.h b/sideband.h index 5a25331be55d30..d15fa4015fa0a3 100644 --- a/sideband.h +++ b/sideband.h @@ -30,4 +30,18 @@ int demultiplex_sideband(const char *me, int status, void send_sideband(int fd, int band, const char *data, ssize_t sz, int packet_max); +/* + * Apply sideband configuration for the given URL. This should be called + * when a transport is created to allow URL-specific configuration of + * sideband behavior (e.g., sideband..allowControlCharacters). + */ +void sideband_apply_url_config(const char *url); + +/* + * Parse and set the sideband allow control characters configuration. + * The var parameter should be the key name (without section prefix). + * Returns 0 if the variable was recognized and handled, non-zero otherwise. + */ +int sideband_allow_control_characters_config(const char *var, const char *value); + #endif diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index 896e790bf955cd..3010913bb113e4 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -166,4 +166,28 @@ test_expect_success 'control sequences in sideband allowed by default' ' test_grep ! "\\^\\[\\[G" decoded ' +test_expect_success 'allow all control sequences for a specific URL' ' + write_script .git/eraser <<-\EOF && + printf "error: Ohai!\\r\\033[K" >&2 + exec "$@" + EOF + test_config_global uploadPack.packObjectsHook ./eraser && + test_commit one-more-please && + + rm -rf throw-away && + git clone --no-local . throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep ! "CSI \\[K" decoded && + test_grep "\\^\\[\\[K" decoded && + + rm -rf throw-away && + git -c "sideband.file://.allowControlCharacters=true" \ + clone --no-local "file://$PWD" throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep "CSI \\[K" decoded && + test_grep ! "\\^\\[\\[K" decoded +' + test_done diff --git a/transport.c b/transport.c index c7f06a7382e605..1602065953a54e 100644 --- a/transport.c +++ b/transport.c @@ -29,6 +29,7 @@ #include "object-name.h" #include "color.h" #include "bundle-uri.h" +#include "sideband.h" static enum git_colorbool transport_use_color = GIT_COLOR_UNKNOWN; static char transport_colors[][COLOR_MAXLEN] = { @@ -1245,6 +1246,8 @@ struct transport *transport_get(struct remote *remote, const char *url) ret->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; + sideband_apply_url_config(ret->url); + return ret; } From 826cc4722088a02d0ae240c1267b5b74d476b153 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 5 Mar 2026 15:34:51 -0800 Subject: [PATCH 045/236] sideband: drop 'default' configuration The topic so far allows users to tweak the configuration variable sideband.allowControlCharacters to override the hardcoded default, but among which there is the value called 'default'. The plan [*] of the series is to loosen the setting by a later commit in the series and schedule it to tighten at the Git 3.0 boundary for end users, at which point, the meaning of this 'default' value will change. Which is a dubious design. A user expresses their preference by setting configuration variable in order to guard against sudden change brought in by changes to the hardcoded default behaviour, and letting them set it to 'default' that will change at the Git 3.0 boundary defeats its purpose. If a user wants to say "I am easy and can go with whatever hardcoded default Git implementors choose for me", they simply leave the configuration variable unspecified. Let's remove it from the state before Git 3.0 so that those users who set it to 'default' will not see the behaviour changed under their feet all of sudden. Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 1 - sideband.c | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 32088bbf2f0a40..96fade7f5fee39 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -6,7 +6,6 @@ sideband.allowControlCharacters:: a comma-separated list of the following keywords): + -- - `default`:: `color`:: Allow ANSI color sequences, line feeds and horizontal tabs, but mask all other control characters. This is the default. diff --git a/sideband.c b/sideband.c index a90db9e2880cba..04282a568edd90 100644 --- a/sideband.c +++ b/sideband.c @@ -33,8 +33,8 @@ static enum { ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, ALLOW_ANSI_ERASE = 1<<2, - ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES } allow_control_characters = ALLOW_CONTROL_SEQUENCES_UNSET; static inline int skip_prefix_in_csv(const char *value, const char *prefix, @@ -62,9 +62,7 @@ int sideband_allow_control_characters_config(const char *var, const char *value) allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; while (*value) { - if (skip_prefix_in_csv(value, "default", &value)) - allow_control_characters |= ALLOW_DEFAULT_ANSI_SEQUENCES; - else if (skip_prefix_in_csv(value, "color", &value)) + if (skip_prefix_in_csv(value, "color", &value)) allow_control_characters |= ALLOW_ANSI_COLOR_SEQUENCES; else if (skip_prefix_in_csv(value, "cursor", &value)) allow_control_characters |= ALLOW_ANSI_CURSOR_MOVEMENTS; From 818fbfd208f919e7a4fd9c827b65e5ce5372479b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:52 -0800 Subject: [PATCH 046/236] sideband: delay sanitizing by default to Git v3.0 The sideband sanitization patches allow ANSI color sequences through by default, preserving compatibility with pre-receive hooks that provide colored output during `git push`. Even so, there is concern that changing any default behavior in a minor release may have unforeseen consequences. To accommodate this, defer the secure-by-default behavior to Git v3.0, where breaking changes are expected. This gives users and tooling time to prepare, while committing to address CVE-2024-52005 in Git v3.0. Signed-off-by: Johannes Schindelin [jc: adjusted for the removal of 'default' value] Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 12 ++++++++++-- sideband.c | 6 +++++- t/t5409-colorize-remote-messages.sh | 18 +++++++++++++----- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 96fade7f5fee39..ddba93393ccadc 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -1,8 +1,16 @@ sideband.allowControlCharacters:: +ifdef::with-breaking-changes[] By default, control characters that are delivered via the sideband are masked, except ANSI color sequences. This prevents potentially - unwanted ANSI escape sequences from being sent to the terminal. Use - this config setting to override this behavior (the value can be + unwanted ANSI escape sequences from being sent to the terminal. +endif::with-breaking-changes[] +ifndef::with-breaking-changes[] + By default, no control characters delivered via the sideband + are masked. This is unsafe and will change in Git v3.* to only + allow ANSI color sequences by default, preventing potentially + unwanted ANSI escape sequences from being sent to the terminal. +endif::with-breaking-changes[] + Use this config setting to override this behavior (the value can be a comma-separated list of the following keywords): + -- diff --git a/sideband.c b/sideband.c index 04282a568edd90..5fb60e52bf00b2 100644 --- a/sideband.c +++ b/sideband.c @@ -34,7 +34,11 @@ static enum { ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, ALLOW_ANSI_ERASE = 1<<2, ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, - ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES +#ifdef WITH_BREAKING_CHANGES + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, +#else + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ALL_CONTROL_CHARACTERS, +#endif } allow_control_characters = ALLOW_CONTROL_SEQUENCES_UNSET; static inline int skip_prefix_in_csv(const char *value, const char *prefix, diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index 3010913bb113e4..07cbc62736bd26 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -98,6 +98,13 @@ test_expect_success 'fallback to color.ui' ' grep "error: error" decoded ' +if test_have_prereq WITH_BREAKING_CHANGES +then + TURN_ON_SANITIZING=already.turned=on +else + TURN_ON_SANITIZING=sideband.allowControlCharacters=color +fi + test_expect_success 'disallow (color) control sequences in sideband' ' write_script .git/color-me-surprised <<-\EOF && printf "error: Have you \\033[31mread\\033[m this?\\a\\n" >&2 @@ -106,7 +113,7 @@ test_expect_success 'disallow (color) control sequences in sideband' ' test_config_global uploadPack.packObjectsHook ./color-me-surprised && test_commit need-at-least-one-commit && - git clone --no-local . throw-away 2>stderr && + git -c $TURN_ON_SANITIZING clone --no-local . throw-away 2>stderr && test_decode_color decoded && test_grep RED decoded && test_grep "\\^G" stderr && @@ -138,7 +145,7 @@ test_decode_csi() { }' } -test_expect_success 'control sequences in sideband allowed by default' ' +test_expect_success 'control sequences in sideband allowed by default (in Git v3.8)' ' write_script .git/color-me-surprised <<-\EOF && printf "error: \\033[31mcolor\\033[m\\033[Goverwrite\\033[Gerase\\033[K\\033?25l\\n" >&2 exec "$@" @@ -147,7 +154,7 @@ test_expect_success 'control sequences in sideband allowed by default' ' test_commit need-at-least-one-commit-at-least && rm -rf throw-away && - git clone --no-local . throw-away 2>stderr && + git -c $TURN_ON_SANITIZING clone --no-local . throw-away 2>stderr && test_decode_color color-decoded && test_decode_csi decoded && test_grep ! "CSI \\[K" decoded && @@ -175,14 +182,15 @@ test_expect_success 'allow all control sequences for a specific URL' ' test_commit one-more-please && rm -rf throw-away && - git clone --no-local . throw-away 2>stderr && + git -c $TURN_ON_SANITIZING clone --no-local . throw-away 2>stderr && test_decode_color color-decoded && test_decode_csi decoded && test_grep ! "CSI \\[K" decoded && test_grep "\\^\\[\\[K" decoded && rm -rf throw-away && - git -c "sideband.file://.allowControlCharacters=true" \ + git -c sideband.allowControlCharacters=false \ + -c "sideband.file://.allowControlCharacters=true" \ clone --no-local "file://$PWD" throw-away 2>stderr && test_decode_color color-decoded && test_decode_csi decoded && From f8e90b972ef9567df2d6983ae2c5f1f2659e86ad Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Thu, 5 Mar 2026 17:55:47 -0600 Subject: [PATCH 047/236] contrib/subtree: reduce function side-effects `process_subtree_split_trailer()` communicates its return value to the caller by setting a variable (`sub`) that is also defined by the calling function. This is both unclear and encourages side-effects. Invoke this function in a sub-shell instead. Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index 791fd8260c4703..bae5d9170bd7a3 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -373,6 +373,10 @@ try_remove_previous () { } # Usage: process_subtree_split_trailer SPLIT_HASH MAIN_HASH [REPOSITORY] +# +# Parse SPLIT_HASH as a commit. If the commit is not found, fetches +# REPOSITORY and tries again. If found, prints full commit hash. +# Otherwise, dies. process_subtree_split_trailer () { assert test $# -ge 2 assert test $# -le 3 @@ -400,6 +404,7 @@ process_subtree_split_trailer () { die "$fail_msg" fi fi + echo "${sub}" } # Usage: find_latest_squash DIR [REPOSITORY] @@ -432,7 +437,7 @@ find_latest_squash () { main="$b" ;; git-subtree-split:) - process_subtree_split_trailer "$b" "$sq" "$repository" + sub="$(process_subtree_split_trailer "$b" "$sq" "$repository")" || exit 1 ;; END) if test -n "$sub" @@ -489,7 +494,7 @@ find_existing_splits () { main="$b" ;; git-subtree-split:) - process_subtree_split_trailer "$b" "$sq" "$repository" + sub="$(process_subtree_split_trailer "$b" "$sq" "$repository")" || exit 1 ;; END) debug "Main is: '$main'" From 3b3ace4d5bb72cb1845e547439b53e00dcf49b8e Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Thu, 5 Mar 2026 17:55:48 -0600 Subject: [PATCH 048/236] contrib/subtree: functionalize split traversal `git subtree split` requires an ancestor-first history traversal. Refactor the existing rev-list traversal into its own function, `find_commits_to_split`. Pass unrevs via stdin to avoid limits on the maximum length of command-line arguments. Also remove an unnecessary `eval`. Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index bae5d9170bd7a3..c1756b3e74cc35 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -519,6 +519,31 @@ find_existing_splits () { done || exit $? } +# Usage: find_commits_to_split REV UNREVS [ARGS...] +# +# List each commit to split, with its parents. +# +# Specify the starting REV for the split, which is usually +# a branch tip. Populate UNREVS with the last --rejoin for +# this prefix, if any. Typically, `subtree split` ignores +# history prior to the last --rejoin... unless and if it +# becomes necessary to consider it. `find_existing_splits` is +# a convenient source of UNREVS. +# +# Remaining arguments are passed to rev-list. +# +# Outputs commits in ancestor-first order, one per line, with +# parent information. Outputs all parents before any child. +find_commits_to_split() { + assert test $# -ge 2 + rev="$1" + unrevs="$2" + shift 2 + + echo "$unrevs" | + git rev-list --topo-order --reverse --parents --stdin "$rev" "$@" +} + # Usage: copy_commit REV TREE FLAGS_STR copy_commit () { assert test $# = 3 @@ -976,12 +1001,11 @@ cmd_split () { # We can't restrict rev-list to only $dir here, because some of our # parents have the $dir contents the root, and those won't match. # (and rev-list --follow doesn't seem to solve this) - grl='git rev-list --topo-order --reverse --parents $rev $unrevs' - revmax=$(eval "$grl" | wc -l) + revmax="$(find_commits_to_split "$rev" "$unrevs" --count)" revcount=0 createcount=0 extracount=0 - eval "$grl" | + find_commits_to_split "$rev" "$unrevs" | while read rev parents do process_split_commit "$rev" "$parents" From c30871b91d4d01ddf24f8129e23aff9da0a57575 Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Thu, 5 Mar 2026 17:55:49 -0600 Subject: [PATCH 049/236] contrib/subtree: reduce recursion during split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Debian-alikes, POSIX sh has a hardcoded recursion depth of 1000. This limit operates like bash's `$FUNCNEST` [1], but it does not actually respect `$FUNCNEST`. This is non-standard behavior. On other distros, the sh recursion depth is limited only by the available stack size. With certain history graphs, subtree splits are recursive—with one recursion per commit. Attempting to split complex repos that have thousands of commits, like [2], may fail on these distros. Reduce the amount of recursion required by eagerly discovering the complete range of commits to process. The recursion is a side-effect of the rejoin-finder in `find_existing_splits`. Rejoin mode, as in git subtree split --rejoin -b hax main ... improves the speed of later splits by merging the split history back into `main`. This gives the splitting algorithm a stopping point. The rejoin maps one commit on `main` to one split commit on `hax`. If we encounter this commit, we know that it maps to `hax`. But this is only a single point in the history. Many splits require history from before the rejoin. See patch content for examples. If pre-rejoin history is required, `check_parents` recursively discovers each individual parent, with one recursion per commit. The recursion deepens the entire tree, even if an older rejoin is available. This quickly overwhelms the Debian sh stack. Instead of recursively processing each commit, process *all* the commits back to the next obvious starting point: i.e., either the next-oldest --rejoin or the beginning of history. This is where the recursion is likely to stop anyway. While this still requires recursion, it is *considerably* less recursive. [1]: https://www.gnu.org/software/bash/manual/html_node/Bash-Variables.html#index-FUNCNEST [2]: https://github.com/christian-heusel/aur.git Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 56 ++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index c1756b3e74cc35..c649a9e393a96c 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -315,6 +315,46 @@ cache_miss () { } # Usage: check_parents [REVS...] +# +# During a split, check that every commit in REVS has already been +# processed via `process_split_commit`. If not, deepen the history +# until it is. +# +# Commits authored by `subtree split` have to be created in the +# same order as every other git commit: ancestor-first, with new +# commits building on old commits. The traversal order normally +# ensures this is the case, but it also excludes --rejoins commits +# by default. +# +# The --rejoin tells us, "this mainline commit is equivalent to +# this split commit." The relationship is only known for that +# exact commit---and not before or after it. Frequently, commits +# prior to a rejoin are not needed... but, just as often, they +# are! Consider this history graph: +# +# --D--- +# / \ +# A--B--C--R--X--Y main +# / / +# a--b--c / split +# \ / +# --e--/ +# +# The main branch has commits A, B, and C. main is split into +# commits a, b, and c. The split history is rejoined at R. +# +# There are at least two cases where we might need the A-B-C +# history that is prior to R: +# +# 1. Commit D is based on history prior to R, but +# it isn't merged into mainline until after R. +# +# 2. Commit e is based on old split history. It is merged +# back into mainline with a subtree merge. Again, this +# happens after R. +# +# check_parents detects these cases and deepens the history +# to the next available rejoin. check_parents () { missed=$(cache_miss "$@") || exit $? local indent=$(($indent + 1)) @@ -322,8 +362,20 @@ check_parents () { do if ! test -r "$cachedir/notree/$miss" then - debug "incorrect order: $miss" - process_split_commit "$miss" "" + debug "found commit excluded by --rejoin: $miss. skipping to the next --rejoin..." + unrevs="$(find_existing_splits "$dir" "$miss" "$repository")" || exit 1 + + find_commits_to_split "$miss" "$unrevs" | + while read -r rev parents + do + process_split_commit "$rev" "$parents" + done + + if ! test -r "$cachedir/$miss" && + ! test -r "$cachedir/notree/$miss" + then + die "failed to deepen history at $miss" + fi fi done } From 9ccd9e6fcab2573a04b24f81d96cfdc355a7d51f Mon Sep 17 00:00:00 2001 From: Mirko Faina Date: Sat, 7 Mar 2026 00:34:40 +0100 Subject: [PATCH 050/236] pretty.c: add %(count) and %(total) placeholders In many commands we can customize the output through the "--format" or the "--pretty" options. This patch adds two new placeholders used mainly when there's a range of commits that we want to show. Currently these two placeholders are not usable as they're coupled with the rev_info->nr and rev_info->total fields, fields that are used only by the format-patch numbered email subjects. Teach repo_format_commit_message() the %(count) and %(total) placeholders. Signed-off-by: Mirko Faina Signed-off-by: Junio C Hamano --- pretty.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pretty.c b/pretty.c index e0646bbc5d49cc..e29bb8b877411d 100644 --- a/pretty.c +++ b/pretty.c @@ -1549,6 +1549,21 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */ if (!commit->object.parsed) parse_object(the_repository, &commit->object.oid); + if (starts_with(placeholder, "(count)")) { + if (!c->pretty_ctx->rev) + die(_("this format specifier can't be used with this command")); + strbuf_addf(sb, "%0*d", decimal_width(c->pretty_ctx->rev->total), + c->pretty_ctx->rev->nr); + return 7; + } + + if (starts_with(placeholder, "(total)")) { + if (!c->pretty_ctx->rev) + die(_("this format specifier can't be used with this command")); + strbuf_addf(sb, "%d", c->pretty_ctx->rev->total); + return 7; + } + switch (placeholder[0]) { case 'H': /* commit hash */ strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_COMMIT)); From 2af59cbcf4375f4d7c61954a19244d130de0a0db Mon Sep 17 00:00:00 2001 From: Mirko Faina Date: Sat, 7 Mar 2026 00:34:41 +0100 Subject: [PATCH 051/236] format-patch: move cover letter summary generation As of now format-patch allows generation of a template cover letter for patch series through "--cover-letter". Move shortlog summary code generation to its own function. This is done in preparation to other patches where we enable the user to format the commit list using thier own format string. Signed-off-by: Mirko Faina Signed-off-by: Junio C Hamano --- builtin/log.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/builtin/log.c b/builtin/log.c index 5c9a8ef3632906..0d12272031f003 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1324,6 +1324,25 @@ static void get_notes_args(struct strvec *arg, struct rev_info *rev) } } +static void generate_shortlog_cover_letter(struct shortlog *log, + struct rev_info *rev, + struct commit **list, + int nr) +{ + shortlog_init(log); + log->wrap_lines = 1; + log->wrap = MAIL_DEFAULT_WRAP; + log->in1 = 2; + log->in2 = 4; + log->file = rev->diffopt.file; + log->groups = SHORTLOG_GROUP_AUTHOR; + shortlog_finish_setup(log); + for (int i = 0; i < nr; i++) + shortlog_add_commit(log, list[i]); + + shortlog_output(log); +} + static void make_cover_letter(struct rev_info *rev, int use_separate_file, struct commit *origin, int nr, struct commit **list, @@ -1377,18 +1396,7 @@ static void make_cover_letter(struct rev_info *rev, int use_separate_file, free(pp.after_subject); strbuf_release(&sb); - shortlog_init(&log); - log.wrap_lines = 1; - log.wrap = MAIL_DEFAULT_WRAP; - log.in1 = 2; - log.in2 = 4; - log.file = rev->diffopt.file; - log.groups = SHORTLOG_GROUP_AUTHOR; - shortlog_finish_setup(&log); - for (i = 0; i < nr; i++) - shortlog_add_commit(&log, list[i]); - - shortlog_output(&log); + generate_shortlog_cover_letter(&log, rev, list, nr); /* We can only do diffstat with a unique reference point */ if (origin) From 6005932d95ff05541f9dbe8c49a45b7abaf7432e Mon Sep 17 00:00:00 2001 From: Mirko Faina Date: Sat, 7 Mar 2026 00:34:42 +0100 Subject: [PATCH 052/236] format-patch: add ability to use alt cover format Often when sending patch series there's a need to clarify to the reviewer what's the purpose of said series, since it might be difficult to understand it from reading the commits messages one by one. "git format-patch" provides the useful "--cover-letter" flag to declare if we want it to generate a template for us to use. By default it will generate a "git shortlog" of the changes, which developers find less useful than they'd like, mainly because the shortlog groups commits by author, and gives no obvious chronological order. Give format-patch the ability to specify an alternative format spec through the "--cover-letter-format" option. This option either takes "shortlog", which is the current format, or a format spec prefixed with "log:". Example: git format-patch --cover-letter \ --cover-letter-format="log:[%(count)/%(total)] %s (%an)" HEAD~3 [1/3] this is a commit summary (Mirko Faina) [2/3] this is another commit summary (Mirko Faina) ... Signed-off-by: Mirko Faina Signed-off-by: Junio C Hamano --- builtin/log.c | 40 +++++++++++++++++++++++++++++++--- t/t4014-format-patch.sh | 48 +++++++++++++++++++++++++++++++++++++++++ t/t9902-completion.sh | 1 + 3 files changed, 86 insertions(+), 3 deletions(-) diff --git a/builtin/log.c b/builtin/log.c index 0d12272031f003..95e5d9755fac6d 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1343,13 +1343,36 @@ static void generate_shortlog_cover_letter(struct shortlog *log, shortlog_output(log); } +static void generate_commit_list_cover(FILE *cover_file, const char *format, + struct commit **list, int n) +{ + struct strbuf commit_line = STRBUF_INIT; + struct pretty_print_context ctx = {0}; + struct rev_info rev = REV_INFO_INIT; + + strbuf_init(&commit_line, 0); + rev.total = n; + ctx.rev = &rev; + for (int i = n - 1; i >= 0; i--) { + rev.nr = n - i; + repo_format_commit_message(the_repository, list[i], format, + &commit_line, &ctx); + fprintf(cover_file, "%s\n", commit_line.buf); + strbuf_reset(&commit_line); + } + fprintf(cover_file, "\n"); + + strbuf_release(&commit_line); +} + static void make_cover_letter(struct rev_info *rev, int use_separate_file, struct commit *origin, int nr, struct commit **list, const char *description_file, const char *branch_name, int quiet, - const struct format_config *cfg) + const struct format_config *cfg, + const char *format) { const char *committer; struct shortlog log; @@ -1396,7 +1419,12 @@ static void make_cover_letter(struct rev_info *rev, int use_separate_file, free(pp.after_subject); strbuf_release(&sb); - generate_shortlog_cover_letter(&log, rev, list, nr); + if (skip_prefix(format, "log:", &format)) + generate_commit_list_cover(rev->diffopt.file, format, list, nr); + else if (!strcmp(format, "shortlog")) + generate_shortlog_cover_letter(&log, rev, list, nr); + else + die(_("'%s' is not a valid format string"), format); /* We can only do diffstat with a unique reference point */ if (origin) @@ -1914,6 +1942,7 @@ int cmd_format_patch(int argc, int just_numbers = 0; int ignore_if_in_upstream = 0; int cover_letter = -1; + const char *cover_letter_fmt = NULL; int boundary_count = 0; int no_binary_diff = 0; int zero_commit = 0; @@ -1960,6 +1989,8 @@ int cmd_format_patch(int argc, N_("print patches to standard out")), OPT_BOOL(0, "cover-letter", &cover_letter, N_("generate a cover letter")), + OPT_STRING(0, "cover-letter-format", &cover_letter_fmt, N_("format-spec"), + N_("format spec used for the commit list in the cover letter")), OPT_BOOL(0, "numbered-files", &just_numbers, N_("use simple number sequence for output file names")), OPT_STRING(0, "suffix", &fmt_patch_suffix, N_("sfx"), @@ -2297,6 +2328,7 @@ int cmd_format_patch(int argc, /* nothing to do */ goto done; total = list.nr; + if (cover_letter == -1) { if (cfg.config_cover_letter == COVER_AUTO) cover_letter = (total > 1); @@ -2383,12 +2415,14 @@ int cmd_format_patch(int argc, } rev.numbered_files = just_numbers; rev.patch_suffix = fmt_patch_suffix; + if (cover_letter) { if (cfg.thread) gen_message_id(&rev, "cover"); make_cover_letter(&rev, !!output_directory, origin, list.nr, list.items, - description_file, branch_name, quiet, &cfg); + description_file, branch_name, quiet, &cfg, + cover_letter_fmt); print_bases(&bases, rev.diffopt.file); print_signature(signature, rev.diffopt.file); total++; diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh index 21d6d0cd9ef679..458da80721a1e9 100755 --- a/t/t4014-format-patch.sh +++ b/t/t4014-format-patch.sh @@ -380,6 +380,54 @@ test_expect_success 'filename limit applies only to basename' ' done ' +test_expect_success 'cover letter with subject, author and count' ' + rm -rf patches && + test_when_finished "git reset --hard HEAD~1" && + test_when_finished "rm -rf patches result test_file" && + touch test_file && + git add test_file && + git commit -m "This is a subject" && + git format-patch --cover-letter \ + --cover-letter-format="log:[%(count)/%(total)] %s (%an)" -o patches HEAD~1 && + grep "^\[1/1\] This is a subject (A U Thor)$" patches/0000-cover-letter.patch >result && + test_line_count = 1 result +' + +test_expected_success 'cover letter with author and count' ' + test_when_finished "git reset --hard HEAD~1" && + test_when_finished "rm -rf patches result test_file" && + touch test_file && + git add test_file && + git commit -m "This is a subject" && + git format-patch --cover-letter \ + --cover-letter-format="log:[%(count)/%(total)] %an" -o patches HEAD~1 && + grep "^\[1/1\] A U Thor$" patches/0000-cover-letter.patch >result && + test_line_count = 1 result +' + +test_expect_success 'cover letter shortlog' ' + test_when_finished "git reset --hard HEAD~1" && + test_when_finished "rm -rf patches result test_file" && + touch test_file && + git add test_file && + git commit -m "This is a subject" && + git format-patch --cover-letter --cover-letter-format=shortlog \ + -o patches HEAD~1 && + sed -n -e "/^A U Thor/p;" patches/0000-cover-letter.patch >result && + test_line_count = 1 result +' + +test_expect_success 'cover letter no format' ' + test_when_finished "git reset --hard HEAD~1" && + test_when_finished "rm -rf patches result test_file" && + touch test_file && + git add test_file && + git commit -m "This is a subject" && + git format-patch --cover-letter -o patches HEAD~1 && + sed -n -e "/^A U Thor/p;" patches/0000-cover-letter.patch >result && + test_line_count = 1 result +' + test_expect_success 'reroll count' ' rm -fr patches && git format-patch -o patches --cover-letter --reroll-count 4 main..side >list && diff --git a/t/t9902-completion.sh b/t/t9902-completion.sh index 964e1f156932c6..4f760a746862c3 100755 --- a/t/t9902-completion.sh +++ b/t/t9902-completion.sh @@ -2774,6 +2774,7 @@ test_expect_success PERL 'send-email' ' test_completion "git send-email --cov" <<-\EOF && --cover-from-description=Z --cover-letter Z + --cover-letter-format=Z EOF test_completion "git send-email --val" <<-\EOF && --validate Z From be0ef6fcd2379ea3dc1569d6d8c360d6d59d031f Mon Sep 17 00:00:00 2001 From: Mirko Faina Date: Sat, 7 Mar 2026 00:34:43 +0100 Subject: [PATCH 053/236] format-patch: add commitListFormat config Using "--cover-letter" we can tell format-patch to generate a cover letter, in this cover letter there's a list of commits included in the patch series and the format is specified by the "--cover-letter-format" option. Would be useful if this format could be configured from the config file instead of always needing to pass it from the command line. Teach format-patch how to read the format spec for the cover letter from the config files. The variable it should look for is called format.commitListFormat. Possible values: - commitListFormat is set but no string is passed: it will default to "[%(count)/%(total)] %s" - if a string is passed: will use it as a format spec. Note that this is either "shortlog" or a format spec prefixed by "log:" e.g."log:%s (%an)" - if commitListFormat is not set: it will default to the shortlog format. Signed-off-by: Mirko Faina Signed-off-by: Junio C Hamano --- builtin/log.c | 21 ++++++++++++++++ t/t4014-format-patch.sh | 53 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/builtin/log.c b/builtin/log.c index 95e5d9755fac6d..5fec0ddaf91b0a 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -886,6 +886,7 @@ struct format_config { char *signature; char *signature_file; enum cover_setting config_cover_letter; + char *fmt_cover_letter_commit_list; char *config_output_directory; enum cover_from_description cover_from_description_mode; int show_notes; @@ -930,6 +931,7 @@ static void format_config_release(struct format_config *cfg) string_list_clear(&cfg->extra_cc, 0); strbuf_release(&cfg->sprefix); free(cfg->fmt_patch_suffix); + free(cfg->fmt_cover_letter_commit_list); } static enum cover_from_description parse_cover_from_description(const char *arg) @@ -1052,6 +1054,19 @@ static int git_format_config(const char *var, const char *value, cfg->config_cover_letter = git_config_bool(var, value) ? COVER_ON : COVER_OFF; return 0; } + if (!strcmp(var, "format.commitlistformat")) { + struct strbuf tmp = STRBUF_INIT; + strbuf_init(&tmp, 0); + if (value) + strbuf_addstr(&tmp, value); + else + strbuf_addstr(&tmp, "log:[%(count)/%(total)] %s"); + + FREE_AND_NULL(cfg->fmt_cover_letter_commit_list); + git_config_string(&cfg->fmt_cover_letter_commit_list, var, tmp.buf); + strbuf_release(&tmp); + return 0; + } if (!strcmp(var, "format.outputdirectory")) { FREE_AND_NULL(cfg->config_output_directory); return git_config_string(&cfg->config_output_directory, var, value); @@ -2329,6 +2344,12 @@ int cmd_format_patch(int argc, goto done; total = list.nr; + if (!cover_letter_fmt) { + cover_letter_fmt = cfg.fmt_cover_letter_commit_list; + if (!cover_letter_fmt) + cover_letter_fmt = "shortlog"; + } + if (cover_letter == -1) { if (cfg.config_cover_letter == COVER_AUTO) cover_letter = (total > 1); diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh index 458da80721a1e9..4891389a53aed4 100755 --- a/t/t4014-format-patch.sh +++ b/t/t4014-format-patch.sh @@ -428,6 +428,59 @@ test_expect_success 'cover letter no format' ' test_line_count = 1 result ' +test_expect_success 'cover letter config with count, subject and author' ' + test_when_finished "rm -rf patches result" && + test_when_finished "git config unset format.coverletter" && + test_when_finished "git config unset format.commitlistformat" && + git config set format.coverletter true && + git config set format.commitlistformat "log:[%(count)/%(total)] %s (%an)" && + git format-patch -o patches HEAD~2 && + grep -E "^[[[:digit:]]+/[[:digit:]]+] .* \(A U Thor\)" patches/0000-cover-letter.patch >result && + test_line_count = 2 result +' + +test_expect_success 'cover letter config with count and author' ' + test_when_finished "rm -rf patches result" && + test_when_finished "git config unset format.coverletter" && + test_when_finished "git config unset format.commitlistformat" && + git config set format.coverletter true && + git config set format.commitlistformat "log:[%(count)/%(total)] (%an)" && + git format-patch -o patches HEAD~2 && + grep -E "^[[[:digit:]]+/[[:digit:]]+] \(A U Thor\)" patches/0000-cover-letter.patch >result && + test_line_count = 2 result +' + +test_expect_success 'cover letter config commitlistformat set but no format' ' + test_when_finished "rm -rf patches result" && + test_when_finished "git config unset format.coverletter" && + test_when_finished "git config unset format.commitlistformat" && + git config set format.coverletter true && + printf "\tcommitlistformat" >> .git/config && + git format-patch -o patches HEAD~2 && + grep -E "^[[[:digit:]]+/[[:digit:]]+] .*" patches/0000-cover-letter.patch >result && + test_line_count = 2 result +' + +test_expect_success 'cover letter config commitlistformat set to shortlog' ' + test_when_finished "rm -rf patches result" && + test_when_finished "git config unset format.coverletter" && + test_when_finished "git config unset format.commitlistformat" && + git config set format.coverletter true && + git config set format.commitlistformat shortlog && + git format-patch -o patches HEAD~2 && + grep -E "^A U Thor \([[:digit:]]+\)" patches/0000-cover-letter.patch >result && + test_line_count = 1 result +' + +test_expect_success 'cover letter config commitlistformat not set' ' + test_when_finished "rm -rf patches result" && + test_when_finished "git config unset format.coverletter" && + git config set format.coverletter true && + git format-patch -o patches HEAD~2 && + grep -E "^A U Thor \([[:digit:]]+\)" patches/0000-cover-letter.patch >result && + test_line_count = 1 result +' + test_expect_success 'reroll count' ' rm -fr patches && git format-patch -o patches --cover-letter --reroll-count 4 main..side >list && From 51ed9f7e724468b8e44a7c33946dda38a335acca Mon Sep 17 00:00:00 2001 From: Mirko Faina Date: Sat, 7 Mar 2026 00:34:44 +0100 Subject: [PATCH 054/236] docs: add usage for the cover-letter fmt feature Document the new "--cover-letter-format" option in format-patch and its related configuration variable "format.commitListFormat". Signed-off-by: Mirko Faina Signed-off-by: Junio C Hamano --- Documentation/config/format.adoc | 5 +++++ Documentation/git-format-patch.adoc | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/Documentation/config/format.adoc b/Documentation/config/format.adoc index ab0710e86a3e2c..ea5ec5df7a2aee 100644 --- a/Documentation/config/format.adoc +++ b/Documentation/config/format.adoc @@ -101,6 +101,11 @@ format.coverLetter:: generate a cover-letter only when there's more than one patch. Default is false. +format.commitListFormat:: + When the `--cover-letter-format` option is not given, `format-patch` + uses the value of this variable to decide how to format the title of + each commit. Default to `shortlog`. + format.outputDirectory:: Set a custom directory to store the resulting files instead of the current working directory. All directory components will be created. diff --git a/Documentation/git-format-patch.adoc b/Documentation/git-format-patch.adoc index 9a7807ca71a528..668330a0152eb0 100644 --- a/Documentation/git-format-patch.adoc +++ b/Documentation/git-format-patch.adoc @@ -24,6 +24,7 @@ SYNOPSIS [(--reroll-count|-v) ] [--to=] [--cc=] [--[no-]cover-letter] [--quiet] + [--cover-letter-format=] [--[no-]encode-email-headers] [--no-notes | --notes[=]] [--interdiff=] @@ -321,6 +322,17 @@ feeding the result to `git send-email`. containing the branch description, shortlog and the overall diffstat. You can fill in a description in the file before sending it out. +--cover-letter-format=:: + Specify the format in which to generate the commit list of the + patch series. This option is available if the user wants to use + an alternative to the default `shortlog` format. The accepted + values for format-spec are "shortlog" or a format string + prefixed with `log:`. + e.g. `log: %s (%an)` + If defined, defaults to the `format.commitListFormat` configuration + variable. + This option is relevant only if a cover letter is generated. + --encode-email-headers:: --no-encode-email-headers:: Encode email headers that have non-ASCII characters with @@ -452,6 +464,7 @@ with configuration variables. signOff = true outputDirectory = coverLetter = auto + commitListFormat = shortlog coverFromDescription = auto ------------ From 1ac1d4e761c5f394526873b364ba23cf5b9b0da5 Mon Sep 17 00:00:00 2001 From: Collin Funk Date: Sun, 8 Mar 2026 19:55:11 -0700 Subject: [PATCH 055/236] bloom: remove a misleading const qualifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with glibc-2.43 there is the following warning: bloom.c: In function ‘get_or_compute_bloom_filter’: bloom.c:515:52: warning: initialization discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers] 515 | char *last_slash = strrchr(path, '/'); | ^~~~~~~ In this case, we always write through "path" through the "last_slash" pointer. Therefore, the const qualifier on "path" is misleading and we can just remove it. Signed-off-by: Collin Funk Signed-off-by: Junio C Hamano --- bloom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bloom.c b/bloom.c index 2d7b951e5bf245..d604e8f07ab8d8 100644 --- a/bloom.c +++ b/bloom.c @@ -501,7 +501,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, struct hashmap_iter iter; for (i = 0; i < diff_queued_diff.nr; i++) { - const char *path = diff_queued_diff.queue[i]->two->path; + char *path = diff_queued_diff.queue[i]->two->path; /* * Add each leading directory of the changed file, i.e. for @@ -523,7 +523,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, free(e); if (!last_slash) - last_slash = (char*)path; + last_slash = path; *last_slash = '\0'; } while (*path); From 02cbae61df7b3493e7f76f8385fc9257de60755f Mon Sep 17 00:00:00 2001 From: Collin Funk Date: Sun, 8 Mar 2026 20:23:06 -0700 Subject: [PATCH 056/236] dir: avoid -Wdiscarded-qualifiers in remove_path() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with glibc-2.43 there is the following warning: dir.c:3526:15: warning: assignment discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers] 3526 | slash = strrchr(name, '/'); | ^ In this case we use a non-const pointer to get the last slash of the unwritable file name, and then use it again to write in the strdup'd file name. We can avoid this warning and make the code a bit more clear by using a separate variable to access the original argument and its strdup'd copy. Signed-off-by: Collin Funk Signed-off-by: Junio C Hamano --- dir.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dir.c b/dir.c index b00821f294fea2..046a8bbf325ac5 100644 --- a/dir.c +++ b/dir.c @@ -3516,15 +3516,15 @@ int get_sparse_checkout_patterns(struct pattern_list *pl) int remove_path(const char *name) { - char *slash; + const char *last; if (unlink(name) && !is_missing_file_error(errno)) return -1; - slash = strrchr(name, '/'); - if (slash) { + last = strrchr(name, '/'); + if (last) { char *dirs = xstrdup(name); - slash = dirs + (slash - name); + char *slash = dirs + (last - name); do { *slash = '\0'; if (startup_info->original_cwd && From 4f6a803aba9e97650af304b5c266cfbc25b11fcc Mon Sep 17 00:00:00 2001 From: Tian Yuchen Date: Tue, 10 Mar 2026 17:50:17 +0800 Subject: [PATCH 057/236] diff: document -U without as using default context The documentation for '-U' implies that the numeric value '' is mandatory. However, the command line parser has historically accepted '-U' without a number. Strictly requiring a number for '-U' would break existing tests (e.g., in 't4013') and likely disrupt user scripts relying on this undocumented behavior. Hence we retain this fallback behavior for backward compatibility, but document it as such. Signed-off-by: Tian Yuchen Signed-off-by: Junio C Hamano --- Documentation/diff-context-options.adoc | 6 ++++-- Documentation/diff-options.adoc | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/diff-context-options.adoc b/Documentation/diff-context-options.adoc index e161260358fff5..b9ace2aa4b3092 100644 --- a/Documentation/diff-context-options.adoc +++ b/Documentation/diff-context-options.adoc @@ -1,7 +1,9 @@ `-U`:: `--unified=`:: - Generate diffs with __ lines of context. Defaults to `diff.context` - or 3 if the config option is unset. + Generate diffs with __ lines of context. The number of context + lines defaults to `diff.context` or 3 if the configuration variable + is unset. (`-U` without `` is silently accepted as a synonym for + `-p` due to a historical accident). `--inter-hunk-context=`:: Show the context between diff hunks, up to the specified __ diff --git a/Documentation/diff-options.adoc b/Documentation/diff-options.adoc index 9cdad6f72a0c7d..867bef631280e3 100644 --- a/Documentation/diff-options.adoc +++ b/Documentation/diff-options.adoc @@ -127,8 +127,10 @@ endif::git-log[] `-U`:: `--unified=`:: - Generate diffs with __ lines of context instead of - the usual three. + Generate diffs with __ lines of context. The number of context + lines defaults to `diff.context` or 3 if the configuration variable + is unset. (`-U` without `` is silently accepted as a synonym for + `-p` due to a historical accident). ifndef::git-format-patch[] Implies `--patch`. endif::git-format-patch[] From 69efd53c8154a26cbb98c9c16ceff26060b288a9 Mon Sep 17 00:00:00 2001 From: Mansi Singh Date: Tue, 10 Mar 2026 22:50:22 +0000 Subject: [PATCH 058/236] t7605: use test_path_is_file instead of test -f Replace old-style 'test -f' path checks with the modern test_path_is_file helper in the merge_c1_to_c2_cmds block. The helper provides clearer failure messages and is the established convention in Git's test suite. Signed-off-by: Mansi Singh Signed-off-by: Junio C Hamano --- t/t7605-merge-resolve.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/t7605-merge-resolve.sh b/t/t7605-merge-resolve.sh index 5d56c3854647b2..44de97a480f785 100755 --- a/t/t7605-merge-resolve.sh +++ b/t/t7605-merge-resolve.sh @@ -34,9 +34,9 @@ merge_c1_to_c2_cmds=' test "$(git rev-parse c1)" = "$(git rev-parse HEAD^1)" && test "$(git rev-parse c2)" = "$(git rev-parse HEAD^2)" && git diff --exit-code && - test -f c0.c && - test -f c1.c && - test -f c2.c && + test_path_is_file c0.c && + test_path_is_file c1.c && + test_path_is_file c2.c && test 3 = $(git ls-tree -r HEAD | wc -l) && test 3 = $(git ls-files | wc -l) ' From f584f9d36129f8af18251bd0f193c914ed8b0cfb Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 11 Mar 2026 14:19:38 +0000 Subject: [PATCH 059/236] promisor-remote: prevent lazy-fetch recursion in child fetch fetch_objects() spawns a child `git fetch` to lazily fill in missing objects. That child's index-pack, when it receives a thin pack containing a REF_DELTA against a still-missing base, calls promisor_remote_get_direct() -- which is fetch_objects() again. With negotiationAlgorithm=noop the client advertises no "have" lines, so a well-behaved server sends requested objects un-deltified or deltified only against objects in the same pack. A server that nevertheless sends REF_DELTA against a base the client does not have is misbehaving; however the client should not recurse unboundedly in response. Propagate GIT_NO_LAZY_FETCH=1 into the child fetch's environment so that if the child's index-pack encounters such a REF_DELTA, it hits the existing guard at the top of fetch_objects() and fails fast instead of recursing. Depth-1 lazy fetch (the whole point of fetch_objects()) is unaffected: only the child and its descendants see the variable. Add a test that injects a thin pack containing a REF_DELTA against a missing base via HTTP, triggering the recursion path through index-pack's promisor_remote_get_direct() call. With the fix, the child's fetch_objects() sees GIT_NO_LAZY_FETCH=1 and blocks the depth-2 fetch with a "lazy fetching disabled" warning. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- promisor-remote.c | 7 +++++ t/t5616-partial-clone.sh | 60 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/promisor-remote.c b/promisor-remote.c index 77ebf537e2b3ee..2f56e89404b19a 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -42,6 +42,13 @@ static int fetch_objects(struct repository *repo, child.in = -1; if (repo != the_repository) prepare_other_repo_env(&child.env, repo->gitdir); + /* + * Prevent the child's index-pack from recursing back into + * fetch_objects() when resolving REF_DELTA bases it does not + * have. With noop negotiation the server should never need + * to send such deltas, so a depth-2 fetch would not help. + */ + strvec_pushf(&child.env, "%s=1", NO_LAZY_FETCH_ENVIRONMENT); strvec_pushl(&child.args, "-c", "fetch.negotiationAlgorithm=noop", "fetch", remote_name, "--no-tags", "--no-write-fetch-head", "--recurse-submodules=no", diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 1e354e057fa12c..27f131c8d9ba57 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -907,6 +907,66 @@ test_expect_success PERL_TEST_HELPERS 'tolerate server sending REF_DELTA against ! test -e "$HTTPD_ROOT_PATH/one-time-script" ' +test_expect_success PERL_TEST_HELPERS 'lazy-fetch of REF_DELTA with missing base does not recurse' ' + SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" && + rm -rf "$SERVER" repo && + test_create_repo "$SERVER" && + test_config -C "$SERVER" uploadpack.allowfilter 1 && + test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 && + + # Create a commit with 2 blobs to be used as delta base and content. + for i in $(test_seq 10) + do + echo "this is a line" >>"$SERVER/foo.txt" && + echo "this is another line" >>"$SERVER/bar.txt" || return 1 + done && + git -C "$SERVER" add foo.txt bar.txt && + git -C "$SERVER" commit -m initial && + BLOB_FOO=$(git -C "$SERVER" rev-parse HEAD:foo.txt) && + BLOB_BAR=$(git -C "$SERVER" rev-parse HEAD:bar.txt) && + + # Partial clone with blob:none. The client has commits and + # trees but no blobs. + test_config -C "$SERVER" protocol.version 2 && + git -c protocol.version=2 clone --no-checkout \ + --filter=blob:none $HTTPD_URL/one_time_script/server repo && + + # Sanity check: client does not have either blob locally. + git -C repo rev-list --objects --ignore-missing \ + -- $BLOB_FOO >objlist && + test_line_count = 0 objlist && + + # Craft a thin pack where BLOB_FOO is a REF_DELTA against + # BLOB_BAR. Since the client has neither blob (blob:none + # filter), the delta base will be missing. This simulates a + # misbehaving server that sends REF_DELTA against an object + # the client does not have. + test-tool -C "$SERVER" pack-deltas --num-objects=1 >thin.pack <<-EOF && + REF_DELTA $BLOB_FOO $BLOB_BAR + EOF + + replace_packfile thin.pack && + + # Trigger a lazy fetch for BLOB_FOO. The child fetch spawned + # by fetch_objects() receives our crafted thin pack. Its + # index-pack encounters the missing delta base (BLOB_BAR) and + # tries to lazy-fetch it via promisor_remote_get_direct(). + # + # With the fix: fetch_objects() propagates GIT_NO_LAZY_FETCH=1 + # to the child, so the depth-2 fetch is blocked and we see the + # "lazy fetching disabled" warning. The object cannot be + # resolved, so cat-file fails. + # + # Without the fix: the depth-2 fetch would proceed, potentially + # recursing unboundedly with a persistently misbehaving server. + test_must_fail git -C repo -c protocol.version=2 \ + cat-file -p $BLOB_FOO 2>err && + test_grep "lazy fetching disabled" err && + + # Ensure that the one-time-script was used. + ! test -e "$HTTPD_ROOT_PATH/one-time-script" +' + # DO NOT add non-httpd-specific tests here, because the last part of this # test script is only executed when httpd is available and enabled. From 088e994bf62a8135b87615c3e786958b397a9fd7 Mon Sep 17 00:00:00 2001 From: Amisha Chhajed Date: Thu, 12 Mar 2026 00:54:53 +0530 Subject: [PATCH 060/236] help: cleanup the contruction of keys_uniq construction of keys_uniq depends on sort operation executed on keys before processing, which does not gurantee that keys_uniq will be sorted. refactor the code to shift the sort operation after the processing to remove dependency on key's sort operation and strictly maintain the sorted order of keys_uniq. move strbuf init and release out of loop to reuse same buffer. dedent sort -u and sed in tests and replace grep with sed, to avoid piping grep's output to sed. Suggested-by: Siddharth Shrimali Signed-off-by: Amisha Chhajed Signed-off-by: Junio C Hamano --- builtin/help.c | 91 ++++++++++++++++++++++++++++++------------------- t/t0012-help.sh | 39 ++++++++++++--------- 2 files changed, 78 insertions(+), 52 deletions(-) diff --git a/builtin/help.c b/builtin/help.c index c09cbc8912deb6..daadc61c70ae78 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -111,6 +111,49 @@ struct slot_expansion { int found; }; +static void set_config_vars(struct string_list *keys_uniq, struct string_list_item *var) +{ + struct strbuf sb = STRBUF_INIT; + const char *str = var->string; + const char *wildcard = strchr(str, '*'); + const char *tag = strchr(str, '<'); + const char *cut; + + if (wildcard && tag) + cut = wildcard < tag ? wildcard : tag; + else if (wildcard) + cut = wildcard; + else if (tag) + cut = tag; + else { + string_list_append(keys_uniq, str); + return; + } + + strbuf_add(&sb, str, cut - str); + string_list_append(keys_uniq, sb.buf); + strbuf_release(&sb); +} + +static void set_config_sections(struct string_list *keys_uniq, struct string_list_item *var) +{ + struct strbuf sb = STRBUF_INIT; + const char *str = var->string; + const char *dot = strchr(str, '.'); + const char *cut; + + if (dot) + cut = dot; + else { + set_config_vars(keys_uniq, var); + return; + } + + strbuf_add(&sb, str, cut - str); + string_list_append(keys_uniq, sb.buf); + strbuf_release(&sb); +} + static void list_config_help(enum show_config_type type) { struct slot_expansion slot_expansions[] = { @@ -131,13 +174,12 @@ static void list_config_help(enum show_config_type type) struct string_list keys = STRING_LIST_INIT_DUP; struct string_list keys_uniq = STRING_LIST_INIT_DUP; struct string_list_item *item; + struct strbuf sb = STRBUF_INIT; for (p = config_name_list; *p; p++) { const char *var = *p; - struct strbuf sb = STRBUF_INIT; for (e = slot_expansions; e->prefix; e++) { - strbuf_reset(&sb); strbuf_addf(&sb, "%s.%s", e->prefix, e->placeholder); if (!strcasecmp(var, sb.buf)) { @@ -146,60 +188,39 @@ static void list_config_help(enum show_config_type type) break; } } - strbuf_release(&sb); + if (!e->prefix) string_list_append(&keys, var); } + strbuf_release(&sb); + for (e = slot_expansions; e->prefix; e++) if (!e->found) BUG("slot_expansion %s.%s is not used", e->prefix, e->placeholder); - string_list_sort(&keys); for (size_t i = 0; i < keys.nr; i++) { - const char *var = keys.items[i].string; - const char *wildcard, *tag, *cut; - const char *dot = NULL; - struct strbuf sb = STRBUF_INIT; - switch (type) { case SHOW_CONFIG_HUMAN: - puts(var); - continue; + string_list_append(&keys_uniq, keys.items[i].string); + break; case SHOW_CONFIG_SECTIONS: - dot = strchr(var, '.'); + set_config_sections(&keys_uniq, &keys.items[i]); break; case SHOW_CONFIG_VARS: + set_config_vars(&keys_uniq, &keys.items[i]); break; + default: + BUG("%d: unexpected type", type); } - wildcard = strchr(var, '*'); - tag = strchr(var, '<'); - - if (!dot && !wildcard && !tag) { - string_list_append(&keys_uniq, var); - continue; - } - - if (dot) - cut = dot; - else if (wildcard && !tag) - cut = wildcard; - else if (!wildcard && tag) - cut = tag; - else - cut = wildcard < tag ? wildcard : tag; - - strbuf_add(&sb, var, cut - var); - string_list_append(&keys_uniq, sb.buf); - strbuf_release(&sb); - } - string_list_clear(&keys, 0); - string_list_remove_duplicates(&keys_uniq, 0); + + string_list_sort_u(&keys_uniq, 0); for_each_string_list_item(item, &keys_uniq) puts(item->string); string_list_clear(&keys_uniq, 0); + string_list_clear(&keys, 0); } static enum help_format parse_help_format(const char *format) diff --git a/t/t0012-help.sh b/t/t0012-help.sh index d3a0967e9d0752..c33501bdcd2b9b 100755 --- a/t/t0012-help.sh +++ b/t/t0012-help.sh @@ -141,20 +141,23 @@ test_expect_success 'git help -c' ' '\''git help config'\'' for more information EOF - grep -v -E \ - -e "^[^.]+\.[^.]+$" \ - -e "^[^.]+\.[^.]+\.[^.]+$" \ - help.output >actual && + sed -E -e " + /^[^.]+\.[^.]+$/d + /^[^.]+\.[^.]+\.[^.]+$/d + " help.output >actual && test_cmp expect actual ' test_expect_success 'git help --config-for-completion' ' git help -c >human && - grep -E \ - -e "^[^.]+\.[^.]+$" \ - -e "^[^.]+\.[^.]+\.[^.]+$" human | - sed -e "s/\*.*//" -e "s/<.*//" | - sort -u >human.munged && + sed -E -e " + /^[^.]+\.[^.]+$/b out + /^[^.]+\.[^.]+\.[^.]+$/b out + d + : out + s/\*.*// + s/<.*// + " human | sort -u >human.munged && git help --config-for-completion >vars && test_cmp human.munged vars @@ -162,14 +165,16 @@ test_expect_success 'git help --config-for-completion' ' test_expect_success 'git help --config-sections-for-completion' ' git help -c >human && - grep -E \ - -e "^[^.]+\.[^.]+$" \ - -e "^[^.]+\.[^.]+\.[^.]+$" human | - sed -e "s/\..*//" | - sort -u >human.munged && - - git help --config-sections-for-completion >sections && - test_cmp human.munged sections + sed -E -e " + /^[^.]+\.[^.]+$/b out + /^[^.]+\.[^.]+\.[^.]+$/b out + d + : out + s/\..*// + " human | sort -u >expect && + + git help --config-sections-for-completion >actual && + test_cmp expect actual ' test_section_spacing () { From be430f4eaa9fa32420778f322a6a1c0d6162fbce Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 11 Mar 2026 14:35:41 -0700 Subject: [PATCH 061/236] t: allow use of "sed -E" Since early 2019 with e62e225f (test-lint: only use only sed [-n] [-e command] [-f command_file], 2019-01-20), we have been trying to limit the options of "sed" we use in our tests to "-e ", "-n", and "-f ". Before the commit, we were trying to reject only "-i" (which is one of the really-not-portable options), but the commit explicitly wanted to reject use of "-E" (use ERE instead of BRE). The commit cites the then-current POSIX.1 (Issue 7, 2018 edition) to show that "even recent POSIX does not have it!", but the latest edition (Issue 8) documents "-E" as an option to use ERE. But that was 7 years ago, and that is a long time for many things to happen. Besides, we have been using "sed -E" without the check in question triggering in one of the scripts since 2022, with 461fec41 (bisect run: keep some of the post-v2.30.0 output, 2022-11-10). It was hidden because the 'E' was squished with another single letter option. t/t6030-bisect-porcelain.sh: sed -En 's/.*(bisect... This escaped the rather simple pattern used in the checker /\bsed\s+-[^efn]\s+/ and err 'sed option not portable...'; because -E did not appear as a singleton. Let's change the rule to allow the "-E" option, which nobody has complained against for the past 3 years. We rewrite our first use of the "-E" option so that it is caught by the old rule, primarily because we do not want to teach our mischievous developers how to smuggle in an unwanted option undetected by the test lint. And at the same time, loosen the pattern to allow "-E" the same way we allow "-n" and friends. Signed-off-by: Junio C Hamano --- t/check-non-portable-shell.pl | 2 +- t/t6030-bisect-porcelain.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/t/check-non-portable-shell.pl b/t/check-non-portable-shell.pl index 6ee7700eb48013..18d944b8107dd5 100755 --- a/t/check-non-portable-shell.pl +++ b/t/check-non-portable-shell.pl @@ -36,7 +36,7 @@ sub err { $_ = $line; /\bcp\s+-a/ and err 'cp -a is not portable'; - /\bsed\s+-[^efn]\s+/ and err 'sed option not portable (use only -n, -e, -f)'; + /\bsed\s+-[^Eefn]\s+/ and err 'sed option not portable (use only -E, -n, -e, -f)'; /\becho\s+-[neE]/ and err 'echo with option is not portable (use printf)'; /^\s*declare\s+/ and err 'arrays/declare not portable'; /^\s*[^#]\s*which\s/ and err 'which is not portable (use type)'; diff --git a/t/t6030-bisect-porcelain.sh b/t/t6030-bisect-porcelain.sh index cdc02706404b34..1ba9ca219e5da9 100755 --- a/t/t6030-bisect-porcelain.sh +++ b/t/t6030-bisect-porcelain.sh @@ -402,7 +402,7 @@ test_expect_success 'git bisect run: negative exit code' " git bisect good $HASH1 && git bisect bad $HASH4 && ! git bisect run ./fail.sh 2>err && - sed -En 's/.*(bisect.*code) (-?[0-9]+) (from.*)/\1 -1 \3/p' err >actual && + sed -E -n 's/.*(bisect.*code) (-?[0-9]+) (from.*)/\1 -1 \3/p' err >actual && test_cmp expect actual " From dfcdd0b960fc1efd2fe19e97b973b435727b4c42 Mon Sep 17 00:00:00 2001 From: Beat Bolli Date: Wed, 11 Mar 2026 23:10:25 +0100 Subject: [PATCH 062/236] imap-send: use the OpenSSL API to access the subject alternative names The OpenSSL 4.0 master branch has made the ASN1_STRING structure opaque, forbidding access to its internal fields. Use the official accessor functions instead. They have existed since OpenSSL v1.1.0. Signed-off-by: Beat Bolli Signed-off-by: Junio C Hamano --- imap-send.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/imap-send.c b/imap-send.c index 26dda7f3287127..1c934c24877e3f 100644 --- a/imap-send.c +++ b/imap-send.c @@ -244,10 +244,14 @@ static int verify_hostname(X509 *cert, const char *hostname) if ((subj_alt_names = X509_get_ext_d2i(cert, NID_subject_alt_name, NULL, NULL))) { int num_subj_alt_names = sk_GENERAL_NAME_num(subj_alt_names); for (i = 0; !found && i < num_subj_alt_names; i++) { + int ntype; GENERAL_NAME *subj_alt_name = sk_GENERAL_NAME_value(subj_alt_names, i); - if (subj_alt_name->type == GEN_DNS && - strlen((const char *)subj_alt_name->d.ia5->data) == (size_t)subj_alt_name->d.ia5->length && - host_matches(hostname, (const char *)(subj_alt_name->d.ia5->data))) + ASN1_STRING *subj_alt_str = GENERAL_NAME_get0_value(subj_alt_name, &ntype); + + if (ntype == GEN_DNS && + strlen((const char *)ASN1_STRING_get0_data(subj_alt_str)) == + ASN1_STRING_length(subj_alt_str) && + host_matches(hostname, (const char *)ASN1_STRING_get0_data(subj_alt_str))) found = 1; } sk_GENERAL_NAME_pop_free(subj_alt_names, GENERAL_NAME_free); From 08fd302fc4b8eaf0bb32856231a5fb46430e3c7e Mon Sep 17 00:00:00 2001 From: Beat Bolli Date: Wed, 11 Mar 2026 23:10:26 +0100 Subject: [PATCH 063/236] imap-send: use the OpenSSL API to access the subject common name The OpenSSL 4.0 master branch has deprecated the X509_NAME_get_text_by_NID function. Use the recommended replacement APIs instead. They have existed since OpenSSL v1.1.0. Take care to get the constness right for pre-4.0 versions. Signed-off-by: Beat Bolli Signed-off-by: Junio C Hamano --- imap-send.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/imap-send.c b/imap-send.c index 1c934c24877e3f..2a904314dd4bfb 100644 --- a/imap-send.c +++ b/imap-send.c @@ -233,9 +233,13 @@ static int host_matches(const char *host, const char *pattern) static int verify_hostname(X509 *cert, const char *hostname) { - int len; +#if (OPENSSL_VERSION_NUMBER >= 0x40000000L) + const X509_NAME *subj; +#else X509_NAME *subj; - char cname[1000]; +#endif + const X509_NAME_ENTRY *cname_entry; + const ASN1_STRING *cname; int i, found; STACK_OF(GENERAL_NAME) *subj_alt_names; @@ -262,12 +266,15 @@ static int verify_hostname(X509 *cert, const char *hostname) /* try the common name */ if (!(subj = X509_get_subject_name(cert))) return error("cannot get certificate subject"); - if ((len = X509_NAME_get_text_by_NID(subj, NID_commonName, cname, sizeof(cname))) < 0) + if ((i = X509_NAME_get_index_by_NID(subj, NID_commonName, -1)) < 0 || + (cname_entry = X509_NAME_get_entry(subj, i)) == NULL || + (cname = X509_NAME_ENTRY_get_data(cname_entry)) == NULL) return error("cannot get certificate common name"); - if (strlen(cname) == (size_t)len && host_matches(hostname, cname)) + if (strlen((const char *)ASN1_STRING_get0_data(cname)) == ASN1_STRING_length(cname) && + host_matches(hostname, (const char *)ASN1_STRING_get0_data(cname))) return 0; return error("certificate owner '%s' does not match hostname '%s'", - cname, hostname); + ASN1_STRING_get0_data(cname), hostname); } static int ssl_socket_connect(struct imap_socket *sock, From 6392a0b75d979ba8e23c85d57b85779aace25370 Mon Sep 17 00:00:00 2001 From: Beat Bolli Date: Wed, 11 Mar 2026 23:10:27 +0100 Subject: [PATCH 064/236] imap-send: move common code into function host_matches() Move the ASN1_STRING access, the associated cast and the check for embedded NUL bytes into host_matches() to simplify both callers. Reformulate the NUL check using memchr() and add a comment to make it more obvious what it is about. Signed-off-by: Beat Bolli Signed-off-by: Junio C Hamano --- imap-send.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/imap-send.c b/imap-send.c index 2a904314dd4bfb..af02c6a689495e 100644 --- a/imap-send.c +++ b/imap-send.c @@ -219,8 +219,14 @@ static int ssl_socket_connect(struct imap_socket *sock UNUSED, #else -static int host_matches(const char *host, const char *pattern) +static int host_matches(const char *host, const ASN1_STRING *asn1_str) { + const char *pattern = (const char *)ASN1_STRING_get0_data(asn1_str); + + /* embedded NUL characters may open a security hole */ + if (memchr(pattern, '\0', ASN1_STRING_length(asn1_str))) + return 0; + if (pattern[0] == '*' && pattern[1] == '.') { pattern += 2; if (!(host = strchr(host, '.'))) @@ -252,10 +258,7 @@ static int verify_hostname(X509 *cert, const char *hostname) GENERAL_NAME *subj_alt_name = sk_GENERAL_NAME_value(subj_alt_names, i); ASN1_STRING *subj_alt_str = GENERAL_NAME_get0_value(subj_alt_name, &ntype); - if (ntype == GEN_DNS && - strlen((const char *)ASN1_STRING_get0_data(subj_alt_str)) == - ASN1_STRING_length(subj_alt_str) && - host_matches(hostname, (const char *)ASN1_STRING_get0_data(subj_alt_str))) + if (ntype == GEN_DNS && host_matches(hostname, subj_alt_str)) found = 1; } sk_GENERAL_NAME_pop_free(subj_alt_names, GENERAL_NAME_free); @@ -270,8 +273,7 @@ static int verify_hostname(X509 *cert, const char *hostname) (cname_entry = X509_NAME_get_entry(subj, i)) == NULL || (cname = X509_NAME_ENTRY_get_data(cname_entry)) == NULL) return error("cannot get certificate common name"); - if (strlen((const char *)ASN1_STRING_get0_data(cname)) == ASN1_STRING_length(cname) && - host_matches(hostname, (const char *)ASN1_STRING_get0_data(cname))) + if (host_matches(hostname, cname)) return 0; return error("certificate owner '%s' does not match hostname '%s'", ASN1_STRING_get0_data(cname), hostname); From 6daeb66baac581ab81148bcb9d5fcc61ae33347e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 12 Mar 2026 09:42:56 +0100 Subject: [PATCH 065/236] odb: stop including "odb/source.h" The "odb.h" header currently includes the "odb/source.h" file. This is somewhat roundabout though: most callers shouldn't have to care about the `struct odb_source`, but should rather use the ODB-level functions. Furthermore, it means that a couple of definitions have to live on the source level even though they should be part of the generic interface. Reverse the relation between "odb/source.h" and "odb.h" and move the enums and typedefs that relate to the generic interfaces back into "odb.h". Add the necessary includes to all files that rely on the transitive include. Suggested-by: Justin Tobler Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 1 + builtin/submodule--helper.c | 1 + odb.h | 50 ++++++++++++++++++++++++++++++++++- odb/source.h | 52 +------------------------------------ odb/streaming.c | 1 + repository.c | 1 + submodule-config.c | 1 + tmp-objdir.c | 1 + 8 files changed, 56 insertions(+), 52 deletions(-) diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 5f364aa816ba25..3fcb207f1aa054 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -9,6 +9,7 @@ #include "strbuf.h" #include "trace2.h" #include "odb.h" +#include "odb/source.h" #include "replace-object.h" #include "repository.h" diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 143f7cb3cca7d0..4957487536f453 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -29,6 +29,7 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/source.h" #include "advice.h" #include "branch.h" #include "list-objects-filter-options.h" diff --git a/odb.h b/odb.h index 86e0365c243863..7a583e38732b7d 100644 --- a/odb.h +++ b/odb.h @@ -3,7 +3,6 @@ #include "hashmap.h" #include "object.h" -#include "odb/source.h" #include "oidset.h" #include "oidmap.h" #include "string-list.h" @@ -12,6 +11,7 @@ struct oidmap; struct oidtree; struct strbuf; +struct strvec; struct repository; struct multi_pack_index; @@ -339,6 +339,42 @@ struct object_info { */ #define OBJECT_INFO_INIT { 0 } +/* Flags that can be passed to `odb_read_object_info_extended()`. */ +enum object_info_flags { + /* Invoke lookup_replace_object() on the given hash. */ + OBJECT_INFO_LOOKUP_REPLACE = (1 << 0), + + /* Do not reprepare object sources when the first lookup has failed. */ + OBJECT_INFO_QUICK = (1 << 1), + + /* + * Do not attempt to fetch the object if missing (even if fetch_is_missing is + * nonzero). + */ + OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2), + + /* Die if object corruption (not just an object being missing) was detected. */ + OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3), + + /* + * We have already tried reading the object, but it couldn't be found + * via any of the attached sources, and are now doing a second read. + * This second read asks the individual sources to also evaluate + * whether any on-disk state may have changed that may have caused the + * object to appear. + * + * This flag is for internal use, only. The second read only occurs + * when `OBJECT_INFO_QUICK` was not passed. + */ + OBJECT_INFO_SECOND_READ = (1 << 4), + + /* + * This is meant for bulk prefetching of missing blobs in a partial + * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. + */ + OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), +}; + /* * Read object info from the object database and populate the `object_info` * structure. Returns 0 on success, a negative error code otherwise. @@ -432,6 +468,18 @@ enum odb_for_each_object_flags { ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; +/* + * A callback function that can be used to iterate through objects. If given, + * the optional `oi` parameter will be populated the same as if you would call + * `odb_read_object_info()`. + * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. + */ +typedef int (*odb_for_each_object_cb)(const struct object_id *oid, + struct object_info *oi, + void *cb_data); + /* * Iterate through all objects contained in the object database. Note that * objects may be iterated over multiple times in case they are either stored diff --git a/odb/source.h b/odb/source.h index caac5581495ece..a1fd9dd9209137 100644 --- a/odb/source.h +++ b/odb/source.h @@ -2,6 +2,7 @@ #define ODB_SOURCE_H #include "object.h" +#include "odb.h" enum odb_source_type { /* @@ -14,61 +15,10 @@ enum odb_source_type { ODB_SOURCE_FILES, }; -/* Flags that can be passed to `odb_read_object_info_extended()`. */ -enum object_info_flags { - /* Invoke lookup_replace_object() on the given hash. */ - OBJECT_INFO_LOOKUP_REPLACE = (1 << 0), - - /* Do not reprepare object sources when the first lookup has failed. */ - OBJECT_INFO_QUICK = (1 << 1), - - /* - * Do not attempt to fetch the object if missing (even if fetch_is_missing is - * nonzero). - */ - OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2), - - /* Die if object corruption (not just an object being missing) was detected. */ - OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3), - - /* - * We have already tried reading the object, but it couldn't be found - * via any of the attached sources, and are now doing a second read. - * This second read asks the individual sources to also evaluate - * whether any on-disk state may have changed that may have caused the - * object to appear. - * - * This flag is for internal use, only. The second read only occurs - * when `OBJECT_INFO_QUICK` was not passed. - */ - OBJECT_INFO_SECOND_READ = (1 << 4), - - /* - * This is meant for bulk prefetching of missing blobs in a partial - * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. - */ - OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), -}; - struct object_id; -struct object_info; struct odb_read_stream; -struct odb_transaction; -struct odb_write_stream; struct strvec; -/* - * A callback function that can be used to iterate through objects. If given, - * the optional `oi` parameter will be populated the same as if you would call - * `odb_read_object_info()`. - * - * Returning a non-zero error code will cause iteration to abort. The error - * code will be propagated. - */ -typedef int (*odb_for_each_object_cb)(const struct object_id *oid, - struct object_info *oi, - void *cb_data); - /* * The source is the part of the object database that stores the actual * objects. It thus encapsulates the logic to read and write the specific diff --git a/odb/streaming.c b/odb/streaming.c index a4355cd2458c38..5927a12954ba59 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -7,6 +7,7 @@ #include "environment.h" #include "repository.h" #include "odb.h" +#include "odb/source.h" #include "odb/streaming.h" #include "replace-object.h" diff --git a/repository.c b/repository.c index e7fa42c14f2254..05c26bdbc3bc08 100644 --- a/repository.c +++ b/repository.c @@ -2,6 +2,7 @@ #include "abspath.h" #include "repository.h" #include "odb.h" +#include "odb/source.h" #include "config.h" #include "object.h" #include "lockfile.h" diff --git a/submodule-config.c b/submodule-config.c index 1f19fe207741bc..72a46b7a54bc3d 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -14,6 +14,7 @@ #include "strbuf.h" #include "object-name.h" #include "odb.h" +#include "odb/source.h" #include "parse-options.h" #include "thread-utils.h" #include "tree-walk.h" diff --git a/tmp-objdir.c b/tmp-objdir.c index e436eed07e4449..d199d39e7c9d51 100644 --- a/tmp-objdir.c +++ b/tmp-objdir.c @@ -11,6 +11,7 @@ #include "strvec.h" #include "quote.h" #include "odb.h" +#include "odb/source.h" #include "repository.h" struct tmp_objdir { From dd587cd59e1575f2d5698cb45b42644e1df9b835 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 12 Mar 2026 09:42:57 +0100 Subject: [PATCH 066/236] packfile: extract logic to count number of objects In a subsequent commit we're about to introduce a new `odb_source_count_objects()` function so that we can make the logic pluggable. Prepare for this change by extracting the logic that we have to count packed objects into a standalone function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 45 +++++++++++++++++++++++++++++++++++---------- packfile.h | 9 +++++++++ 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/packfile.c b/packfile.c index 215a23e42be0fe..1ee5dd3da35c07 100644 --- a/packfile.c +++ b/packfile.c @@ -1101,6 +1101,36 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor return store->packs.head; } +int packfile_store_count_objects(struct packfile_store *store, + unsigned long *out) +{ + struct packfile_list_entry *e; + struct multi_pack_index *m; + unsigned long count = 0; + int ret; + + m = get_multi_pack_index(store->source); + if (m) + count += m->num_objects + m->num_objects_in_base; + + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index) + continue; + if (open_pack_index(e->pack)) { + ret = -1; + goto out; + } + + count += e->pack->num_objects; + } + + *out = count; + ret = 0; + +out: + return ret; +} + /* * Give a fast, rough count of the number of objects in the repository. This * ignores loose objects completely. If you have a lot of them, then either @@ -1113,21 +1143,16 @@ unsigned long repo_approximate_object_count(struct repository *r) if (!r->objects->approximate_object_count_valid) { struct odb_source *source; unsigned long count = 0; - struct packed_git *p; odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - count += m->num_objects + m->num_objects_in_base; - } + struct odb_source_files *files = odb_source_files_downcast(source); + unsigned long c; - repo_for_each_pack(r, p) { - if (p->multi_pack_index || open_pack_index(p)) - continue; - count += p->num_objects; + if (!packfile_store_count_objects(files->packed, &c)) + count += c; } + r->objects->approximate_object_count = count; r->objects->approximate_object_count_valid = 1; } diff --git a/packfile.h b/packfile.h index 8b04a258a7b9d5..1da8c729cba131 100644 --- a/packfile.h +++ b/packfile.h @@ -268,6 +268,15 @@ enum kept_pack_type { KEPT_PACK_IN_CORE = (1 << 1), }; +/* + * Count the number objects contained in the given packfile store. If + * successful, the number of objects will be written to the `out` pointer. + * + * Return 0 on success, a negative error code otherwise. + */ +int packfile_store_count_objects(struct packfile_store *store, + unsigned long *out); + /* * Retrieve the cache of kept packs from the given packfile store. Accepts a * combination of `kept_pack_type` flags. The cache is computed on demand and From 222fddeaa44b633eea345996735b4f7893eb71ec Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 12 Mar 2026 09:42:58 +0100 Subject: [PATCH 067/236] object-file: extract logic to approximate object count In "builtin/gc.c" we have some logic that checks whether we need to repack objects. This is done by counting the number of objects that we have and checking whether it exceeds a certain threshold. We don't really need an accurate object count though, which is why we only open a single object directory shard and then extrapolate from there. Extract this logic into a new function that is owned by the loose object database source. This is done to prepare for a subsequent change, where we'll introduce object counting on the object database source level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 37 +++++++++---------------------------- object-file.c | 41 +++++++++++++++++++++++++++++++++++++++++ object-file.h | 13 +++++++++++++ 3 files changed, 63 insertions(+), 28 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index fb329c2cffab80..a08c7554cb1374 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -467,37 +467,18 @@ static int rerere_gc_condition(struct gc_config *cfg UNUSED) static int too_many_loose_objects(int limit) { /* - * Quickly check if a "gc" is needed, by estimating how - * many loose objects there are. Because SHA-1 is evenly - * distributed, we can check only one and get a reasonable - * estimate. + * This is weird, but stems from legacy behaviour: the GC auto + * threshold was always essentially interpreted as if it was rounded up + * to the next multiple 256 of, so we retain this behaviour for now. */ - DIR *dir; - struct dirent *ent; - int auto_threshold; - int num_loose = 0; - int needed = 0; - const unsigned hexsz_loose = the_hash_algo->hexsz - 2; - char *path; - - path = repo_git_path(the_repository, "objects/17"); - dir = opendir(path); - free(path); - if (!dir) + int auto_threshold = DIV_ROUND_UP(limit, 256) * 256; + unsigned long loose_count; + + if (odb_source_loose_approximate_object_count(the_repository->objects->sources, + &loose_count) < 0) return 0; - auto_threshold = DIV_ROUND_UP(limit, 256); - while ((ent = readdir(dir)) != NULL) { - if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose || - ent->d_name[hexsz_loose] != '\0') - continue; - if (++num_loose > auto_threshold) { - needed = 1; - break; - } - } - closedir(dir); - return needed; + return loose_count > auto_threshold; } static struct packed_git *find_base_packs(struct string_list *packs, diff --git a/object-file.c b/object-file.c index a3ff7f586c91f3..da67e3c9ff576f 100644 --- a/object-file.c +++ b/object-file.c @@ -1868,6 +1868,47 @@ int odb_source_loose_for_each_object(struct odb_source *source, NULL, NULL, &data); } +int odb_source_loose_approximate_object_count(struct odb_source *source, + unsigned long *out) +{ + const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2; + unsigned long count = 0; + struct dirent *ent; + char *path = NULL; + DIR *dir = NULL; + int ret; + + path = xstrfmt("%s/17", source->path); + + dir = opendir(path); + if (!dir) { + if (errno == ENOENT) { + *out = 0; + ret = 0; + goto out; + } + + ret = error_errno("cannot open object shard '%s'", path); + goto out; + } + + while ((ent = readdir(dir)) != NULL) { + if (strspn(ent->d_name, "0123456789abcdef") != hexsz || + ent->d_name[hexsz] != '\0') + continue; + count++; + } + + *out = count * 256; + ret = 0; + +out: + if (dir) + closedir(dir); + free(path); + return ret; +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) diff --git a/object-file.h b/object-file.h index ff6da6529640dc..b870ea9fa8deb2 100644 --- a/object-file.h +++ b/object-file.h @@ -139,6 +139,19 @@ int odb_source_loose_for_each_object(struct odb_source *source, void *cb_data, unsigned flags); +/* + * Count the number of loose objects in this source. + * + * The object count is approximated by opening a single sharding directory for + * loose objects and scanning its contents. The result is then extrapolated by + * 256. This should generally work as a reasonable estimate given that the + * object hash is supposed to be indistinguishable from random. + * + * Returns 0 on success, a negative error code otherwise. + */ +int odb_source_loose_approximate_object_count(struct odb_source *source, + unsigned long *out); + /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial " " part of the loose object From 2b24db1110150138ac1e09bc60d9ae5245909625 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 12 Mar 2026 09:42:59 +0100 Subject: [PATCH 068/236] object-file: generalize counting objects Generalize the function introduced in the preceding commit to not only be able to approximate the number of loose objects, but to also provide an accurate count. The behaviour can be toggled via a new flag. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 5 +++-- object-file.c | 59 +++++++++++++++++++++++++++++++++------------------ object-file.h | 5 +++-- odb.h | 9 ++++++++ 4 files changed, 53 insertions(+), 25 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index a08c7554cb1374..3a64d28da81b61 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -474,8 +474,9 @@ static int too_many_loose_objects(int limit) int auto_threshold = DIV_ROUND_UP(limit, 256) * 256; unsigned long loose_count; - if (odb_source_loose_approximate_object_count(the_repository->objects->sources, - &loose_count) < 0) + if (odb_source_loose_count_objects(the_repository->objects->sources, + ODB_COUNT_OBJECTS_APPROXIMATE, + &loose_count) < 0) return 0; return loose_count > auto_threshold; diff --git a/object-file.c b/object-file.c index da67e3c9ff576f..569ce6eaed95d4 100644 --- a/object-file.c +++ b/object-file.c @@ -1868,40 +1868,57 @@ int odb_source_loose_for_each_object(struct odb_source *source, NULL, NULL, &data); } -int odb_source_loose_approximate_object_count(struct odb_source *source, - unsigned long *out) +static int count_loose_object(const struct object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *payload) +{ + unsigned long *count = payload; + (*count)++; + return 0; +} + +int odb_source_loose_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out) { const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2; - unsigned long count = 0; - struct dirent *ent; char *path = NULL; DIR *dir = NULL; int ret; - path = xstrfmt("%s/17", source->path); + if (flags & ODB_COUNT_OBJECTS_APPROXIMATE) { + unsigned long count = 0; + struct dirent *ent; - dir = opendir(path); - if (!dir) { - if (errno == ENOENT) { - *out = 0; - ret = 0; + path = xstrfmt("%s/17", source->path); + + dir = opendir(path); + if (!dir) { + if (errno == ENOENT) { + *out = 0; + ret = 0; + goto out; + } + + ret = error_errno("cannot open object shard '%s'", path); goto out; } - ret = error_errno("cannot open object shard '%s'", path); - goto out; - } + while ((ent = readdir(dir)) != NULL) { + if (strspn(ent->d_name, "0123456789abcdef") != hexsz || + ent->d_name[hexsz] != '\0') + continue; + count++; + } - while ((ent = readdir(dir)) != NULL) { - if (strspn(ent->d_name, "0123456789abcdef") != hexsz || - ent->d_name[hexsz] != '\0') - continue; - count++; + *out = count * 256; + ret = 0; + } else { + *out = 0; + ret = odb_source_loose_for_each_object(source, NULL, count_loose_object, + out, 0); } - *out = count * 256; - ret = 0; - out: if (dir) closedir(dir); diff --git a/object-file.h b/object-file.h index b870ea9fa8deb2..f8d8805a18cc8c 100644 --- a/object-file.h +++ b/object-file.h @@ -149,8 +149,9 @@ int odb_source_loose_for_each_object(struct odb_source *source, * * Returns 0 on success, a negative error code otherwise. */ -int odb_source_loose_approximate_object_count(struct odb_source *source, - unsigned long *out); +int odb_source_loose_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out); /** * format_object_header() is a thin wrapper around s xsnprintf() that diff --git a/odb.h b/odb.h index 7a583e38732b7d..e6057477f624cd 100644 --- a/odb.h +++ b/odb.h @@ -500,6 +500,15 @@ int odb_for_each_object(struct object_database *odb, void *cb_data, unsigned flags); +enum odb_count_objects_flags { + /* + * Instead of providing an accurate count, allow the number of objects + * to be approximated. Details of how this approximation works are + * subject to the specific source's implementation. + */ + ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0), +}; + enum { /* * By default, `odb_write_object()` does not actually write anything From b259f2175b0ccd5574fc6b06b8ec5cbeaa860610 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 12 Mar 2026 09:43:00 +0100 Subject: [PATCH 069/236] odb/source: introduce generic object counting Introduce generic object counting on the object database source level with a new backend-specific callback function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-files.c | 30 ++++++++++++++++++++++++++++++ odb/source.h | 27 +++++++++++++++++++++++++++ packfile.c | 4 ++-- packfile.h | 1 + 4 files changed, 60 insertions(+), 2 deletions(-) diff --git a/odb/source-files.c b/odb/source-files.c index 14cb9adecadd99..c08d8993e378a0 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -93,6 +93,35 @@ static int odb_source_files_for_each_object(struct odb_source *source, return 0; } +static int odb_source_files_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + unsigned long count; + int ret; + + ret = packfile_store_count_objects(files->packed, flags, &count); + if (ret < 0) + goto out; + + if (!(flags & ODB_COUNT_OBJECTS_APPROXIMATE)) { + unsigned long loose_count; + + ret = odb_source_loose_count_objects(source, flags, &loose_count); + if (ret < 0) + goto out; + + count += loose_count; + } + + *out = count; + ret = 0; + +out: + return ret; +} + static int odb_source_files_freshen_object(struct odb_source *source, const struct object_id *oid) { @@ -220,6 +249,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.read_object_info = odb_source_files_read_object_info; files->base.read_object_stream = odb_source_files_read_object_stream; files->base.for_each_object = odb_source_files_for_each_object; + files->base.count_objects = odb_source_files_count_objects; files->base.freshen_object = odb_source_files_freshen_object; files->base.write_object = odb_source_files_write_object; files->base.write_object_stream = odb_source_files_write_object_stream; diff --git a/odb/source.h b/odb/source.h index a1fd9dd9209137..96c906e7a1b350 100644 --- a/odb/source.h +++ b/odb/source.h @@ -142,6 +142,21 @@ struct odb_source { void *cb_data, unsigned flags); + /* + * This callback is expected to count objects in the given object + * database source. The callback function does not have to guarantee + * that only unique objects are counted. The result shall be assigned + * to the `out` pointer. + * + * Accepts `enum odb_count_objects_flag` flags to alter the behaviour. + * + * The callback is expected to return 0 on success, or a negative error + * code otherwise. + */ + int (*count_objects)(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out); + /* * This callback is expected to freshen the given object so that its * last access time is set to the current time. This is used to ensure @@ -333,6 +348,18 @@ static inline int odb_source_for_each_object(struct odb_source *source, return source->for_each_object(source, request, cb, cb_data, flags); } +/* + * Count the number of objects in the given object database source. + * + * Returns 0 on success, a negative error code otherwise. + */ +static inline int odb_source_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out) +{ + return source->count_objects(source, flags, out); +} + /* * Freshen an object in the object database by updating its timestamp. * Returns 1 in case the object has been freshened, 0 in case the object does diff --git a/packfile.c b/packfile.c index 1ee5dd3da35c07..8ee462303afa79 100644 --- a/packfile.c +++ b/packfile.c @@ -1102,6 +1102,7 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor } int packfile_store_count_objects(struct packfile_store *store, + enum odb_count_objects_flags flags UNUSED, unsigned long *out) { struct packfile_list_entry *e; @@ -1146,10 +1147,9 @@ unsigned long repo_approximate_object_count(struct repository *r) odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); unsigned long c; - if (!packfile_store_count_objects(files->packed, &c)) + if (!odb_source_count_objects(source, ODB_COUNT_OBJECTS_APPROXIMATE, &c)) count += c; } diff --git a/packfile.h b/packfile.h index 1da8c729cba131..74b6bc58c5a6b7 100644 --- a/packfile.h +++ b/packfile.h @@ -275,6 +275,7 @@ enum kept_pack_type { * Return 0 on success, a negative error code otherwise. */ int packfile_store_count_objects(struct packfile_store *store, + enum odb_count_objects_flags flags, unsigned long *out); /* From 6801ffd37df8420917e1feaf03b5f7c175798bff Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 12 Mar 2026 09:43:01 +0100 Subject: [PATCH 070/236] odb: introduce generic object counting Similar to the preceding commit, introduce counting of objects on the object database level, replacing the logic that we have in `repo_approximate_object_count()`. Note that the function knows to cache the object count. It's unclear whether this cache is really required as we shouldn't have that many cases where we count objects repeatedly. But to be on the safe side the caching mechanism is retained, with the only excepting being that we also have to use the passed flags as caching key. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 6 +++++- commit-graph.c | 3 ++- object-name.c | 6 +++++- odb.c | 37 ++++++++++++++++++++++++++++++++++++- odb.h | 19 ++++++++++++++++--- packfile.c | 27 --------------------------- packfile.h | 6 ------ 7 files changed, 64 insertions(+), 40 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 3a64d28da81b61..cb9ca89a974819 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -574,9 +574,13 @@ static uint64_t total_ram(void) static uint64_t estimate_repack_memory(struct gc_config *cfg, struct packed_git *pack) { - unsigned long nr_objects = repo_approximate_object_count(the_repository); + unsigned long nr_objects; size_t os_cache, heap; + if (odb_count_objects(the_repository->objects, + ODB_COUNT_OBJECTS_APPROXIMATE, &nr_objects) < 0) + return 0; + if (!pack || !nr_objects) return 0; diff --git a/commit-graph.c b/commit-graph.c index f8e24145a513b0..c0300033304404 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2607,7 +2607,8 @@ int write_commit_graph(struct odb_source *source, replace = ctx.opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE; } - ctx.approx_nr_objects = repo_approximate_object_count(r); + if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &ctx.approx_nr_objects) < 0) + ctx.approx_nr_objects = 0; if (ctx.append && g) { for (i = 0; i < g->num_commits; i++) { diff --git a/object-name.c b/object-name.c index 7b14c3bf9b9b48..e5adec4c9d5084 100644 --- a/object-name.c +++ b/object-name.c @@ -837,7 +837,11 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, const unsigned hexsz = algo->hexsz; if (len < 0) { - unsigned long count = repo_approximate_object_count(r); + unsigned long count; + + if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) + count = 0; + /* * Add one because the MSB only tells us the highest bit set, * not including the value of all the _other_ bits (so "15" diff --git a/odb.c b/odb.c index 84a31084d3884b..350e23f3c0798d 100644 --- a/odb.c +++ b/odb.c @@ -917,6 +917,41 @@ int odb_for_each_object(struct object_database *odb, return 0; } +int odb_count_objects(struct object_database *odb, + enum odb_count_objects_flags flags, + unsigned long *out) +{ + struct odb_source *source; + unsigned long count = 0; + int ret; + + if (odb->object_count_valid && odb->object_count_flags == flags) { + *out = odb->object_count; + return 0; + } + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) { + unsigned long c; + + ret = odb_source_count_objects(source, flags, &c); + if (ret < 0) + goto out; + + count += c; + } + + odb->object_count = count; + odb->object_count_valid = 1; + odb->object_count_flags = flags; + + *out = count; + ret = 0; + +out: + return ret; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { @@ -1030,7 +1065,7 @@ void odb_reprepare(struct object_database *o) for (source = o->sources; source; source = source->next) odb_source_reprepare(source); - o->approximate_object_count_valid = 0; + o->object_count_valid = 0; obj_read_unlock(); } diff --git a/odb.h b/odb.h index e6057477f624cd..9aee260105ae54 100644 --- a/odb.h +++ b/odb.h @@ -110,10 +110,11 @@ struct object_database { /* * A fast, rough count of the number of objects in the repository. * These two fields are not meant for direct access. Use - * repo_approximate_object_count() instead. + * odb_count_objects() instead. */ - unsigned long approximate_object_count; - unsigned approximate_object_count_valid : 1; + unsigned long object_count; + unsigned object_count_flags; + unsigned object_count_valid : 1; /* * Submodule source paths that will be added as additional sources to @@ -509,6 +510,18 @@ enum odb_count_objects_flags { ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0), }; +/* + * Count the number of objects in the given object database. This object count + * may double-count objects that are stored in multiple backends, or which are + * stored multiple times in a single backend. + * + * Returns 0 on success, a negative error code otherwise. The number of objects + * will be assigned to the `out` pointer on success. + */ +int odb_count_objects(struct object_database *odb, + enum odb_count_objects_flags flags, + unsigned long *out); + enum { /* * By default, `odb_write_object()` does not actually write anything diff --git a/packfile.c b/packfile.c index 8ee462303afa79..d4de9f3ffe831e 100644 --- a/packfile.c +++ b/packfile.c @@ -1132,33 +1132,6 @@ int packfile_store_count_objects(struct packfile_store *store, return ret; } -/* - * Give a fast, rough count of the number of objects in the repository. This - * ignores loose objects completely. If you have a lot of them, then either - * you should repack because your performance will be awful, or they are - * all unreachable objects about to be pruned, in which case they're not really - * interesting as a measure of repo size in the first place. - */ -unsigned long repo_approximate_object_count(struct repository *r) -{ - if (!r->objects->approximate_object_count_valid) { - struct odb_source *source; - unsigned long count = 0; - - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - unsigned long c; - - if (!odb_source_count_objects(source, ODB_COUNT_OBJECTS_APPROXIMATE, &c)) - count += c; - } - - r->objects->approximate_object_count = count; - r->objects->approximate_object_count_valid = 1; - } - return r->objects->approximate_object_count; -} - unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) { diff --git a/packfile.h b/packfile.h index 74b6bc58c5a6b7..a16ec3950d2507 100644 --- a/packfile.h +++ b/packfile.h @@ -375,12 +375,6 @@ int packfile_store_for_each_object(struct packfile_store *store, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -/* - * Give a rough count of objects in the repository. This sacrifices accuracy - * for speed. - */ -unsigned long repo_approximate_object_count(struct repository *r); - void pack_report(struct repository *repo); /* From 78827970ecf1cb853fc2c9059c180f91fa154c97 Mon Sep 17 00:00:00 2001 From: Tian Yuchen Date: Fri, 13 Mar 2026 00:42:03 +0800 Subject: [PATCH 071/236] builtin/mktree: remove USE_THE_REPOSITORY_VARIABLE The 'cmd_mktree()' function already receives a 'struct repository *repo' pointer, but it was previously marked as UNUSED. Pass the 'repo' pointer down to 'mktree_line()' and 'write_tree()'. Consequently, remove the 'USE_THE_REPOSITORY_VARIABLE' macro, replace usages of 'the_repository', and swap 'parse_oid_hex()' with its context-aware version 'parse_oid_hex_algop()'. This refactoring is safe because 'cmd_mktree()' is registered with the 'RUN_SETUP' flag in 'git.c', which guarantees that the command is executed within a initialized repository, ensuring that the passed 'repo' pointer is never 'NULL'. Signed-off-by: Tian Yuchen Signed-off-by: Junio C Hamano --- builtin/mktree.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/builtin/mktree.c b/builtin/mktree.c index 12772303f504f3..4084e324768b94 100644 --- a/builtin/mktree.c +++ b/builtin/mktree.c @@ -3,7 +3,6 @@ * * Copyright (c) Junio C Hamano, 2006, 2009 */ -#define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" #include "gettext.h" #include "hex.h" @@ -46,7 +45,7 @@ static int ent_compare(const void *a_, const void *b_) b->name, b->len, b->mode); } -static void write_tree(struct object_id *oid) +static void write_tree(struct repository *repo, struct object_id *oid) { struct strbuf buf; size_t size; @@ -60,10 +59,10 @@ static void write_tree(struct object_id *oid) for (i = 0; i < used; i++) { struct treeent *ent = entries[i]; strbuf_addf(&buf, "%o %s%c", ent->mode, ent->name, '\0'); - strbuf_add(&buf, ent->oid.hash, the_hash_algo->rawsz); + strbuf_add(&buf, ent->oid.hash, repo->hash_algo->rawsz); } - odb_write_object(the_repository->objects, buf.buf, buf.len, OBJ_TREE, oid); + odb_write_object(repo->objects, buf.buf, buf.len, OBJ_TREE, oid); strbuf_release(&buf); } @@ -72,7 +71,7 @@ static const char *const mktree_usage[] = { NULL }; -static void mktree_line(char *buf, int nul_term_line, int allow_missing) +static void mktree_line(struct repository *repo, char *buf, int nul_term_line, int allow_missing) { char *ptr, *ntr; const char *p; @@ -93,7 +92,7 @@ static void mktree_line(char *buf, int nul_term_line, int allow_missing) die("input format error: %s", buf); ptr = ntr + 1; /* type */ ntr = strchr(ptr, ' '); - if (!ntr || parse_oid_hex(ntr + 1, &oid, &p) || + if (!ntr || parse_oid_hex_algop(ntr + 1, &oid, &p, repo->hash_algo) || *p != '\t') die("input format error: %s", buf); @@ -124,7 +123,7 @@ static void mktree_line(char *buf, int nul_term_line, int allow_missing) /* Check the type of object identified by oid without fetching objects */ oi.typep = &obj_type; - if (odb_read_object_info_extended(the_repository->objects, &oid, &oi, + if (odb_read_object_info_extended(repo->objects, &oid, &oi, OBJECT_INFO_LOOKUP_REPLACE | OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT) < 0) @@ -155,7 +154,7 @@ static void mktree_line(char *buf, int nul_term_line, int allow_missing) int cmd_mktree(int ac, const char **av, const char *prefix, - struct repository *repo UNUSED) + struct repository *repo) { struct strbuf sb = STRBUF_INIT; struct object_id oid; @@ -187,7 +186,7 @@ int cmd_mktree(int ac, break; die("input format error: (blank line only valid in batch mode)"); } - mktree_line(sb.buf, nul_term_line, allow_missing); + mktree_line(repo, sb.buf, nul_term_line, allow_missing); } if (is_batch_mode && got_eof && used < 1) { /* @@ -197,7 +196,7 @@ int cmd_mktree(int ac, */ ; /* skip creating an empty tree */ } else { - write_tree(&oid); + write_tree(repo, &oid); puts(oid_to_hex(&oid)); fflush(stdout); } From 233545cc60315863fea3fa17e2df86de23a6a4f7 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 12 Mar 2026 20:39:36 -0500 Subject: [PATCH 072/236] commit: remove unused forward declaration In 6206089cbd (commit: write commits for both hashes, 2023-10-01), `sign_with_header()` was removed, but its forward declaration in "commit.h" was left. Remove the unused declaration. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- commit.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/commit.h b/commit.h index f2f39e1a89ef62..e9dee8d26bc9ee 100644 --- a/commit.h +++ b/commit.h @@ -400,8 +400,6 @@ LAST_ARG_MUST_BE_NULL int run_commit_hook(int editor_is_used, const char *index_file, int *invoked_hook, const char *name, ...); -/* Sign a commit or tag buffer, storing the result in a header. */ -int sign_with_header(struct strbuf *buf, const char *keyid); /* Parse the signature out of a header. */ int parse_buffer_signed_by_header(const char *buffer, unsigned long size, From 86ebf870b909a7f4707aa2601d290bc992d21a53 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 12 Mar 2026 20:39:37 -0500 Subject: [PATCH 073/236] gpg-interface: allow sign_buffer() to use default signing key The `sign_commit_to_strbuf()` helper in "commit.c" provides fallback logic to get the default configured signing key when a key is not provided and handles generating the commit signature accordingly. This signing operation is not really specific to commits as any arbitrary buffer can be signed. Also, in a subsequent commit, this same logic is reused by git-fast-import(1) when signing commits with invalid signatures. Remove the `sign_commit_to_strbuf()` helper from "commit.c" and extend `sign_buffer()` in "gpg-interface.c" to support using the default key as a fallback when the `SIGN_BUFFER_USE_DEFAULT_KEY` flag is provided. Call sites are updated accordingly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/tag.c | 4 ++-- commit.c | 19 +++++-------------- gpg-interface.c | 13 +++++++++++-- gpg-interface.h | 12 ++++++++++-- send-pack.c | 2 +- 5 files changed, 29 insertions(+), 21 deletions(-) diff --git a/builtin/tag.c b/builtin/tag.c index aeb04c487fe95a..540d783c67ded2 100644 --- a/builtin/tag.c +++ b/builtin/tag.c @@ -167,7 +167,7 @@ static int do_sign(struct strbuf *buffer, struct object_id **compat_oid, char *keyid = get_signing_key(); int ret = -1; - if (sign_buffer(buffer, &sig, keyid)) + if (sign_buffer(buffer, &sig, keyid, 0)) goto out; if (compat) { @@ -176,7 +176,7 @@ static int do_sign(struct strbuf *buffer, struct object_id **compat_oid, if (convert_object_file(the_repository ,&compat_buf, algo, compat, buffer->buf, buffer->len, OBJ_TAG, 1)) goto out; - if (sign_buffer(&compat_buf, &compat_sig, keyid)) + if (sign_buffer(&compat_buf, &compat_sig, keyid, 0)) goto out; add_header_signature(&compat_buf, &sig, algo); strbuf_addbuf(&compat_buf, &compat_sig); diff --git a/commit.c b/commit.c index 0ffdd6679eec80..80d8d078757dbc 100644 --- a/commit.c +++ b/commit.c @@ -1170,18 +1170,6 @@ int add_header_signature(struct strbuf *buf, struct strbuf *sig, const struct gi return 0; } -static int sign_commit_to_strbuf(struct strbuf *sig, struct strbuf *buf, const char *keyid) -{ - char *keyid_to_free = NULL; - int ret = 0; - if (!keyid || !*keyid) - keyid = keyid_to_free = get_signing_key(); - if (sign_buffer(buf, sig, keyid)) - ret = -1; - free(keyid_to_free); - return ret; -} - int parse_signed_commit(const struct commit *commit, struct strbuf *payload, struct strbuf *signature, const struct git_hash_algo *algop) @@ -1759,7 +1747,8 @@ int commit_tree_extended(const char *msg, size_t msg_len, oidcpy(&parent_buf[i++], &p->item->object.oid); write_commit_tree(&buffer, msg, msg_len, tree, parent_buf, nparents, author, committer, extra); - if (sign_commit && sign_commit_to_strbuf(&sig, &buffer, sign_commit)) { + if (sign_commit && sign_buffer(&buffer, &sig, sign_commit, + SIGN_BUFFER_USE_DEFAULT_KEY)) { result = -1; goto out; } @@ -1791,7 +1780,9 @@ int commit_tree_extended(const char *msg, size_t msg_len, free_commit_extra_headers(compat_extra); free(mapped_parents); - if (sign_commit && sign_commit_to_strbuf(&compat_sig, &compat_buffer, sign_commit)) { + if (sign_commit && sign_buffer(&compat_buffer, &compat_sig, + sign_commit, + SIGN_BUFFER_USE_DEFAULT_KEY)) { result = -1; goto out; } diff --git a/gpg-interface.c b/gpg-interface.c index 7e6a1520bd1535..27eacbb632628c 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -974,11 +974,20 @@ const char *gpg_trust_level_to_str(enum signature_trust_level level) return sigcheck_gpg_trust_level[level].display_key; } -int sign_buffer(struct strbuf *buffer, struct strbuf *signature, const char *signing_key) +int sign_buffer(struct strbuf *buffer, struct strbuf *signature, + const char *signing_key, enum sign_buffer_flags flags) { + char *keyid_to_free = NULL; + int ret = 0; + gpg_interface_lazy_init(); - return use_format->sign_buffer(buffer, signature, signing_key); + if ((flags & SIGN_BUFFER_USE_DEFAULT_KEY) && (!signing_key || !*signing_key)) + signing_key = keyid_to_free = get_signing_key(); + + ret = use_format->sign_buffer(buffer, signature, signing_key); + free(keyid_to_free); + return ret; } /* diff --git a/gpg-interface.h b/gpg-interface.h index 789d1ffac47850..37f3ac42dbb99a 100644 --- a/gpg-interface.h +++ b/gpg-interface.h @@ -74,6 +74,15 @@ int parse_signature(const char *buf, size_t size, struct strbuf *payload, struct */ size_t parse_signed_buffer(const char *buf, size_t size); +/* Flags for sign_buffer(). */ +enum sign_buffer_flags { + /* + * Use the default configured signing key as returned by `get_signing_key()` + * when the provided "signing_key" is NULL or empty. + */ + SIGN_BUFFER_USE_DEFAULT_KEY = (1 << 0), +}; + /* * Create a detached signature for the contents of "buffer" and append * it after "signature"; "buffer" and "signature" can be the same @@ -81,8 +90,7 @@ size_t parse_signed_buffer(const char *buf, size_t size); * at the end. Returns 0 on success, non-zero on failure. */ int sign_buffer(struct strbuf *buffer, struct strbuf *signature, - const char *signing_key); - + const char *signing_key, enum sign_buffer_flags flags); /* * Returns corresponding string in lowercase for a given member of diff --git a/send-pack.c b/send-pack.c index 67d6987b1ccd7e..07ecfae4de92a2 100644 --- a/send-pack.c +++ b/send-pack.c @@ -391,7 +391,7 @@ static int generate_push_cert(struct strbuf *req_buf, if (!update_seen) goto free_return; - if (sign_buffer(&cert, &cert, signing_key)) + if (sign_buffer(&cert, &cert, signing_key, 0)) die(_("failed to sign the push certificate")); packet_buf_write(req_buf, "push-cert%c%s", 0, cap_string); From ee66c793f84ef1c84ec3fe732bb26394ebefd257 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 12 Mar 2026 20:39:38 -0500 Subject: [PATCH 074/236] fast-import: add mode to sign commits with invalid signatures With git-fast-import(1), handling of signed commits is controlled via the `--signed-commits=` option. When an invalid signature is encountered, a user may want the option to sign the commit again as opposed to just stripping the signature. To facilitate this, introduce a "sign-if-invalid" mode for the `--signed-commits` option. Optionally, a key ID may be explicitly provided in the form `sign-if-invalid[=]` to specify which signing key should be used when signing invalid commit signatures. Note that to properly support interoperability mode when signing commit signatures, the commit buffer must be created in both the repository and compatability object formats to generate the appropriate signatures accordingly. As currently implemented, the commit buffer for the compatability object format is not reconstructed and thus signing commits in interoperability mode is not yet supported. Support may be added in the future. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.adoc | 4 + builtin/fast-export.c | 8 +- builtin/fast-import.c | 101 +++++++++++++++----- gpg-interface.c | 23 +++-- gpg-interface.h | 7 +- t/t9305-fast-import-signatures.sh | 144 ++++++++++++++++++----------- 6 files changed, 202 insertions(+), 85 deletions(-) diff --git a/Documentation/git-fast-import.adoc b/Documentation/git-fast-import.adoc index 479c4081da8f27..b3f42d46372a40 100644 --- a/Documentation/git-fast-import.adoc +++ b/Documentation/git-fast-import.adoc @@ -86,6 +86,10 @@ already trusted to run their own code. * `strip-if-invalid` will check signatures and, if they are invalid, will strip them and display a warning. The validation is performed in the same way as linkgit:git-verify-commit[1] does it. +* `sign-if-invalid[=]`, similar to `strip-if-invalid`, verifies + commit signatures and replaces invalid signatures with newly created ones. + Valid signatures are left unchanged. If `` is provided, that key is + used for signing; otherwise the configured default signing key is used. Options for Frontends ~~~~~~~~~~~~~~~~~~~~~ diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 0c5d2386d81b92..13621b0d6a1552 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -64,7 +64,7 @@ static int parse_opt_sign_mode(const struct option *opt, if (unset) return 0; - if (parse_sign_mode(arg, val)) + if (parse_sign_mode(arg, val, NULL)) return error(_("unknown %s mode: %s"), opt->long_name, arg); return 0; @@ -825,6 +825,9 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, case SIGN_STRIP_IF_INVALID: die(_("'strip-if-invalid' is not a valid mode for " "git fast-export with --signed-commits=")); + case SIGN_SIGN_IF_INVALID: + die(_("'sign-if-invalid' is not a valid mode for " + "git fast-export with --signed-commits=")); default: BUG("invalid signed_commit_mode value %d", signed_commit_mode); } @@ -970,6 +973,9 @@ static void handle_tag(const char *name, struct tag *tag) case SIGN_STRIP_IF_INVALID: die(_("'strip-if-invalid' is not a valid mode for " "git fast-export with --signed-tags=")); + case SIGN_SIGN_IF_INVALID: + die(_("'sign-if-invalid' is not a valid mode for " + "git fast-export with --signed-tags=")); default: BUG("invalid signed_commit_mode value %d", signed_commit_mode); } diff --git a/builtin/fast-import.c b/builtin/fast-import.c index b8a7757cfda943..935e688e33ac8a 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -190,6 +190,7 @@ static const char *global_prefix; static enum sign_mode signed_tag_mode = SIGN_VERBATIM; static enum sign_mode signed_commit_mode = SIGN_VERBATIM; +static const char *signed_commit_keyid; /* Memory pools */ static struct mem_pool fi_mem_pool = { @@ -2836,26 +2837,15 @@ static void finalize_commit_buffer(struct strbuf *new_data, strbuf_addbuf(new_data, msg); } -static void handle_strip_if_invalid(struct strbuf *new_data, - struct signature_data *sig_sha1, - struct signature_data *sig_sha256, - struct strbuf *msg) +static void warn_invalid_signature(struct signature_check *check, + const char *msg, enum sign_mode mode) { - struct strbuf tmp_buf = STRBUF_INIT; - struct signature_check signature_check = { 0 }; - int ret; - - /* Check signature in a temporary commit buffer */ - strbuf_addbuf(&tmp_buf, new_data); - finalize_commit_buffer(&tmp_buf, sig_sha1, sig_sha256, msg); - ret = verify_commit_buffer(tmp_buf.buf, tmp_buf.len, &signature_check); - - if (ret) { - const char *signer = signature_check.signer ? - signature_check.signer : _("unknown"); - const char *subject; - int subject_len = find_commit_subject(msg->buf, &subject); + const char *signer = check->signer ? check->signer : _("unknown"); + const char *subject; + int subject_len = find_commit_subject(msg, &subject); + switch (mode) { + case SIGN_STRIP_IF_INVALID: if (subject_len > 100) warning(_("stripping invalid signature for commit '%.100s...'\n" " allegedly by %s"), subject, signer); @@ -2865,6 +2855,67 @@ static void handle_strip_if_invalid(struct strbuf *new_data, else warning(_("stripping invalid signature for commit\n" " allegedly by %s"), signer); + break; + case SIGN_SIGN_IF_INVALID: + if (subject_len > 100) + warning(_("replacing invalid signature for commit '%.100s...'\n" + " allegedly by %s"), subject, signer); + else if (subject_len > 0) + warning(_("replacing invalid signature for commit '%.*s'\n" + " allegedly by %s"), subject_len, subject, signer); + else + warning(_("replacing invalid signature for commit\n" + " allegedly by %s"), signer); + break; + default: + BUG("unsupported signing mode"); + } +} + +static void handle_signature_if_invalid(struct strbuf *new_data, + struct signature_data *sig_sha1, + struct signature_data *sig_sha256, + struct strbuf *msg, + enum sign_mode mode) +{ + struct strbuf tmp_buf = STRBUF_INIT; + struct signature_check signature_check = { 0 }; + int ret; + + /* Check signature in a temporary commit buffer */ + strbuf_addbuf(&tmp_buf, new_data); + finalize_commit_buffer(&tmp_buf, sig_sha1, sig_sha256, msg); + ret = verify_commit_buffer(tmp_buf.buf, tmp_buf.len, &signature_check); + + if (ret) { + warn_invalid_signature(&signature_check, msg->buf, mode); + + if (mode == SIGN_SIGN_IF_INVALID) { + struct strbuf signature = STRBUF_INIT; + struct strbuf payload = STRBUF_INIT; + + /* + * NEEDSWORK: To properly support interoperability mode + * when signing commit signatures, the commit buffer + * must be provided in both the repository and + * compatibility object formats. As currently + * implemented, only the repository object format is + * considered meaning compatibility signatures cannot be + * generated. Thus, attempting to sign commit signatures + * in interoperability mode is currently unsupported. + */ + if (the_repository->compat_hash_algo) + die(_("signing commits in interoperability mode is unsupported")); + + strbuf_addstr(&payload, signature_check.payload); + if (sign_buffer(&payload, &signature, signed_commit_keyid, + SIGN_BUFFER_USE_DEFAULT_KEY)) + die(_("failed to sign commit object")); + add_header_signature(new_data, &signature, the_hash_algo); + + strbuf_release(&signature); + strbuf_release(&payload); + } finalize_commit_buffer(new_data, NULL, NULL, msg); } else { @@ -2927,6 +2978,7 @@ static void parse_new_commit(const char *arg) /* fallthru */ case SIGN_VERBATIM: case SIGN_STRIP_IF_INVALID: + case SIGN_SIGN_IF_INVALID: import_one_signature(&sig_sha1, &sig_sha256, v); break; @@ -3011,9 +3063,11 @@ static void parse_new_commit(const char *arg) "encoding %s\n", encoding); - if (signed_commit_mode == SIGN_STRIP_IF_INVALID && + if ((signed_commit_mode == SIGN_STRIP_IF_INVALID || + signed_commit_mode == SIGN_SIGN_IF_INVALID) && (sig_sha1.hash_algo || sig_sha256.hash_algo)) - handle_strip_if_invalid(&new_data, &sig_sha1, &sig_sha256, &msg); + handle_signature_if_invalid(&new_data, &sig_sha1, &sig_sha256, + &msg, signed_commit_mode); else finalize_commit_buffer(&new_data, &sig_sha1, &sig_sha256, &msg); @@ -3060,6 +3114,9 @@ static void handle_tag_signature(struct strbuf *msg, const char *name) case SIGN_STRIP_IF_INVALID: die(_("'strip-if-invalid' is not a valid mode for " "git fast-import with --signed-tags=")); + case SIGN_SIGN_IF_INVALID: + die(_("'sign-if-invalid' is not a valid mode for " + "git fast-import with --signed-tags=")); default: BUG("invalid signed_tag_mode value %d from tag '%s'", signed_tag_mode, name); @@ -3649,10 +3706,10 @@ static int parse_one_option(const char *option) } else if (skip_prefix(option, "export-pack-edges=", &option)) { option_export_pack_edges(option); } else if (skip_prefix(option, "signed-commits=", &option)) { - if (parse_sign_mode(option, &signed_commit_mode)) + if (parse_sign_mode(option, &signed_commit_mode, &signed_commit_keyid)) usagef(_("unknown --signed-commits mode '%s'"), option); } else if (skip_prefix(option, "signed-tags=", &option)) { - if (parse_sign_mode(option, &signed_tag_mode)) + if (parse_sign_mode(option, &signed_tag_mode, NULL)) usagef(_("unknown --signed-tags mode '%s'"), option); } else if (!strcmp(option, "quiet")) { show_stats = 0; diff --git a/gpg-interface.c b/gpg-interface.c index 27eacbb632628c..d517425034ee6c 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -1152,21 +1152,28 @@ static int sign_buffer_ssh(struct strbuf *buffer, struct strbuf *signature, return ret; } -int parse_sign_mode(const char *arg, enum sign_mode *mode) +int parse_sign_mode(const char *arg, enum sign_mode *mode, const char **keyid) { - if (!strcmp(arg, "abort")) + if (!strcmp(arg, "abort")) { *mode = SIGN_ABORT; - else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore")) + } else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore")) { *mode = SIGN_VERBATIM; - else if (!strcmp(arg, "warn-verbatim") || !strcmp(arg, "warn")) + } else if (!strcmp(arg, "warn-verbatim") || !strcmp(arg, "warn")) { *mode = SIGN_WARN_VERBATIM; - else if (!strcmp(arg, "warn-strip")) + } else if (!strcmp(arg, "warn-strip")) { *mode = SIGN_WARN_STRIP; - else if (!strcmp(arg, "strip")) + } else if (!strcmp(arg, "strip")) { *mode = SIGN_STRIP; - else if (!strcmp(arg, "strip-if-invalid")) + } else if (!strcmp(arg, "strip-if-invalid")) { *mode = SIGN_STRIP_IF_INVALID; - else + } else if (!strcmp(arg, "sign-if-invalid")) { + *mode = SIGN_SIGN_IF_INVALID; + } else if (skip_prefix(arg, "sign-if-invalid=", &arg)) { + *mode = SIGN_SIGN_IF_INVALID; + if (keyid) + *keyid = arg; + } else { return -1; + } return 0; } diff --git a/gpg-interface.h b/gpg-interface.h index 37f3ac42dbb99a..a365586ce1e755 100644 --- a/gpg-interface.h +++ b/gpg-interface.h @@ -120,12 +120,15 @@ enum sign_mode { SIGN_WARN_STRIP, SIGN_STRIP, SIGN_STRIP_IF_INVALID, + SIGN_SIGN_IF_INVALID, }; /* * Return 0 if `arg` can be parsed into an `enum sign_mode`. Return -1 - * otherwise. + * otherwise. If the parsed mode is SIGN_SIGN_IF_INVALID and GPG key provided in + * the arguments in the form `sign-if-invalid=`, the key-ID is parsed + * into `char **keyid`. */ -int parse_sign_mode(const char *arg, enum sign_mode *mode); +int parse_sign_mode(const char *arg, enum sign_mode *mode, const char **keyid); #endif diff --git a/t/t9305-fast-import-signatures.sh b/t/t9305-fast-import-signatures.sh index 022dae02e48177..ac4228127a9651 100755 --- a/t/t9305-fast-import-signatures.sh +++ b/t/t9305-fast-import-signatures.sh @@ -103,71 +103,111 @@ test_expect_success GPG 'strip both OpenPGP signatures with --signed-commits=war test_line_count = 2 out ' -test_expect_success GPG 'import commit with no signature with --signed-commits=strip-if-invalid' ' - git fast-export main >output && - git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && - test_must_be_empty log -' - -test_expect_success GPG 'keep valid OpenPGP signature with --signed-commits=strip-if-invalid' ' - rm -rf new && - git init new && - - git fast-export --signed-commits=verbatim openpgp-signing >output && - git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && - IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) && - test $OPENPGP_SIGNING = $IMPORTED && - git -C new cat-file commit "$IMPORTED" >actual && - test_grep -E "^gpgsig(-sha256)? " actual && - test_must_be_empty log -' - -test_expect_success GPG 'strip signature invalidated by message change with --signed-commits=strip-if-invalid' ' +for mode in strip-if-invalid sign-if-invalid +do + test_expect_success GPG "import commit with no signature with --signed-commits=$mode" ' + git fast-export main >output && + git -C new fast-import --quiet --signed-commits=$mode log 2>&1 && + test_must_be_empty log + ' + + test_expect_success GPG "keep valid OpenPGP signature with --signed-commits=$mode" ' + rm -rf new && + git init new && + + git fast-export --signed-commits=verbatim openpgp-signing >output && + git -C new fast-import --quiet --signed-commits=$mode log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) && + test $OPENPGP_SIGNING = $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep -E "^gpgsig(-sha256)? " actual && + test_must_be_empty log + ' + + test_expect_success GPG "handle signature invalidated by message change with --signed-commits=$mode" ' + rm -rf new && + git init new && + + git fast-export --signed-commits=verbatim openpgp-signing >output && + + # Change the commit message, which invalidates the signature. + # The commit message length should not change though, otherwise the + # corresponding `data ` command would have to be changed too. + sed "s/OpenPGP signed commit/OpenPGP forged commit/" output >modified && + + git -C new fast-import --quiet --signed-commits=$mode log 2>&1 && + + IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) && + test $OPENPGP_SIGNING != $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + + if test "$mode" = strip-if-invalid + then + test_grep "stripping invalid signature" log && + test_grep ! -E "^gpgsig" actual + else + test_grep "replacing invalid signature" log && + test_grep -E "^gpgsig(-sha256)? " actual && + git -C new verify-commit "$IMPORTED" + fi + ' + + test_expect_success GPGSM "keep valid X.509 signature with --signed-commits=$mode" ' + rm -rf new && + git init new && + + git fast-export --signed-commits=verbatim x509-signing >output && + git -C new fast-import --quiet --signed-commits=$mode log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/x509-signing) && + test $X509_SIGNING = $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep -E "^gpgsig(-sha256)? " actual && + test_must_be_empty log + ' + + test_expect_success GPGSSH "keep valid SSH signature with --signed-commits=$mode" ' + rm -rf new && + git init new && + + test_config -C new gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + + git fast-export --signed-commits=verbatim ssh-signing >output && + git -C new fast-import --quiet --signed-commits=$mode log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/ssh-signing) && + test $SSH_SIGNING = $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep -E "^gpgsig(-sha256)? " actual && + test_must_be_empty log + ' +done + +test_expect_success GPGSSH "sign invalid commit with explicit keyid" ' rm -rf new && git init new && - git fast-export --signed-commits=verbatim openpgp-signing >output && + git fast-export --signed-commits=verbatim ssh-signing >output && # Change the commit message, which invalidates the signature. # The commit message length should not change though, otherwise the # corresponding `data ` command would have to be changed too. - sed "s/OpenPGP signed commit/OpenPGP forged commit/" output >modified && - - git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && - - IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) && - test $OPENPGP_SIGNING != $IMPORTED && - git -C new cat-file commit "$IMPORTED" >actual && - test_grep ! -E "^gpgsig" actual && - test_grep "stripping invalid signature" log -' - -test_expect_success GPGSM 'keep valid X.509 signature with --signed-commits=strip-if-invalid' ' - rm -rf new && - git init new && - - git fast-export --signed-commits=verbatim x509-signing >output && - git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && - IMPORTED=$(git -C new rev-parse --verify refs/heads/x509-signing) && - test $X509_SIGNING = $IMPORTED && - git -C new cat-file commit "$IMPORTED" >actual && - test_grep -E "^gpgsig(-sha256)? " actual && - test_must_be_empty log -' - -test_expect_success GPGSSH 'keep valid SSH signature with --signed-commits=strip-if-invalid' ' - rm -rf new && - git init new && + sed "s/SSH signed commit/SSH forged commit/" output >modified && + # Configure the target repository with an invalid default signing key. + test_config -C new user.signingkey "not-a-real-key-id" && + test_config -C new gpg.format ssh && test_config -C new gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + test_must_fail git -C new fast-import --quiet \ + --signed-commits=sign-if-invalid /dev/null 2>&1 && + + # Import using explicitly provided signing key. + git -C new fast-import --quiet \ + --signed-commits=sign-if-invalid="${GPGSSH_KEY_PRIMARY}" output && - git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && IMPORTED=$(git -C new rev-parse --verify refs/heads/ssh-signing) && - test $SSH_SIGNING = $IMPORTED && + test $SSH_SIGNING != $IMPORTED && git -C new cat-file commit "$IMPORTED" >actual && test_grep -E "^gpgsig(-sha256)? " actual && - test_must_be_empty log + git -C new verify-commit "$IMPORTED" ' test_done From d39cef3a1af5da30fc8db2f31b0cd23cfb45d05b Mon Sep 17 00:00:00 2001 From: Siddharth Shrimali Date: Fri, 13 Mar 2026 11:01:59 +0530 Subject: [PATCH 075/236] t0410: modernize delete_object helper The delete_object helper currently relies on a manual sed command to calculate object paths. This works, but it's a bit brittle and forces us to maintain shell logic that Git's own test suite can already handle more elegantly. Switch to 'test_oid_to_path' to let Git handle the path logic. This makes the helper hash independent, which is much cleaner than manual string manipulation. While at it, use 'local' to declare helper-specific variables and quote them to follow Git's coding style. This prevents them from leaking into global shell scope and avoids potential naming conflicts with other parts of the test suite. Helped-by: Pushkar Singh Suggested-by: Jeff King Signed-off-by: Siddharth Shrimali Signed-off-by: Junio C Hamano --- t/t0410-partial-clone.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index 2a5bdbeeb87f6e..52e19728a3fca0 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -11,7 +11,10 @@ test_description='partial clone' GIT_TEST_COMMIT_GRAPH=0 delete_object () { - rm $1/.git/objects/$(echo $2 | sed -e 's|^..|&/|') + local repo="$1" + local obj="$2" + local path="$repo/.git/objects/$(test_oid_to_path "$obj")" && + rm "$path" } pack_as_from_promisor () { From 8ba0f3ef3fa5ff540190f94b610d7af628fa35d3 Mon Sep 17 00:00:00 2001 From: Siddharth Asthana Date: Fri, 13 Mar 2026 11:10:34 +0530 Subject: [PATCH 076/236] sequencer: extract revert message formatting into shared function The logic for formatting revert commit messages (handling "Revert" and "Reapply" cases, appending "This reverts commit .", and handling merge-parent references) currently lives inline in do_pick_commit(). The upcoming replay --revert mode needs to reuse this logic. Extract all of this into a new sequencer_format_revert_message() function. The function takes a repository, the subject line, commit, parent, a use_commit_reference flag, and the output strbuf. It handles both regular reverts ("Revert """) and revert-of-revert cases ("Reapply """), and uses refer_to_commit() internally to format the commit reference. Update refer_to_commit() to take a struct repository parameter instead of relying on the_repository, and a bool instead of reading from replay_opts directly. This makes it usable from the new shared function without pulling in sequencer-specific state. Signed-off-by: Siddharth Asthana Signed-off-by: Junio C Hamano --- sequencer.c | 78 +++++++++++++++++++++++++++++++---------------------- sequencer.h | 14 ++++++++++ 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/sequencer.c b/sequencer.c index 1f492f8460e237..403c32e00da0e5 100644 --- a/sequencer.c +++ b/sequencer.c @@ -2198,15 +2198,16 @@ static int should_edit(struct replay_opts *opts) { return opts->edit; } -static void refer_to_commit(struct replay_opts *opts, - struct strbuf *msgbuf, struct commit *commit) +static void refer_to_commit(struct repository *r, struct strbuf *msgbuf, + const struct commit *commit, + bool use_commit_reference) { - if (opts->commit_use_reference) { + if (use_commit_reference) { struct pretty_print_context ctx = { .abbrev = DEFAULT_ABBREV, .date_mode.type = DATE_SHORT, }; - repo_format_commit_message(the_repository, commit, + repo_format_commit_message(r, commit, "%h (%s, %ad)", msgbuf, &ctx); } else { strbuf_addstr(msgbuf, oid_to_hex(&commit->object.oid)); @@ -2356,38 +2357,14 @@ static int do_pick_commit(struct repository *r, */ if (command == TODO_REVERT) { - const char *orig_subject; - base = commit; base_label = msg.label; next = parent; next_label = msg.parent_label; - if (opts->commit_use_reference) { - strbuf_commented_addf(&ctx->message, comment_line_str, - "*** SAY WHY WE ARE REVERTING ON THE TITLE LINE ***"); - } else if (skip_prefix(msg.subject, "Revert \"", &orig_subject) && - /* - * We don't touch pre-existing repeated reverts, because - * theoretically these can be nested arbitrarily deeply, - * thus requiring excessive complexity to deal with. - */ - !starts_with(orig_subject, "Revert \"")) { - strbuf_addstr(&ctx->message, "Reapply \""); - strbuf_addstr(&ctx->message, orig_subject); - strbuf_addstr(&ctx->message, "\n"); - } else { - strbuf_addstr(&ctx->message, "Revert \""); - strbuf_addstr(&ctx->message, msg.subject); - strbuf_addstr(&ctx->message, "\"\n"); - } - strbuf_addstr(&ctx->message, "\nThis reverts commit "); - refer_to_commit(opts, &ctx->message, commit); - - if (commit->parents && commit->parents->next) { - strbuf_addstr(&ctx->message, ", reversing\nchanges made to "); - refer_to_commit(opts, &ctx->message, parent); - } - strbuf_addstr(&ctx->message, ".\n"); + sequencer_format_revert_message(r, msg.subject, commit, + parent, + opts->commit_use_reference, + &ctx->message); } else { const char *p; @@ -5572,6 +5549,43 @@ int sequencer_pick_revisions(struct repository *r, return res; } +void sequencer_format_revert_message(struct repository *r, + const char *subject, + const struct commit *commit, + const struct commit *parent, + bool use_commit_reference, + struct strbuf *message) +{ + const char *orig_subject; + + if (use_commit_reference) { + strbuf_commented_addf(message, comment_line_str, + "*** SAY WHY WE ARE REVERTING ON THE TITLE LINE ***"); + } else if (skip_prefix(subject, "Revert \"", &orig_subject) && + /* + * We don't touch pre-existing repeated reverts, because + * theoretically these can be nested arbitrarily deeply, + * thus requiring excessive complexity to deal with. + */ + !starts_with(orig_subject, "Revert \"")) { + strbuf_addstr(message, "Reapply \""); + strbuf_addstr(message, orig_subject); + strbuf_addstr(message, "\n"); + } else { + strbuf_addstr(message, "Revert \""); + strbuf_addstr(message, subject); + strbuf_addstr(message, "\"\n"); + } + strbuf_addstr(message, "\nThis reverts commit "); + refer_to_commit(r, message, commit, use_commit_reference); + + if (commit->parents && commit->parents->next) { + strbuf_addstr(message, ", reversing\nchanges made to "); + refer_to_commit(r, message, parent, use_commit_reference); + } + strbuf_addstr(message, ".\n"); +} + void append_signoff(struct strbuf *msgbuf, size_t ignore_footer, unsigned flag) { unsigned no_dup_sob = flag & APPEND_SIGNOFF_DEDUP; diff --git a/sequencer.h b/sequencer.h index 719684c8a9fb2e..de0bd6e8a27821 100644 --- a/sequencer.h +++ b/sequencer.h @@ -271,4 +271,18 @@ int sequencer_determine_whence(struct repository *r, enum commit_whence *whence) */ int sequencer_get_update_refs_state(const char *wt_dir, struct string_list *refs); +/* + * Formats a complete revert commit message following standard Git conventions. + * Handles regular reverts ("Revert \"\""), revert of revert cases + * ("Reapply \"\""), and the --reference style. Appends "This reverts + * commit ." using either the abbreviated or full commit reference + * depending on use_commit_reference. Also handles merge-parent references. + */ +void sequencer_format_revert_message(struct repository *r, + const char *subject, + const struct commit *commit, + const struct commit *parent, + bool use_commit_reference, + struct strbuf *message); + #endif /* SEQUENCER_H */ From ba5c0d03d39a8b1798c8f988bdc95f9a08f10fac Mon Sep 17 00:00:00 2001 From: Siddharth Asthana Date: Fri, 13 Mar 2026 11:10:35 +0530 Subject: [PATCH 077/236] replay: add --revert mode to reverse commit changes Add a `--revert ` mode to git replay that undoes the changes introduced by the specified commits. Like --onto and --advance, --revert is a standalone mode: it takes a branch argument and updates that branch with the newly created revert commits. At GitLab, we need this in Gitaly for reverting commits directly on bare repositories without requiring a working tree checkout. The approach is the same as sequencer.c's do_pick_commit() -- cherry-pick and revert are just the same three-way merge with swapped arguments: - Cherry-pick: merge(ancestor=parent, ours=current, theirs=commit) - Revert: merge(ancestor=commit, ours=current, theirs=parent) We swap the base and pickme trees passed to merge_incore_nonrecursive() to reverse the diff direction. Reverts are processed newest-first (matching git revert behavior) to reduce conflicts by peeling off changes from the top. Each revert builds on the result of the previous one via the last_commit fallback in the main replay loop, rather than relying on the parent-mapping used for cherry-pick. Revert commit messages follow the usual git revert conventions: prefixed with "Revert" (or "Reapply" when reverting a revert), and including "This reverts commit .". The author is set to the current user rather than preserving the original author, matching git revert behavior. Helped-by: Christian Couder Helped-by: Patrick Steinhardt Helped-by: Elijah Newren Helped-by: Phillip Wood Helped-by: Johannes Schindelin Helped-by: Junio C Hamano Helped-by: Toon Claes Signed-off-by: Siddharth Asthana Signed-off-by: Junio C Hamano --- Documentation/git-replay.adoc | 43 ++++++++- builtin/replay.c | 46 ++++++---- replay.c | 161 +++++++++++++++++++++++++--------- replay.h | 11 ++- t/t3650-replay-basics.sh | 114 ++++++++++++++++++++++-- 5 files changed, 305 insertions(+), 70 deletions(-) diff --git a/Documentation/git-replay.adoc b/Documentation/git-replay.adoc index 8d696ce3abb491..6698cfc047d6ad 100644 --- a/Documentation/git-replay.adoc +++ b/Documentation/git-replay.adoc @@ -9,7 +9,7 @@ git-replay - EXPERIMENTAL: Replay commits on a new base, works with bare repos t SYNOPSIS -------- [verse] -(EXPERIMENTAL!) 'git replay' ([--contained] --onto | --advance ) [--ref-action[=]] +(EXPERIMENTAL!) 'git replay' ([--contained] --onto | --advance | --revert ) [--ref-action[=]] ... DESCRIPTION ----------- @@ -42,6 +42,25 @@ The history is replayed on top of the and is updated to point at the tip of the resulting history. This is different from `--onto`, which uses the target only as a starting point without updating it. +--revert :: + Starting point at which to create the reverted commits; must be a + branch name. ++ +When `--revert` is specified, the commits in the revision range are reverted +(their changes are undone) and the reverted commits are created on top of +. The is then updated to point at the new commits. This is +the same as running `git revert ` but does not update the +working tree. ++ +The commit messages follow `git revert` conventions: they are prefixed with +"Revert" and include "This reverts commit ." When reverting a commit +whose message starts with "Revert", the new message uses "Reapply" instead. +Unlike cherry-pick which preserves the original author, revert commits use +the current user as the author, matching the behavior of `git revert`. ++ +This option is mutually exclusive with `--onto` and `--advance`. It is also +incompatible with `--contained` (which is a modifier for `--onto` only). + --contained:: Update all branches that point at commits in . Requires `--onto`. @@ -84,9 +103,10 @@ When using `--ref-action=print`, the output is usable as input to update refs/heads/branch3 ${NEW_branch3_HASH} ${OLD_branch3_HASH} where the number of refs updated depends on the arguments passed and -the shape of the history being replayed. When using `--advance`, the -number of refs updated is always one, but for `--onto`, it can be one -or more (rebasing multiple branches simultaneously is supported). +the shape of the history being replayed. When using `--advance` or +`--revert`, the number of refs updated is always one, but for `--onto`, +it can be one or more (rebasing multiple branches simultaneously is +supported). There is no stderr output on conflicts; see the <> section below. @@ -152,6 +172,21 @@ all commits they have since `base`, playing them on top of `origin/main`. These three branches may have commits on top of `base` that they have in common, but that does not need to be the case. +To revert commits on a branch: + +------------ +$ git replay --revert main topic~2..topic +------------ + +This reverts the last two commits from `topic`, creating revert commits on +top of `main`, and updates `main` to point at the result. This is useful when +commits from `topic` were previously merged or cherry-picked into `main` and +need to be undone. + +NOTE: For reverting an entire merge request as a single commit (rather than +commit-by-commit), consider using `git merge-tree --merge-base $TIP HEAD $BASE` +which can avoid unnecessary merge conflicts. + GIT --- Part of the linkgit:git[1] suite diff --git a/builtin/replay.c b/builtin/replay.c index 2cdde830a8a0f4..fe69f6f8cedfd7 100644 --- a/builtin/replay.c +++ b/builtin/replay.c @@ -83,8 +83,8 @@ int cmd_replay(int argc, const char *const replay_usage[] = { N_("(EXPERIMENTAL!) git replay " - "([--contained] --onto | --advance ) " - "[--ref-action[=]] "), + "([--contained] --onto | --advance | --revert ) " + "[--ref-action[=]] ..."), NULL }; struct option replay_options[] = { @@ -96,6 +96,9 @@ int cmd_replay(int argc, N_("replay onto given commit")), OPT_BOOL(0, "contained", &opts.contained, N_("update all branches that point at commits in ")), + OPT_STRING(0, "revert", &opts.revert, + N_("branch"), + N_("revert commits onto given branch")), OPT_STRING(0, "ref-action", &ref_action, N_("mode"), N_("control ref update behavior (update|print)")), @@ -105,15 +108,17 @@ int cmd_replay(int argc, argc = parse_options(argc, argv, prefix, replay_options, replay_usage, PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); - if (!opts.onto && !opts.advance) { - error(_("option --onto or --advance is mandatory")); + /* Exactly one mode must be specified */ + if (!opts.onto && !opts.advance && !opts.revert) { + error(_("exactly one of --onto, --advance, or --revert is required")); usage_with_options(replay_usage, replay_options); } - die_for_incompatible_opt2(!!opts.advance, "--advance", - opts.contained, "--contained"); - die_for_incompatible_opt2(!!opts.advance, "--advance", - !!opts.onto, "--onto"); + die_for_incompatible_opt3(!!opts.onto, "--onto", + !!opts.advance, "--advance", + !!opts.revert, "--revert"); + if (opts.contained && !opts.onto) + die(_("--contained requires --onto")); /* Parse ref action mode from command line or config */ ref_mode = get_ref_action_mode(repo, ref_action); @@ -129,7 +134,13 @@ int cmd_replay(int argc, * some options changing these values if we think they could * be useful. */ - revs.reverse = 1; + /* + * Cherry-pick/rebase need oldest-first ordering so that each + * replayed commit can build on its already-replayed parent. + * Revert needs newest-first ordering (like git revert) to + * reduce conflicts by peeling off changes from the top. + */ + revs.reverse = opts.revert ? 0 : 1; revs.sort_order = REV_SORT_IN_GRAPH_ORDER; revs.topo_order = 1; revs.simplify_history = 0; @@ -144,11 +155,14 @@ int cmd_replay(int argc, * Detect and warn if we override some user specified rev * walking options. */ - if (revs.reverse != 1) { - warning(_("some rev walking options will be overridden as " - "'%s' bit in 'struct rev_info' will be forced"), - "reverse"); - revs.reverse = 1; + { + int desired_reverse = opts.revert ? 0 : 1; + if (revs.reverse != desired_reverse) { + warning(_("some rev walking options will be overridden as " + "'%s' bit in 'struct rev_info' will be forced"), + "reverse"); + revs.reverse = desired_reverse; + } } if (revs.sort_order != REV_SORT_IN_GRAPH_ORDER) { warning(_("some rev walking options will be overridden as " @@ -174,7 +188,9 @@ int cmd_replay(int argc, goto cleanup; /* Build reflog message */ - if (opts.advance) { + if (opts.revert) { + strbuf_addf(&reflog_msg, "replay --revert %s", opts.revert); + } else if (opts.advance) { strbuf_addf(&reflog_msg, "replay --advance %s", opts.advance); } else { struct object_id oid; diff --git a/replay.c b/replay.c index f97d652f338f1d..199066f6b34afe 100644 --- a/replay.c +++ b/replay.c @@ -8,9 +8,15 @@ #include "refs.h" #include "replay.h" #include "revision.h" +#include "sequencer.h" #include "strmap.h" #include "tree.h" +enum replay_mode { + REPLAY_MODE_PICK, + REPLAY_MODE_REVERT, +}; + static const char *short_commit_name(struct repository *repo, struct commit *commit) { @@ -44,15 +50,37 @@ static char *get_author(const char *message) return NULL; } +static void generate_revert_message(struct strbuf *msg, + struct commit *commit, + struct repository *repo) +{ + const char *out_enc = get_commit_output_encoding(); + const char *message = repo_logmsg_reencode(repo, commit, NULL, out_enc); + const char *subject_start; + int subject_len; + char *subject; + + subject_len = find_commit_subject(message, &subject_start); + subject = xmemdupz(subject_start, subject_len); + + sequencer_format_revert_message(repo, subject, commit, + commit->parents ? commit->parents->item : NULL, + false, msg); + + free(subject); + repo_unuse_commit_buffer(repo, commit, message); +} + static struct commit *create_commit(struct repository *repo, struct tree *tree, struct commit *based_on, - struct commit *parent) + struct commit *parent, + enum replay_mode mode) { struct object_id ret; struct object *obj = NULL; struct commit_list *parents = NULL; - char *author; + char *author = NULL; char *sign_commit = NULL; /* FIXME: cli users might want to sign again */ struct commit_extra_header *extra = NULL; struct strbuf msg = STRBUF_INIT; @@ -64,9 +92,16 @@ static struct commit *create_commit(struct repository *repo, commit_list_insert(parent, &parents); extra = read_commit_extra_headers(based_on, exclude_gpgsig); - find_commit_subject(message, &orig_message); - strbuf_addstr(&msg, orig_message); - author = get_author(message); + if (mode == REPLAY_MODE_REVERT) { + generate_revert_message(&msg, based_on, repo); + /* For revert, use current user as author (NULL = use default) */ + } else if (mode == REPLAY_MODE_PICK) { + find_commit_subject(message, &orig_message); + strbuf_addstr(&msg, orig_message); + author = get_author(message); + } else { + BUG("unexpected replay mode %d", mode); + } reset_ident_date(); if (commit_tree_extended(msg.buf, msg.len, &tree->object.oid, parents, &ret, author, NULL, sign_commit, extra)) { @@ -147,11 +182,35 @@ static void get_ref_information(struct repository *repo, } } +static void set_up_branch_mode(struct repository *repo, + char **branch_name, + const char *option_name, + struct ref_info *rinfo, + struct commit **onto) +{ + struct object_id oid; + char *fullname = NULL; + + if (repo_dwim_ref(repo, *branch_name, strlen(*branch_name), + &oid, &fullname, 0) == 1) { + free(*branch_name); + *branch_name = fullname; + } else { + die(_("argument to %s must be a reference"), option_name); + } + *onto = peel_committish(repo, *branch_name, option_name); + if (rinfo->positive_refexprs > 1) + die(_("'%s' cannot be used with multiple revision ranges " + "because the ordering would be ill-defined"), + option_name); +} + static void set_up_replay_mode(struct repository *repo, struct rev_cmdline_info *cmd_info, const char *onto_name, bool *detached_head, char **advance_name, + char **revert_name, struct commit **onto, struct strset **update_refs) { @@ -166,9 +225,6 @@ static void set_up_replay_mode(struct repository *repo, if (!rinfo.positive_refexprs) die(_("need some commits to replay")); - if (!onto_name == !*advance_name) - BUG("one and only one of onto_name and *advance_name must be given"); - if (onto_name) { *onto = peel_committish(repo, onto_name, "--onto"); if (rinfo.positive_refexprs < @@ -177,23 +233,12 @@ static void set_up_replay_mode(struct repository *repo, *update_refs = xcalloc(1, sizeof(**update_refs)); **update_refs = rinfo.positive_refs; memset(&rinfo.positive_refs, 0, sizeof(**update_refs)); + } else if (*advance_name) { + set_up_branch_mode(repo, advance_name, "--advance", &rinfo, onto); + } else if (*revert_name) { + set_up_branch_mode(repo, revert_name, "--revert", &rinfo, onto); } else { - struct object_id oid; - char *fullname = NULL; - - if (!*advance_name) - BUG("expected either onto_name or *advance_name in this function"); - - if (repo_dwim_ref(repo, *advance_name, strlen(*advance_name), - &oid, &fullname, 0) == 1) { - free(*advance_name); - *advance_name = fullname; - } else { - die(_("argument to --advance must be a reference")); - } - *onto = peel_committish(repo, *advance_name, "--advance"); - if (rinfo.positive_refexprs > 1) - die(_("cannot advance target with multiple sources because ordering would be ill-defined")); + BUG("expected one of onto_name, *advance_name, or *revert_name"); } strset_clear(&rinfo.negative_refs); strset_clear(&rinfo.positive_refs); @@ -214,7 +259,8 @@ static struct commit *pick_regular_commit(struct repository *repo, kh_oid_map_t *replayed_commits, struct commit *onto, struct merge_options *merge_opt, - struct merge_result *result) + struct merge_result *result, + enum replay_mode mode) { struct commit *base, *replayed_base; struct tree *pickme_tree, *base_tree, *replayed_base_tree; @@ -226,25 +272,45 @@ static struct commit *pick_regular_commit(struct repository *repo, pickme_tree = repo_get_commit_tree(repo, pickme); base_tree = repo_get_commit_tree(repo, base); - merge_opt->branch1 = short_commit_name(repo, replayed_base); - merge_opt->branch2 = short_commit_name(repo, pickme); - merge_opt->ancestor = xstrfmt("parent of %s", merge_opt->branch2); - - merge_incore_nonrecursive(merge_opt, - base_tree, - replayed_base_tree, - pickme_tree, - result); - - free((char*)merge_opt->ancestor); + if (mode == REPLAY_MODE_PICK) { + /* Cherry-pick: normal order */ + merge_opt->branch1 = short_commit_name(repo, replayed_base); + merge_opt->branch2 = short_commit_name(repo, pickme); + merge_opt->ancestor = xstrfmt("parent of %s", merge_opt->branch2); + + merge_incore_nonrecursive(merge_opt, + base_tree, + replayed_base_tree, + pickme_tree, + result); + + free((char *)merge_opt->ancestor); + } else if (mode == REPLAY_MODE_REVERT) { + /* Revert: swap base and pickme to reverse the diff */ + const char *pickme_name = short_commit_name(repo, pickme); + merge_opt->branch1 = short_commit_name(repo, replayed_base); + merge_opt->branch2 = xstrfmt("parent of %s", pickme_name); + merge_opt->ancestor = pickme_name; + + merge_incore_nonrecursive(merge_opt, + pickme_tree, + replayed_base_tree, + base_tree, + result); + + free((char *)merge_opt->branch2); + } else { + BUG("unexpected replay mode %d", mode); + } merge_opt->ancestor = NULL; + merge_opt->branch2 = NULL; if (!result->clean) return NULL; /* Drop commits that become empty */ if (oideq(&replayed_base_tree->object.oid, &result->tree->object.oid) && !oideq(&pickme_tree->object.oid, &base_tree->object.oid)) return replayed_base; - return create_commit(repo, result->tree, pickme, replayed_base); + return create_commit(repo, result->tree, pickme, replayed_base, mode); } void replay_result_release(struct replay_result *result) @@ -281,11 +347,16 @@ int replay_revisions(struct rev_info *revs, }; bool detached_head; char *advance; + char *revert; + enum replay_mode mode = REPLAY_MODE_PICK; int ret; advance = xstrdup_or_null(opts->advance); + revert = xstrdup_or_null(opts->revert); + if (revert) + mode = REPLAY_MODE_REVERT; set_up_replay_mode(revs->repo, &revs->cmdline, opts->onto, - &detached_head, &advance, &onto, &update_refs); + &detached_head, &advance, &revert, &onto, &update_refs); /* FIXME: Should allow replaying commits with the first as a root commit */ @@ -309,7 +380,8 @@ int replay_revisions(struct rev_info *revs, die(_("replaying merge commits is not supported yet!")); last_commit = pick_regular_commit(revs->repo, commit, replayed_commits, - onto, &merge_opt, &result); + mode == REPLAY_MODE_REVERT ? last_commit : onto, + &merge_opt, &result, mode); if (!last_commit) break; @@ -321,7 +393,7 @@ int replay_revisions(struct rev_info *revs, kh_value(replayed_commits, pos) = last_commit; /* Update any necessary branches */ - if (advance) + if (advance || revert) continue; for (decoration = get_name_decoration(&commit->object); @@ -355,11 +427,13 @@ int replay_revisions(struct rev_info *revs, goto out; } - /* In --advance mode, advance the target ref */ - if (advance) - replay_result_queue_update(out, advance, + /* In --advance or --revert mode, update the target ref */ + if (advance || revert) { + const char *ref = advance ? advance : revert; + replay_result_queue_update(out, ref, &onto->object.oid, &last_commit->object.oid); + } ret = 0; @@ -371,5 +445,6 @@ int replay_revisions(struct rev_info *revs, kh_destroy_oid_map(replayed_commits); merge_finalize(&merge_opt, &result); free(advance); + free(revert); return ret; } diff --git a/replay.h b/replay.h index d8407dc7f710fc..e916a5f975be26 100644 --- a/replay.h +++ b/replay.h @@ -13,7 +13,7 @@ struct replay_revisions_options { /* * Starting point at which to create the new commits; must be a branch * name. The branch will be updated to point to the rewritten commits. - * This option is mutually exclusive with `onto`. + * This option is mutually exclusive with `onto` and `revert`. */ const char *advance; @@ -22,7 +22,14 @@ struct replay_revisions_options { * committish. References pointing at decendants of `onto` will be * updated to point to the new commits. */ - const char *onto; + const char *onto; + + /* + * Starting point at which to create revert commits; must be a branch + * name. The branch will be updated to point to the revert commits. + * This option is mutually exclusive with `onto` and `advance`. + */ + const char *revert; /* * Update branches that point at commits in the given revision range. diff --git a/t/t3650-replay-basics.sh b/t/t3650-replay-basics.sh index a03f8f9293eb12..0c1e03e0fbaa0d 100755 --- a/t/t3650-replay-basics.sh +++ b/t/t3650-replay-basics.sh @@ -74,8 +74,8 @@ test_expect_success '--onto with invalid commit-ish' ' test_cmp expect actual ' -test_expect_success 'option --onto or --advance is mandatory' ' - echo "error: option --onto or --advance is mandatory" >expect && +test_expect_success 'exactly one of --onto, --advance, or --revert is required' ' + echo "error: exactly one of --onto, --advance, or --revert is required" >expect && test_might_fail git replay -h >>expect && test_must_fail git replay topic1..topic2 2>actual && test_cmp expect actual @@ -87,16 +87,17 @@ test_expect_success 'no base or negative ref gives no-replaying down to root err test_cmp expect actual ' -test_expect_success 'options --advance and --contained cannot be used together' ' - printf "fatal: options ${SQ}--advance${SQ} " >expect && - printf "and ${SQ}--contained${SQ} cannot be used together\n" >>expect && +test_expect_success '--contained requires --onto' ' + echo "fatal: --contained requires --onto" >expect && test_must_fail git replay --advance=main --contained \ topic1..topic2 2>actual && test_cmp expect actual ' test_expect_success 'cannot advance target ... ordering would be ill-defined' ' - echo "fatal: cannot advance target with multiple sources because ordering would be ill-defined" >expect && + cat >expect <<-\EOF && + fatal: '"'"'--advance'"'"' cannot be used with multiple revision ranges because the ordering would be ill-defined + EOF test_must_fail git replay --advance=main main topic1 topic2 2>actual && test_cmp expect actual ' @@ -398,4 +399,105 @@ test_expect_success 'invalid replay.refAction value' ' test_grep "invalid.*replay.refAction.*value" error ' +test_expect_success 'argument to --revert must be a reference' ' + echo "fatal: argument to --revert must be a reference" >expect && + oid=$(git rev-parse main) && + test_must_fail git replay --revert=$oid topic1..topic2 2>actual && + test_cmp expect actual +' + +test_expect_success 'cannot revert with multiple sources' ' + cat >expect <<-\EOF && + fatal: '"'"'--revert'"'"' cannot be used with multiple revision ranges because the ordering would be ill-defined + EOF + test_must_fail git replay --revert main main topic1 topic2 2>actual && + test_cmp expect actual +' + +test_expect_success 'using replay --revert to revert commits' ' + # Reuse existing topic4 branch (has commits I and J on top of main) + START=$(git rev-parse topic4) && + test_when_finished "git branch -f topic4 $START" && + + # Revert commits I and J + git replay --revert topic4 topic4~2..topic4 && + + # Verify the revert commits were created (newest-first ordering + # means J is reverted first, then I on top) + git log --format=%s -4 topic4 >actual && + cat >expect <<-\EOF && + Revert "I" + Revert "J" + J + I + EOF + test_cmp expect actual && + + # Verify commit message format includes hash (tip is Revert "I") + test_commit_message topic4 <<-EOF && + Revert "I" + + This reverts commit $(git rev-parse I). + EOF + + # Verify reflog message + git reflog topic4 -1 --format=%gs >reflog-msg && + echo "replay --revert topic4" >expect-reflog && + test_cmp expect-reflog reflog-msg +' + +test_expect_success 'using replay --revert in bare repo' ' + # Reuse existing topic4 in bare repo + START=$(git -C bare rev-parse topic4) && + test_when_finished "git -C bare update-ref refs/heads/topic4 $START" && + + # Revert commit J in bare repo + git -C bare replay --revert topic4 topic4~1..topic4 && + + # Verify revert was created + git -C bare log -1 --format=%s topic4 >actual && + echo "Revert \"J\"" >expect && + test_cmp expect actual +' + +test_expect_success 'revert of revert uses Reapply' ' + # Use topic4 and first revert J, then revert the revert + START=$(git rev-parse topic4) && + test_when_finished "git branch -f topic4 $START" && + + # First revert J + git replay --revert topic4 topic4~1..topic4 && + REVERT_J=$(git rev-parse topic4) && + + # Now revert the revert - should become Reapply + git replay --revert topic4 topic4~1..topic4 && + + # Verify Reapply prefix and message format + test_commit_message topic4 <<-EOF + Reapply "J" + + This reverts commit $REVERT_J. + EOF +' + +test_expect_success 'git replay --revert with conflict' ' + # conflict branch has C.conflict which conflicts with topic1s C + test_expect_code 1 git replay --revert conflict B..topic1 +' + +test_expect_success 'git replay --revert incompatible with --contained' ' + test_must_fail git replay --revert topic4 --contained topic4~1..topic4 2>error && + test_grep "requires --onto" error +' + +test_expect_success 'git replay --revert incompatible with --onto' ' + test_must_fail git replay --revert topic4 --onto main topic4~1..topic4 2>error && + test_grep "cannot be used together" error +' + +test_expect_success 'git replay --revert incompatible with --advance' ' + test_must_fail git replay --revert topic4 --advance main topic4~1..topic4 2>error && + test_grep "cannot be used together" error +' + test_done From 9f4cdf590177eacf8d73f2295c07ad4d4e27ab19 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 13 Mar 2026 07:45:12 +0100 Subject: [PATCH 078/236] upload-pack: fix debug statement when flushing packfile data When git-upload-pack(1) writes packfile data to the client we have some logic in place that buffers some partial lines. When that buffer still contains data after git-pack-objects(1) has finished we flush the buffer so that all remaining bytes are sent out. Curiously, when we do so we also print the string "flushed." to stderr. This statement has been introduced in b1c71b7281 (upload-pack: avoid sending an incomplete pack upon failure, 2006-06-20), so quite a while ago. What's interesting though is that stderr is typically spliced through to the client-side, and consequently the client would see this message. Munging the way how we do the caching indeed confirms this: $ git clone file:///home/pks/Development/linux/ Cloning into bare repository 'linux.git'... remote: Enumerating objects: 12980346, done. remote: Counting objects: 100% (131820/131820), done. remote: Compressing objects: 100% (50290/50290), done. remote: Total 12980346 (delta 96319), reused 104500 (delta 81217), pack-reused 12848526 (from 1) Receiving objects: 100% (12980346/12980346), 3.23 GiB | 57.44 MiB/s, done. flushed. Resolving deltas: 100% (10676718/10676718), done. It's quite clear that this string shouldn't ever be visible to the client, so it rather feels like this is a left-over debug statement. The menitoned commit doesn't mention this line, either. Remove the debug output to prepare for a change in how we do the buffering in the next commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- upload-pack.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 2d2b70cbf2dd0b..c2643c0295bad8 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -457,11 +457,9 @@ static void create_pack_file(struct upload_pack_data *pack_data, } /* flush the data */ - if (output_state->used > 0) { + if (output_state->used > 0) send_client_data(1, output_state->buffer, output_state->used, pack_data->use_sideband); - fprintf(stderr, "flushed.\n"); - } free(output_state); if (pack_data->use_sideband) packet_flush(1); From 5d4b7ddce15950ed71ec60453aadfe13d9e633d5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 13 Mar 2026 07:45:13 +0100 Subject: [PATCH 079/236] upload-pack: adapt keepalives based on buffering The function `create_pack_file()` is responsible for sending the packfile data to the client of git-upload-pack(1). As generating the bytes may take significant computing resources we also have a mechanism in place that optionally sends keepalive pktlines in case we haven't sent out any data. The keepalive logic is purely based poll(3p): we pass a timeout to that syscall, and if the call times out we send out the keepalive pktline. While reasonable, this logic isn't entirely sufficient: even if the call to poll(3p) ends because we have received data on any of the file descriptors we may not necessarily send data to the client. The most important edge case here happens in `relay_pack_data()`. When we haven't seen the initial "PACK" signature from git-pack-objects(1) yet we buffer incoming data. So in the worst case, if each of the bytes of that signature arrive shortly before the configured keepalive timeout, then we may not send out any data for a time period that is (almost) four times as long as the configured timeout. This edge case is rather unlikely to matter in practice. But in a subsequent commit we're going to adapt our buffering mechanism to become more aggressive, which makes it more likely that we don't send any data for an extended amount of time. Adapt the logic so that instead of using a fixed timeout on every call to poll(3p), we instead figure out how much time has passed since the last-sent data. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- upload-pack.c | 49 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index c2643c0295bad8..04521e57c967f3 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -29,6 +29,7 @@ #include "commit-graph.h" #include "commit-reach.h" #include "shallow.h" +#include "trace.h" #include "write-or-die.h" #include "json-writer.h" #include "strmap.h" @@ -218,7 +219,8 @@ struct output_state { }; static int relay_pack_data(int pack_objects_out, struct output_state *os, - int use_sideband, int write_packfile_line) + int use_sideband, int write_packfile_line, + bool *did_send_data) { /* * We keep the last byte to ourselves @@ -232,6 +234,8 @@ static int relay_pack_data(int pack_objects_out, struct output_state *os, */ ssize_t readsz; + *did_send_data = false; + readsz = xread(pack_objects_out, os->buffer + os->used, sizeof(os->buffer) - os->used); if (readsz < 0) { @@ -247,6 +251,7 @@ static int relay_pack_data(int pack_objects_out, struct output_state *os, if (os->packfile_uris_started) packet_delim(1); packet_write_fmt(1, "\1packfile\n"); + *did_send_data = true; } break; } @@ -259,6 +264,7 @@ static int relay_pack_data(int pack_objects_out, struct output_state *os, } *p = '\0'; packet_write_fmt(1, "\1%s\n", os->buffer); + *did_send_data = true; os->used -= p - os->buffer + 1; memmove(os->buffer, p + 1, os->used); @@ -279,6 +285,7 @@ static int relay_pack_data(int pack_objects_out, struct output_state *os, os->used = 0; } + *did_send_data = true; return readsz; } @@ -290,6 +297,7 @@ static void create_pack_file(struct upload_pack_data *pack_data, char progress[128]; char abort_msg[] = "aborting due to possible repository " "corruption on the remote side."; + uint64_t last_sent_ms = 0; ssize_t sz; int i; FILE *pipe_fd; @@ -365,10 +373,14 @@ static void create_pack_file(struct upload_pack_data *pack_data, */ while (1) { + uint64_t now_ms = getnanotime() / 1000000; struct pollfd pfd[2]; - int pe, pu, pollsize, polltimeout; + int pe, pu, pollsize, polltimeout_ms; int ret; + if (!last_sent_ms) + last_sent_ms = now_ms; + reset_timeout(pack_data->timeout); pollsize = 0; @@ -390,11 +402,21 @@ static void create_pack_file(struct upload_pack_data *pack_data, if (!pollsize) break; - polltimeout = pack_data->keepalive < 0 - ? -1 - : 1000 * pack_data->keepalive; + if (pack_data->keepalive < 0) { + polltimeout_ms = -1; + } else { + /* + * The polling timeout needs to be adjusted based on + * the time we have sent our last package. The longer + * it's been in the past, the shorter the timeout + * becomes until we eventually don't block at all. + */ + polltimeout_ms = 1000 * pack_data->keepalive - (now_ms - last_sent_ms); + if (polltimeout_ms < 0) + polltimeout_ms = 0; + } - ret = poll(pfd, pollsize, polltimeout); + ret = poll(pfd, pollsize, polltimeout_ms); if (ret < 0) { if (errno != EINTR) { @@ -403,16 +425,18 @@ static void create_pack_file(struct upload_pack_data *pack_data, } continue; } + if (0 <= pe && (pfd[pe].revents & (POLLIN|POLLHUP))) { /* Status ready; we ship that in the side-band * or dump to the standard error. */ sz = xread(pack_objects.err, progress, sizeof(progress)); - if (0 < sz) + if (0 < sz) { send_client_data(2, progress, sz, pack_data->use_sideband); - else if (sz == 0) { + last_sent_ms = now_ms; + } else if (sz == 0) { close(pack_objects.err); pack_objects.err = -1; } @@ -421,11 +445,14 @@ static void create_pack_file(struct upload_pack_data *pack_data, /* give priority to status messages */ continue; } + if (0 <= pu && (pfd[pu].revents & (POLLIN|POLLHUP))) { + bool did_send_data; int result = relay_pack_data(pack_objects.out, output_state, pack_data->use_sideband, - !!uri_protocols); + !!uri_protocols, + &did_send_data); if (result == 0) { close(pack_objects.out); @@ -433,6 +460,9 @@ static void create_pack_file(struct upload_pack_data *pack_data, } else if (result < 0) { goto fail; } + + if (did_send_data) + last_sent_ms = now_ms; } /* @@ -448,6 +478,7 @@ static void create_pack_file(struct upload_pack_data *pack_data, if (!ret && pack_data->use_sideband) { static const char buf[] = "0005\1"; write_or_die(1, buf, 5); + last_sent_ms = now_ms; } } From c422ec886c92d27b702a2b8cf657cede54658d94 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 13 Mar 2026 07:45:14 +0100 Subject: [PATCH 080/236] upload-pack: prefer flushing data over sending keepalive When using the sideband in git-upload-pack(1) we know to send out keepalive packets in case generating the pack takes too long. These keepalives take the form of a simple empty pktline. In the preceding commit we have adapted git-upload-pack(1) to buffer data more aggressively before sending it to the client. This creates an obvious optimization opportunity: when we hit the keepalive timeout while we still hold on to some buffered data, then it makes more sense to flush out the data instead of sending the empty keepalive packet. This is overall not going to be a significant win. Most keepalives will come before the pack data starts, and once pack-objects starts producing data, it tends to do so pretty consistently. And of course we can't send data before we see the PACK header, because the whole point is to buffer the early bit waiting for packfile URIs. But the optimization is easy enough to realize. Do so and flush out data instead of sending an empty pktline. Suggested-by: Jeff King Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- upload-pack.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 04521e57c967f3..ef8f8189c1714f 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -466,18 +466,27 @@ static void create_pack_file(struct upload_pack_data *pack_data, } /* - * We hit the keepalive timeout without saying anything; send - * an empty message on the data sideband just to let the other - * side know we're still working on it, but don't have any data - * yet. + * We hit the keepalive timeout without saying anything. If we + * have pending data we flush it out to the caller now. + * Otherwise, we send an empty message on the data sideband + * just to let the other side know we're still working on it, + * but don't have any data yet. * * If we don't have a sideband channel, there's no room in the * protocol to say anything, so those clients are just out of * luck. */ if (!ret && pack_data->use_sideband) { - static const char buf[] = "0005\1"; - write_or_die(1, buf, 5); + if (output_state->packfile_started && output_state->used > 1) { + send_client_data(1, output_state->buffer, output_state->used - 1, + pack_data->use_sideband); + output_state->buffer[0] = output_state->buffer[output_state->used - 1]; + output_state->used = 1; + } else { + static const char buf[] = "0005\1"; + write_or_die(1, buf, 5); + } + last_sent_ms = now_ms; } } From a5816e4596891f1c7552b049053e94929c699b78 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 13 Mar 2026 07:45:15 +0100 Subject: [PATCH 081/236] upload-pack: reduce lock contention when writing packfile data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In our production systems we have recently observed write contention in git-upload-pack(1). The system in question was consistently streaming packfiles at a rate of dozens of gigabits per second, but curiously the system was neither bottlenecked on CPU, memory or IOPS. We eventually discovered that Git was spending 80% of its time in `pipe_write()`, out of which almost all of the time was spent in the `ep_poll_callback` function in the kernel. Quoting the reporter: This infrastructure is part of an event notification queue designed to allow for multiple producers to emit events, but that concurrency safety is guarded by 3 layers of locking. The layer we're hitting contention in uses a simple reader/writer lock mode (a.k.a. shared versus exclusive mode), where producers need shared-mode (read mode), and various other actions use exclusive (write) mode. The system in question generates workloads where we have hundreds of git-upload-pack(1) processes active at the same point in time. These processes end up contending around those locks, and the consequence is that the Git processes stall. Now git-upload-pack(1) already has the infrastructure in place to buffer some of the data it reads from git-pack-objects(1) before actually sending it out. We only use this infrastructure in very limited ways though, so we generally end up matching one read(3p) call with one write(3p) call. Even worse, when the sideband is enabled we end up matching one read with _two_ writes: one for the pkt-line length, and one for the packfile data. Extend our use of the buffering infrastructure so that we soak up bytes until the buffer is filled up at least 2/3rds of its capacity. The change is relatively simple to implement as we already know to flush the buffer in `create_pack_file()` after git-pack-objects(1) has finished. This significantly reduces the number of write(3p) syscalls we need to do. Before this change, cloning the Linux repository resulted in around 400,000 write(3p) syscalls. With the buffering in place we only do around 130,000 syscalls. Now we could of course go even further and make sure that we always fill up the whole buffer. But this might cause an increase in read(3p) syscalls, and some tests show that this only reduces the number of write(3p) syscalls from 130,000 to 100,000. So overall this doesn't seem worth it. Note that the issue could also be fixed by adapting the write buffer that we use in the downstream git-pack-objects(1) command, and such a change would have roughly the same result. But the command that generates the packfile data may not always be git-pack-objects(1) as it can be changed via "uploadpack.packObjectsHook", so such a fix would only help in _some_ cases. Regardless of that, we'll also adapt the write buffer size of git-pack-objects(1) in a subsequent commit. Helped-by: Matt Smiley Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- upload-pack.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/upload-pack.c b/upload-pack.c index ef8f8189c1714f..92e1ff3ba2ec6e 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -276,6 +276,13 @@ static int relay_pack_data(int pack_objects_out, struct output_state *os, } } + /* + * Make sure that we buffer some data before sending it to the client. + * This significantly reduces the number of write(3p) syscalls. + */ + if (readsz && os->used < (sizeof(os->buffer) * 2 / 3)) + return readsz; + if (os->used > 1) { send_client_data(1, os->buffer, os->used - 1, use_sideband); os->buffer[0] = os->buffer[os->used - 1]; From 3b9b2c2a29a1d529ca9884fa0a6529f6e2496abe Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 13 Mar 2026 07:45:16 +0100 Subject: [PATCH 082/236] compat/posix: introduce writev(3p) wrapper In a subsequent commit we're going to add the first caller to writev(3p). Introduce a compatibility wrapper for this syscall that we can use on systems that don't have this syscall. The syscall exists on modern Unixes like Linux and macOS, and seemingly even for NonStop according to [1]. It doesn't seem to exist on Windows though. [1]: http://nonstoptools.com/manuals/OSS-SystemCalls.pdf [2]: https://www.gnu.org/software/gnulib/manual/html_node/writev.html Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 4 ++++ compat/posix.h | 14 ++++++++++++++ compat/writev.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ config.mak.uname | 2 ++ meson.build | 1 + 5 files changed, 65 insertions(+) create mode 100644 compat/writev.c diff --git a/Makefile b/Makefile index 8aa489f3b6812f..61c7dff942a0fe 100644 --- a/Makefile +++ b/Makefile @@ -2015,6 +2015,10 @@ ifdef NO_PREAD COMPAT_CFLAGS += -DNO_PREAD COMPAT_OBJS += compat/pread.o endif +ifdef NO_WRITEV + COMPAT_CFLAGS += -DNO_WRITEV + COMPAT_OBJS += compat/writev.o +endif ifdef NO_FAST_WORKING_DIRECTORY BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY endif diff --git a/compat/posix.h b/compat/posix.h index 245386fa4a9f4e..3c611d2736c47a 100644 --- a/compat/posix.h +++ b/compat/posix.h @@ -137,6 +137,9 @@ #include #include #include +#ifndef NO_WRITEV +#include +#endif #include #ifndef NO_SYS_SELECT_H #include @@ -323,6 +326,17 @@ int git_lstat(const char *, struct stat *); ssize_t git_pread(int fd, void *buf, size_t count, off_t offset); #endif +#ifdef NO_WRITEV +#define writev git_writev +#define iovec git_iovec +struct git_iovec { + void *iov_base; + size_t iov_len; +}; + +ssize_t git_writev(int fd, const struct iovec *iov, int iovcnt); +#endif + #ifdef NO_SETENV #define setenv gitsetenv int gitsetenv(const char *, const char *, int); diff --git a/compat/writev.c b/compat/writev.c new file mode 100644 index 00000000000000..3a94870a2f5855 --- /dev/null +++ b/compat/writev.c @@ -0,0 +1,44 @@ +#include "../git-compat-util.h" +#include "../wrapper.h" + +ssize_t git_writev(int fd, const struct iovec *iov, int iovcnt) +{ + size_t total_written = 0; + size_t sum = 0; + + /* + * According to writev(3p), the syscall shall error with EINVAL in case + * the sum of `iov_len` overflows `ssize_t`. + */ + for (int i = 0; i < iovcnt; i++) { + if (iov[i].iov_len > maximum_signed_value_of_type(ssize_t) || + iov[i].iov_len + sum > maximum_signed_value_of_type(ssize_t)) { + errno = EINVAL; + return -1; + } + + sum += iov[i].iov_len; + } + + for (int i = 0; i < iovcnt; i++) { + const char *bytes = iov[i].iov_base; + size_t iovec_written = 0; + + while (iovec_written < iov[i].iov_len) { + ssize_t bytes_written = xwrite(fd, bytes + iovec_written, + iov[i].iov_len - iovec_written); + if (bytes_written < 0) { + if (total_written) + goto out; + return bytes_written; + } + if (!bytes_written) + goto out; + iovec_written += bytes_written; + total_written += bytes_written; + } + } + +out: + return (ssize_t) total_written; +} diff --git a/config.mak.uname b/config.mak.uname index 3c35ae33a3c0c0..8e66a1cc0a04c2 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -457,6 +457,7 @@ ifeq ($(uname_S),Windows) SANE_TOOL_PATH ?= $(msvc_bin_dir_msys) HAVE_ALLOCA_H = YesPlease NO_PREAD = YesPlease + NO_WRITEV = YesPlease NEEDS_CRYPTO_WITH_SSL = YesPlease NO_LIBGEN_H = YesPlease NO_POLL = YesPlease @@ -672,6 +673,7 @@ ifeq ($(uname_S),MINGW) pathsep = ; HAVE_ALLOCA_H = YesPlease NO_PREAD = YesPlease + NO_WRITEV = YesPlease NEEDS_CRYPTO_WITH_SSL = YesPlease NO_LIBGEN_H = YesPlease NO_POLL = YesPlease diff --git a/meson.build b/meson.build index dd52efd1c87574..f1cd89efc7d38e 100644 --- a/meson.build +++ b/meson.build @@ -1405,6 +1405,7 @@ checkfuncs = { 'initgroups' : [], 'strtoumax' : ['strtoumax.c', 'strtoimax.c'], 'pread' : ['pread.c'], + 'writev' : ['writev.c'], } if host_machine.system() == 'windows' From 1970fcef93adcc5a35f6468d00a5a634d5af2b3c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 13 Mar 2026 07:45:17 +0100 Subject: [PATCH 083/236] wrapper: introduce writev(3p) wrappers In the preceding commit we have added a compatibility wrapper for the writev(3p) syscall. Introduce some generic wrappers for this function that we nowadays take for granted in the Git codebase. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- wrapper.c | 41 +++++++++++++++++++++++++++++++++++++++++ wrapper.h | 9 +++++++++ write-or-die.c | 8 ++++++++ write-or-die.h | 1 + 4 files changed, 59 insertions(+) diff --git a/wrapper.c b/wrapper.c index b794fb20e71879..fa40f399b3452d 100644 --- a/wrapper.c +++ b/wrapper.c @@ -323,6 +323,47 @@ ssize_t write_in_full(int fd, const void *buf, size_t count) return total; } +ssize_t writev_in_full(int fd, struct iovec *iov, int iovcnt) +{ + ssize_t total_written = 0; + + while (iovcnt) { + ssize_t bytes_written = writev(fd, iov, iovcnt); + if (bytes_written < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; + return -1; + } + if (!bytes_written) { + errno = ENOSPC; + return -1; + } + + total_written += bytes_written; + + /* + * We first need to discard any iovec entities that have been + * fully written. + */ + while (iovcnt && (size_t)bytes_written >= iov->iov_len) { + bytes_written -= iov->iov_len; + iov++; + iovcnt--; + } + + /* + * Finally, we need to adjust the last iovec in case we have + * performed a partial write. + */ + if (iovcnt && bytes_written) { + iov->iov_base = (char *) iov->iov_base + bytes_written; + iov->iov_len -= bytes_written; + } + } + + return total_written; +} + ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset) { char *p = buf; diff --git a/wrapper.h b/wrapper.h index 15ac3bab6e9748..27519b32d1782d 100644 --- a/wrapper.h +++ b/wrapper.h @@ -47,6 +47,15 @@ ssize_t read_in_full(int fd, void *buf, size_t count); ssize_t write_in_full(int fd, const void *buf, size_t count); ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset); +/* + * Try to write all iovecs. Returns -1 in case an error occurred with a proper + * errno set, the number of bytes written otherwise. + * + * Note that the iovec will be modified as a result of this call to adjust for + * partial writes! + */ +ssize_t writev_in_full(int fd, struct iovec *iov, int iovcnt); + static inline ssize_t write_str_in_full(int fd, const char *str) { return write_in_full(fd, str, strlen(str)); diff --git a/write-or-die.c b/write-or-die.c index 01a9a51fa2fcd7..5f522fb7287382 100644 --- a/write-or-die.c +++ b/write-or-die.c @@ -96,6 +96,14 @@ void write_or_die(int fd, const void *buf, size_t count) } } +void writev_or_die(int fd, struct iovec *iov, int iovlen) +{ + if (writev_in_full(fd, iov, iovlen) < 0) { + check_pipe(errno); + die_errno("writev error"); + } +} + void fwrite_or_die(FILE *f, const void *buf, size_t count) { if (fwrite(buf, 1, count, f) != count) diff --git a/write-or-die.h b/write-or-die.h index 65a5c42a47ac86..ae3d7d88b87699 100644 --- a/write-or-die.h +++ b/write-or-die.h @@ -7,6 +7,7 @@ void fprintf_or_die(FILE *, const char *fmt, ...); void fwrite_or_die(FILE *f, const void *buf, size_t count); void fflush_or_die(FILE *f); void write_or_die(int fd, const void *buf, size_t count); +void writev_or_die(int fd, struct iovec *iov, int iovlen); /* * These values are used to help identify parts of a repository to fsync. From 26986f4cbaf38d84a82b0b35da211389ce49552c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 13 Mar 2026 07:45:18 +0100 Subject: [PATCH 084/236] sideband: use writev(3p) to send pktlines Every pktline that we send out via `send_sideband()` currently requires two syscalls: one to write the pktline's length, and one to send its data. This typically isn't all that much of a problem, but under extreme load the syscalls may cause contention in the kernel. Refactor the code to instead use the newly introduced writev(3p) infra so that we can send out the data with a single syscall. This reduces the number of syscalls from around 133,000 calls to write(3p) to around 67,000 calls to writev(3p). Suggested-by: Jeff King Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- sideband.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sideband.c b/sideband.c index ea7c25211ef7e1..1ed6614eaf1baf 100644 --- a/sideband.c +++ b/sideband.c @@ -264,6 +264,7 @@ void send_sideband(int fd, int band, const char *data, ssize_t sz, int packet_ma const char *p = data; while (sz) { + struct iovec iov[2]; unsigned n; char hdr[5]; @@ -273,12 +274,19 @@ void send_sideband(int fd, int band, const char *data, ssize_t sz, int packet_ma if (0 <= band) { xsnprintf(hdr, sizeof(hdr), "%04x", n + 5); hdr[4] = band; - write_or_die(fd, hdr, 5); + iov[0].iov_base = hdr; + iov[0].iov_len = 5; } else { xsnprintf(hdr, sizeof(hdr), "%04x", n + 4); - write_or_die(fd, hdr, 4); + iov[0].iov_base = hdr; + iov[0].iov_len = 4; } - write_or_die(fd, p, n); + + iov[1].iov_base = (void *) p; + iov[1].iov_len = n; + + writev_or_die(fd, iov, ARRAY_SIZE(iov)); + p += n; sz -= n; } From a1118c0a44606e0b71e515b05112ff38fef989c0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 13 Mar 2026 07:45:19 +0100 Subject: [PATCH 085/236] csum-file: introduce `hashfd_ext()` Introduce a new `hashfd_ext()` function that takes an options structure. This function will replace `hashd_throughput()` in the next commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- csum-file.c | 22 +++++++++++++--------- csum-file.h | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/csum-file.c b/csum-file.c index 6e21e3cac8a636..a50416247edf25 100644 --- a/csum-file.c +++ b/csum-file.c @@ -161,17 +161,16 @@ struct hashfile *hashfd_check(const struct git_hash_algo *algop, return f; } -static struct hashfile *hashfd_internal(const struct git_hash_algo *algop, - int fd, const char *name, - struct progress *tp, - size_t buffer_len) +struct hashfile *hashfd_ext(const struct git_hash_algo *algop, + int fd, const char *name, + const struct hashfd_options *opts) { struct hashfile *f = xmalloc(sizeof(*f)); f->fd = fd; f->check_fd = -1; f->offset = 0; f->total = 0; - f->tp = tp; + f->tp = opts->progress; f->name = name; f->do_crc = 0; f->skip_hash = 0; @@ -179,8 +178,8 @@ static struct hashfile *hashfd_internal(const struct git_hash_algo *algop, f->algop = unsafe_hash_algo(algop); f->algop->init_fn(&f->ctx); - f->buffer_len = buffer_len; - f->buffer = xmalloc(buffer_len); + f->buffer_len = opts->buffer_len ? opts->buffer_len : 128 * 1024; + f->buffer = xmalloc(f->buffer_len); f->check_buffer = NULL; return f; @@ -194,7 +193,8 @@ struct hashfile *hashfd(const struct git_hash_algo *algop, * measure the rate of data passing through this hashfile, * use a larger buffer size to reduce fsync() calls. */ - return hashfd_internal(algop, fd, name, NULL, 128 * 1024); + struct hashfd_options opts = { 0 }; + return hashfd_ext(algop, fd, name, &opts); } struct hashfile *hashfd_throughput(const struct git_hash_algo *algop, @@ -206,7 +206,11 @@ struct hashfile *hashfd_throughput(const struct git_hash_algo *algop, * size so the progress indicators arrive at a more * frequent rate. */ - return hashfd_internal(algop, fd, name, tp, 8 * 1024); + struct hashfd_options opts = { + .progress = tp, + .buffer_len = 8 * 1024, + }; + return hashfd_ext(algop, fd, name, &opts); } void hashfile_checkpoint_init(struct hashfile *f, diff --git a/csum-file.h b/csum-file.h index 07ae11024afc34..a03b60120d85f1 100644 --- a/csum-file.h +++ b/csum-file.h @@ -45,6 +45,20 @@ int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *); #define CSUM_FSYNC 2 #define CSUM_HASH_IN_STREAM 4 +struct hashfd_options { + /* + * Throughput progress that counts the number of bytes that have been + * hashed. + */ + struct progress *progress; + + /* The length of the buffer that shall be used read read data. */ + size_t buffer_len; +}; + +struct hashfile *hashfd_ext(const struct git_hash_algo *algop, + int fd, const char *name, + const struct hashfd_options *opts); struct hashfile *hashfd(const struct git_hash_algo *algop, int fd, const char *name); struct hashfile *hashfd_check(const struct git_hash_algo *algop, From 2bf8f36ddb308b084912f8265ad6fd60df34a036 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 13 Mar 2026 07:45:20 +0100 Subject: [PATCH 086/236] csum-file: drop `hashfd_throughput()` The `hashfd_throughput()` function is used by a single callsite in git-pack-objects(1). In contrast to `hashfd()`, this function uses a progress meter to measure throughput and a smaller buffer length so that the progress meter can provide more granular metrics. We're going to change that caller in the next commit to be a bit more specific to packing objects. As such, `hashfd_throughput()` will be a somewhat unfitting mechanism for any potential new callers. Drop the function and replace it with a call to `hashfd_ext()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 19 +++++++++++++++---- csum-file.c | 16 ---------------- csum-file.h | 2 -- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5846b6a293a27b..ba150a80ada2df 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1331,11 +1331,22 @@ static void write_pack_file(void) unsigned char hash[GIT_MAX_RAWSZ]; char *pack_tmp_name = NULL; - if (pack_to_stdout) - f = hashfd_throughput(the_repository->hash_algo, 1, - "", progress_state); - else + if (pack_to_stdout) { + /* + * Since we are expecting to report progress of the + * write into this hashfile, use a smaller buffer + * size so the progress indicators arrive at a more + * frequent rate. + */ + struct hashfd_options opts = { + .progress = progress_state, + .buffer_len = 8 * 1024, + }; + f = hashfd_ext(the_repository->hash_algo, 1, + "", &opts); + } else { f = create_tmp_packfile(the_repository, &pack_tmp_name); + } offset = write_pack_header(f, nr_remaining); diff --git a/csum-file.c b/csum-file.c index a50416247edf25..5dfaca55435d8f 100644 --- a/csum-file.c +++ b/csum-file.c @@ -197,22 +197,6 @@ struct hashfile *hashfd(const struct git_hash_algo *algop, return hashfd_ext(algop, fd, name, &opts); } -struct hashfile *hashfd_throughput(const struct git_hash_algo *algop, - int fd, const char *name, struct progress *tp) -{ - /* - * Since we are expecting to report progress of the - * write into this hashfile, use a smaller buffer - * size so the progress indicators arrive at a more - * frequent rate. - */ - struct hashfd_options opts = { - .progress = tp, - .buffer_len = 8 * 1024, - }; - return hashfd_ext(algop, fd, name, &opts); -} - void hashfile_checkpoint_init(struct hashfile *f, struct hashfile_checkpoint *checkpoint) { diff --git a/csum-file.h b/csum-file.h index a03b60120d85f1..01472555c81e3b 100644 --- a/csum-file.h +++ b/csum-file.h @@ -63,8 +63,6 @@ struct hashfile *hashfd(const struct git_hash_algo *algop, int fd, const char *name); struct hashfile *hashfd_check(const struct git_hash_algo *algop, const char *name); -struct hashfile *hashfd_throughput(const struct git_hash_algo *algop, - int fd, const char *name, struct progress *tp); /* * Free the hashfile without flushing its contents to disk. This only From 835e0aaf6f0e07e9f9a393ed0e456db7c1be33ef Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 13 Mar 2026 07:45:21 +0100 Subject: [PATCH 087/236] builtin/pack-objects: reduce lock contention when writing packfile data When running `git pack-objects --stdout` we feed the data through `hashfd_ext()` with a progress meter and a smaller-than-usual buffer length of 8kB so that we can track throughput more granularly. But as packfiles tend to be on the larger side, this small buffer size may cause a ton of write(3p) syscalls. Originally, the buffer we used in `hashfd()` was 8kB for all use cases. This was changed though in 2ca245f8be (csum-file.h: increase hashfile buffer size, 2021-05-18) because we noticed that the number of writes can have an impact on performance. So the buffer size was increased to 128kB, which improved performance a bit for some use cases. But the commit didn't touch the buffer size for `hashd_throughput()`. The reasoning here was that callers expect the progress indicator to update frequently, and a larger buffer size would of course reduce the update frequency especially on slow networks. While that is of course true, there was (and still is, even though it's now a call to `hashfd_ext()`) only a single caller of this function in git-pack-objects(1). This command is responsible for writing packfiles, and those packfiles are often on the bigger side. So arguably: - The user won't care about increments of 8kB when packfiles tend to be megabytes or even gigabytes in size. - Reducing the number of syscalls would be even more valuable here than it would be for multi-pack indices, which was the benchmark done in the mentioned commit, as MIDXs are typically significantly smaller than packfiles. - Nowadays, many internet connections should be able to transfer data at a rate significantly higher than 8kB per second. Update the buffer to instead have a size of `LARGE_PACKET_DATA_MAX - 1`, which translates to ~64kB. This limit was chosen because `git pack-objects --stdout` is most often used when sending packfiles via git-upload-pack(1), where packfile data is chunked into pktlines when using the sideband. Furthermore, most internet connections should have a bandwidth signifcantly higher than 64kB/s, so we'd still be able to observe progress updates at a rate of at least once per second. This change significantly reduces the number of write(3p) syscalls from 355,000 to 44,000 when packing the Linux repository. While this results in a small performance improvement on an otherwise-unused system, this improvement is mostly negligible. More importantly though, it will reduce lock contention in the kernel on an extremely busy system where we have many processes writing data at once. Suggested-by: Jeff King Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index ba150a80ada2df..7301ed8c681dc5 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -41,6 +41,7 @@ #include "promisor-remote.h" #include "pack-mtimes.h" #include "parse-options.h" +#include "pkt-line.h" #include "blob.h" #include "tree.h" #include "path-walk.h" @@ -1333,14 +1334,17 @@ static void write_pack_file(void) if (pack_to_stdout) { /* - * Since we are expecting to report progress of the - * write into this hashfile, use a smaller buffer - * size so the progress indicators arrive at a more - * frequent rate. + * This command is most often invoked via + * git-upload-pack(1), which will typically chunk data + * into pktlines. As such, we use the maximum data + * length of them as buffer length. + * + * Note that we need to subtract one though to + * accomodate for the sideband byte. */ struct hashfd_options opts = { .progress = progress_state, - .buffer_len = 8 * 1024, + .buffer_len = LARGE_PACKET_DATA_MAX - 1, }; f = hashfd_ext(the_repository->hash_algo, 1, "", &opts); From ff5a3ea97dd302bfd7430180c8512a6e887362cb Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Sun, 15 Mar 2026 16:18:50 +0000 Subject: [PATCH 088/236] worktree: remove "the_repository" from is_current_worktree() is_current_worktree() compares the gitdir of the worktree to the gitdir of "the_repository" and returns true when they match. To get the gitdir of the worktree it calls get_workree_git_dir() which also depends on "the_repository". This has the effect that even if "wt->path" matches "wt->repo->worktree" is_current_worktree(wt) will return false when "wt->repo" is not "the_repository" which is confusing. The use of "the_repository" in is_current_wortree() comes from replacing get_git_dir() with repo_get_git_dir() in 246deeac951 (environment: make `get_git_dir()` accept a repository, 2024-09-12). In get_worktree_git_dir() it comes from replacing git_common_path() with repo_common_path() in 07242c2a5af (path: drop `git_common_path()` in favor of `repo_common_path()`, 2025-02-07). In both cases we have a repository instance available so use that instead. This means that a worktree "wt" is always considered current when "wt->path" matches "wt->repo->worktree" and so the worktree returned by get_worktree_from_repository() is always considered current. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- worktree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/worktree.c b/worktree.c index e9ff6e6ef2eb58..344ad0c031bf1f 100644 --- a/worktree.c +++ b/worktree.c @@ -58,7 +58,7 @@ static void add_head_info(struct worktree *wt) static int is_current_worktree(struct worktree *wt) { - char *git_dir = absolute_pathdup(repo_get_git_dir(the_repository)); + char *git_dir = absolute_pathdup(repo_get_git_dir(wt->repo)); char *wt_git_dir = get_worktree_git_dir(wt); int is_current = !fspathcmp(git_dir, absolute_path(wt_git_dir)); free(wt_git_dir); @@ -78,7 +78,7 @@ struct worktree *get_worktree_from_repository(struct repository *repo) wt->is_bare = !repo->worktree; if (fspathcmp(gitdir, commondir)) wt->id = xstrdup(find_last_dir_sep(gitdir) + 1); - wt->is_current = is_current_worktree(wt); + wt->is_current = true; add_head_info(wt); free(gitdir); @@ -229,9 +229,9 @@ char *get_worktree_git_dir(const struct worktree *wt) if (!wt) return xstrdup(repo_get_git_dir(the_repository)); else if (!wt->id) - return xstrdup(repo_get_common_dir(the_repository)); + return xstrdup(repo_get_common_dir(wt->repo)); else - return repo_common_path(the_repository, "worktrees/%s", wt->id); + return repo_common_path(wt->repo, "worktrees/%s", wt->id); } static struct worktree *find_worktree_by_suffix(struct worktree **list, From 4a40675ae10b336ff016a585ba9a5d37a2fd1c3b Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Sun, 15 Mar 2026 16:18:51 +0000 Subject: [PATCH 089/236] worktree add: stop reading ".git/HEAD" The function can_use_local_refs() prints a warning if there are no local branches and HEAD is invalid or points to an unborn branch. As part of the warning it prints the contents of ".git/HEAD". In a repository using the reftable backend HEAD is not stored in the filesystem so reading that file is pointless. In a repository using the files backend it is unclear how useful printing it is - it would be better to diagnose the problem for the user. For now, simplify the warning by not printing the file contents and adjust the relevant test case accordingly. Also fixup the test case to use test_grep so that anyone trying to debug a test failure in the future is not met by a wall of silence. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- builtin/worktree.c | 21 ++------------------- t/t2400-worktree-add.sh | 28 ++++++++++++---------------- 2 files changed, 14 insertions(+), 35 deletions(-) diff --git a/builtin/worktree.c b/builtin/worktree.c index bc2d0d645ba945..9170b2e8981b65 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -692,25 +692,8 @@ static int can_use_local_refs(const struct add_opts *opts) if (refs_head_ref(get_main_ref_store(the_repository), first_valid_ref, NULL)) { return 1; } else if (refs_for_each_branch_ref(get_main_ref_store(the_repository), first_valid_ref, NULL)) { - if (!opts->quiet) { - struct strbuf path = STRBUF_INIT; - struct strbuf contents = STRBUF_INIT; - char *wt_gitdir = get_worktree_git_dir(NULL); - - strbuf_add_real_path(&path, wt_gitdir); - strbuf_addstr(&path, "/HEAD"); - strbuf_read_file(&contents, path.buf, 64); - strbuf_stripspace(&contents, NULL); - strbuf_strip_suffix(&contents, "\n"); - - warning(_("HEAD points to an invalid (or orphaned) reference.\n" - "HEAD path: '%s'\n" - "HEAD contents: '%s'"), - path.buf, contents.buf); - strbuf_release(&path); - strbuf_release(&contents); - free(wt_gitdir); - } + if (!opts->quiet) + warning(_("HEAD points to an invalid (or orphaned) reference.\n")); return 1; } return 0; diff --git a/t/t2400-worktree-add.sh b/t/t2400-worktree-add.sh index 023e1301c8e68e..58b4445cc441a8 100755 --- a/t/t2400-worktree-add.sh +++ b/t/t2400-worktree-add.sh @@ -987,7 +987,7 @@ test_dwim_orphan () { then test_must_be_empty actual else - grep "$info_text" actual + test_grep "$info_text" actual fi elif [ "$outcome" = "no_infer" ] then @@ -996,39 +996,35 @@ test_dwim_orphan () { then test_must_be_empty actual else - ! grep "$info_text" actual + test_grep ! "$info_text" actual fi elif [ "$outcome" = "fetch_error" ] then test_must_fail git $dashc_args worktree add $args 2>actual && - grep "$fetch_error_text" actual + test_grep "$fetch_error_text" actual elif [ "$outcome" = "fatal_orphan_bad_combo" ] then test_must_fail git $dashc_args worktree add $args 2>actual && if [ $use_quiet -eq 1 ] then - ! grep "$info_text" actual + test_grep ! "$info_text" actual else - grep "$info_text" actual + test_grep "$info_text" actual fi && - grep "$bad_combo_regex" actual + test_grep "$bad_combo_regex" actual elif [ "$outcome" = "warn_bad_head" ] then test_must_fail git $dashc_args worktree add $args 2>actual && if [ $use_quiet -eq 1 ] then - grep "$invalid_ref_regex" actual && - ! grep "$orphan_hint" actual + test_grep "$invalid_ref_regex" actual && + test_grep ! "$orphan_hint" actual else - headpath=$(git $dashc_args rev-parse --path-format=absolute --git-path HEAD) && - headcontents=$(cat "$headpath") && - grep "HEAD points to an invalid (or orphaned) reference" actual && - grep "HEAD path: .$headpath." actual && - grep "HEAD contents: .$headcontents." actual && - grep "$orphan_hint" actual && - ! grep "$info_text" actual + test_grep "HEAD points to an invalid (or orphaned) reference" actual && + test_grep "$orphan_hint" actual && + test_grep ! "$info_text" actual fi && - grep "$invalid_ref_regex" actual + test_grep "$invalid_ref_regex" actual else # Unreachable false From 7d33b82c5d8e1b381c675490ece4c8f101973c85 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Sun, 15 Mar 2026 16:18:52 +0000 Subject: [PATCH 090/236] worktree: reject NULL worktree in get_worktree_git_dir() This removes the final dependence on "the_repository" in get_worktree_git_dir(). The last commit removed only caller that passed a NULL worktree. get_worktree_git_dir() has the following callers: - branch.c:prepare_checked_out_branches() which loops over all worktrees. - builtin/fsck.c:cmd_fsck() which loops over all worktrees. - builtin/receive-pack.c:update_worktree() which is called from update() only when "worktree" is non-NULL. - builtin/worktree.c:validate_no_submodules() which is called from check_clean_worktree() and move_worktree(), both of which supply a non-NULL worktree. - reachable.c:add_rebase_files() which loops over all worktrees. - revision.c:add_index_objects_to_pending() which loops over all worktrees. - worktree.c:is_current_worktree() which expects a non-NULL worktree. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- worktree.c | 2 +- worktree.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/worktree.c b/worktree.c index 344ad0c031bf1f..1ed5e8c3cd27a1 100644 --- a/worktree.c +++ b/worktree.c @@ -227,7 +227,7 @@ struct worktree **get_worktrees_without_reading_head(void) char *get_worktree_git_dir(const struct worktree *wt) { if (!wt) - return xstrdup(repo_get_git_dir(the_repository)); + BUG("%s() called with NULL worktree", __func__); else if (!wt->id) return xstrdup(repo_get_common_dir(wt->repo)); else diff --git a/worktree.h b/worktree.h index e450d1a3317e23..85d634c36c03cc 100644 --- a/worktree.h +++ b/worktree.h @@ -51,7 +51,6 @@ int submodule_uses_worktrees(const char *path); /* * Return git dir of the worktree. Note that the path may be relative. - * If wt is NULL, git dir of current worktree is returned. */ char *get_worktree_git_dir(const struct worktree *wt); From 2f0503971716ac21f37a0822a39740324b89c054 Mon Sep 17 00:00:00 2001 From: Ritesh Singh Jadoun Date: Sun, 15 Mar 2026 13:40:32 +0530 Subject: [PATCH 091/236] t/pack-refs-tests: use test_path_is_missing The pack-refs tests previously used raw 'test -f' and 'test -e' checks with negation. Update them to use Git's standard helper function test_path_is_missing for consistency and clearer failure reporting. As suggested in review, replaced the negated 'test_path_exists' with test_path_is_missing to better reflect the expected absence of paths. Signed-off-by: Ritesh Singh Jadoun Signed-off-by: Junio C Hamano --- t/pack-refs-tests.sh | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/t/pack-refs-tests.sh b/t/pack-refs-tests.sh index 81086c369089b9..0b196ea511863f 100644 --- a/t/pack-refs-tests.sh +++ b/t/pack-refs-tests.sh @@ -61,13 +61,13 @@ test_expect_success 'see if a branch still exists after git ${pack_refs} --prune test_expect_success 'see if git ${pack_refs} --prune remove ref files' ' git branch f && git ${pack_refs} --all --prune && - ! test -f .git/refs/heads/f + test_path_is_missing .git/refs/heads/f ' test_expect_success 'see if git ${pack_refs} --prune removes empty dirs' ' git branch r/s/t && git ${pack_refs} --all --prune && - ! test -e .git/refs/heads/r + test_path_is_missing .git/refs/heads/r ' test_expect_success 'git branch g should work when git branch g/h has been deleted' ' @@ -111,43 +111,43 @@ test_expect_success 'test excluded refs are not packed' ' git branch dont_pack2 && git branch pack_this && git ${pack_refs} --all --exclude "refs/heads/dont_pack*" && - test -f .git/refs/heads/dont_pack1 && - test -f .git/refs/heads/dont_pack2 && - ! test -f .git/refs/heads/pack_this' + test_path_is_file .git/refs/heads/dont_pack1 && + test_path_is_file .git/refs/heads/dont_pack2 && + test_path_is_missing .git/refs/heads/pack_this' test_expect_success 'test --no-exclude refs clears excluded refs' ' git branch dont_pack3 && git branch dont_pack4 && git ${pack_refs} --all --exclude "refs/heads/dont_pack*" --no-exclude && - ! test -f .git/refs/heads/dont_pack3 && - ! test -f .git/refs/heads/dont_pack4' + test_path_is_missing .git/refs/heads/dont_pack3 && + test_path_is_missing .git/refs/heads/dont_pack4' test_expect_success 'test only included refs are packed' ' git branch pack_this1 && git branch pack_this2 && git tag dont_pack5 && git ${pack_refs} --include "refs/heads/pack_this*" && - test -f .git/refs/tags/dont_pack5 && - ! test -f .git/refs/heads/pack_this1 && - ! test -f .git/refs/heads/pack_this2' + test_path_is_file .git/refs/tags/dont_pack5 && + test_path_is_missing .git/refs/heads/pack_this1 && + test_path_is_missing .git/refs/heads/pack_this2' test_expect_success 'test --no-include refs clears included refs' ' git branch pack1 && git branch pack2 && git ${pack_refs} --include "refs/heads/pack*" --no-include && - test -f .git/refs/heads/pack1 && - test -f .git/refs/heads/pack2' + test_path_is_file .git/refs/heads/pack1 && + test_path_is_file .git/refs/heads/pack2' test_expect_success 'test --exclude takes precedence over --include' ' git branch dont_pack5 && git ${pack_refs} --include "refs/heads/pack*" --exclude "refs/heads/pack*" && - test -f .git/refs/heads/dont_pack5' + test_path_is_file .git/refs/heads/dont_pack5' test_expect_success 'see if up-to-date packed refs are preserved' ' git branch q && git ${pack_refs} --all --prune && git update-ref refs/heads/q refs/heads/q && - ! test -f .git/refs/heads/q + test_path_is_missing .git/refs/heads/q ' test_expect_success 'pack, prune and repack' ' From acefb71d0d4e5a16ccda9226acac0920202de771 Mon Sep 17 00:00:00 2001 From: Alan Braithwaite Date: Sun, 15 Mar 2026 05:37:02 +0000 Subject: [PATCH 092/236] clone: add clone..defaultObjectFilter config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new configuration option that lets users specify a default partial clone filter, optionally scoped by URL pattern. When cloning a repository whose URL matches a configured pattern, git-clone automatically applies the filter, equivalent to passing --filter on the command line. [clone] defaultObjectFilter = blob:limit=1m [clone "https://github.com/"] defaultObjectFilter = blob:limit=5m [clone "https://internal.corp.com/large-project/"] defaultObjectFilter = blob:none The bare clone.defaultObjectFilter applies to all clones. The URL-qualified form clone..defaultObjectFilter restricts the setting to matching URLs. URL matching uses the existing urlmatch_config_entry() infrastructure, following the same rules as http..* — a domain, namespace, or specific project can be matched, and the most specific match wins. The config only affects the initial clone. Once the clone completes, the filter is recorded in remote..partialCloneFilter, so subsequent fetches inherit it automatically. An explicit --filter on the command line takes precedence, and --no-filter defeats the configured default entirely. Signed-off-by: Alan Braithwaite Signed-off-by: Junio C Hamano --- Documentation/config/clone.adoc | 34 +++++++++ builtin/clone.c | 54 ++++++++++++++ t/t5616-partial-clone.sh | 126 ++++++++++++++++++++++++++++++++ 3 files changed, 214 insertions(+) diff --git a/Documentation/config/clone.adoc b/Documentation/config/clone.adoc index 0a10efd174ea4b..1d6c0957a066c5 100644 --- a/Documentation/config/clone.adoc +++ b/Documentation/config/clone.adoc @@ -21,3 +21,37 @@ endif::[] If a partial clone filter is provided (see `--filter` in linkgit:git-rev-list[1]) and `--recurse-submodules` is used, also apply the filter to submodules. + +`clone.defaultObjectFilter`:: +`clone..defaultObjectFilter`:: + When set to a filter spec string (e.g., `blob:limit=1m`, + `blob:none`, `tree:0`), linkgit:git-clone[1] will automatically + use `--filter=` to enable partial clone behavior. + Objects matching the filter are excluded from the initial + transfer and lazily fetched on demand (e.g., during checkout). + Subsequent fetches inherit the filter via the per-remote config + that is written during the clone. ++ +The bare `clone.defaultObjectFilter` applies to all clones. The +URL-qualified form `clone..defaultObjectFilter` restricts the +setting to clones whose URL matches ``, following the same +rules as `http..*` (see linkgit:git-config[1]). The most +specific URL match wins. You can match a domain, a namespace, or a +specific project: ++ +---- +[clone] + defaultObjectFilter = blob:limit=1m + +[clone "https://github.com/"] + defaultObjectFilter = blob:limit=5m + +[clone "https://internal.corp.com/large-project/"] + defaultObjectFilter = blob:none +---- ++ +An explicit `--filter` option on the command line takes precedence +over this config, and `--no-filter` defeats it entirely to force a +full clone. Only affects the initial clone; it has no effect on +later fetches into an existing repository. If the server does not +support object filtering, the setting is silently ignored. diff --git a/builtin/clone.c b/builtin/clone.c index fba3c9c508bc06..ed3af9325921f1 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -44,6 +44,7 @@ #include "path.h" #include "pkt-line.h" #include "list-objects-filter-options.h" +#include "urlmatch.h" #include "hook.h" #include "bundle.h" #include "bundle-uri.h" @@ -759,6 +760,51 @@ static int git_clone_config(const char *k, const char *v, return git_default_config(k, v, ctx, cb); } +static int clone_filter_collect(const char *var, const char *value, + const struct config_context *ctx UNUSED, + void *cb) +{ + char **filter_spec_p = cb; + + if (!strcmp(var, "clone.defaultobjectfilter")) { + if (!value) + return config_error_nonbool(var); + free(*filter_spec_p); + *filter_spec_p = xstrdup(value); + } + return 0; +} + +/* + * Look up clone.defaultObjectFilter or clone..defaultObjectFilter + * using the urlmatch infrastructure. A URL-qualified entry that matches + * the clone URL takes precedence over the bare form, following the same + * rules as http..* configuration variables. + */ +static char *get_default_object_filter(const char *url) +{ + struct urlmatch_config config = URLMATCH_CONFIG_INIT; + char *filter_spec = NULL; + char *normalized_url; + + config.section = "clone"; + config.key = "defaultobjectfilter"; + config.collect_fn = clone_filter_collect; + config.cb = &filter_spec; + + normalized_url = url_normalize(url, &config.url); + if (!normalized_url) { + urlmatch_config_release(&config); + return NULL; + } + + repo_config(the_repository, urlmatch_config_entry, &config); + free(normalized_url); + urlmatch_config_release(&config); + + return filter_spec; +} + static int write_one_config(const char *key, const char *value, const struct config_context *ctx, void *data) @@ -1059,6 +1105,14 @@ int cmd_clone(int argc, } else die(_("repository '%s' does not exist"), repo_name); + if (!filter_options.choice && !filter_options.no_filter) { + char *config_filter = get_default_object_filter(repo); + if (config_filter) { + parse_list_objects_filter(&filter_options, config_filter); + free(config_filter); + } + } + /* no need to be strict, transport_set_option() will validate it again */ if (option_depth && atoi(option_depth) < 1) die(_("depth %s is not a positive number"), option_depth); diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 1c2805accac636..cff3e06873bdb7 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -723,6 +723,132 @@ test_expect_success 'after fetching descendants of non-promisor commits, gc work git -C partial gc --prune=now ' +# Test clone..defaultObjectFilter config + +test_expect_success 'setup for clone.defaultObjectFilter tests' ' + git init default-filter-src && + echo "small" >default-filter-src/small.txt && + git -C default-filter-src add . && + git -C default-filter-src commit -m "initial" && + + git clone --bare "file://$(pwd)/default-filter-src" default-filter-srv.bare && + git -C default-filter-srv.bare config --local uploadpack.allowfilter 1 && + git -C default-filter-srv.bare config --local uploadpack.allowanysha1inwant 1 +' + +test_expect_success 'clone with clone..defaultObjectFilter applies filter' ' + test_when_finished "rm -r default-filter-clone" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:limit=1k" clone \ + "$SERVER_URL" default-filter-clone && + + echo true >expect && + git -C default-filter-clone config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "blob:limit=1024" >expect && + git -C default-filter-clone config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'clone with --filter overrides clone..defaultObjectFilter' ' + test_when_finished "rm -r default-filter-override" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:limit=1k" \ + clone --filter=blob:none "$SERVER_URL" default-filter-override && + + echo "blob:none" >expect && + git -C default-filter-override config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'clone with clone..defaultObjectFilter=blob:none works' ' + test_when_finished "rm -r default-filter-blobnone" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" clone \ + "$SERVER_URL" default-filter-blobnone && + + echo true >expect && + git -C default-filter-blobnone config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "blob:none" >expect && + git -C default-filter-blobnone config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'clone..defaultObjectFilter with tree:0 works' ' + test_when_finished "rm -r default-filter-tree0" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=tree:0" clone \ + "$SERVER_URL" default-filter-tree0 && + + echo true >expect && + git -C default-filter-tree0 config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "tree:0" >expect && + git -C default-filter-tree0 config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'most specific URL match wins for clone.defaultObjectFilter' ' + test_when_finished "rm -r default-filter-url-specific" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git \ + -c "clone.file://.defaultObjectFilter=blob:limit=1k" \ + -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" \ + clone "$SERVER_URL" default-filter-url-specific && + + echo "blob:none" >expect && + git -C default-filter-url-specific config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'non-matching URL does not apply clone.defaultObjectFilter' ' + test_when_finished "rm -r default-filter-url-nomatch" && + git \ + -c "clone.https://other.example.com/.defaultObjectFilter=blob:none" \ + clone "file://$(pwd)/default-filter-srv.bare" default-filter-url-nomatch && + + test_must_fail git -C default-filter-url-nomatch config --local remote.origin.promisor +' + +test_expect_success 'bare clone.defaultObjectFilter applies to all clones' ' + test_when_finished "rm -r default-filter-bare-key" && + git -c clone.defaultObjectFilter=blob:none \ + clone "file://$(pwd)/default-filter-srv.bare" default-filter-bare-key && + + echo true >expect && + git -C default-filter-bare-key config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "blob:none" >expect && + git -C default-filter-bare-key config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'URL-specific clone.defaultObjectFilter overrides bare form' ' + test_when_finished "rm -r default-filter-url-over-bare" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git \ + -c clone.defaultObjectFilter=blob:limit=1k \ + -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" \ + clone "$SERVER_URL" default-filter-url-over-bare && + + echo "blob:none" >expect && + git -C default-filter-url-over-bare config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success '--no-filter defeats clone.defaultObjectFilter' ' + test_when_finished "rm -r default-filter-no-filter" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" \ + clone --no-filter "$SERVER_URL" default-filter-no-filter && + + test_must_fail git -C default-filter-no-filter config --local remote.origin.promisor +' . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd From 65fec23b577d09122865d239ce454d7946691c2a Mon Sep 17 00:00:00 2001 From: Deveshi Dwivedi Date: Sun, 15 Mar 2026 09:44:43 +0000 Subject: [PATCH 093/236] coccinelle: detect struct strbuf passed by value Passing a struct strbuf by value to a function copies the struct but shares the underlying character array between caller and callee. If the callee causes a reallocation, the caller's copy becomes a dangling pointer, leading to a double-free when strbuf_release() is called. There is no coccinelle rule to catch this pattern. Jeff King suggested adding one during review of the write_worktree_linking_files() fix [1], and noted that a reporting rule using coccinelle's Python scripting extensions could emit a descriptive warning, but we do not currently require Python support in coccinelle. Add a transformation rule that rewrites a by-value strbuf parameter to a pointer. The detection is identical to what a Python-based reporting rule would catch; only the presentation differs. The resulting diff will not produce compilable code on its own (callers and the function body still need updating), but the spatch output alerts the developer that the signature needs attention. This is consistent with the other rules in strbuf.cocci, which also rewrite to the preferred form. [1] https://lore.kernel.org/git/20260309192600.GC309867@coredump.intra.peff.net/ Signed-off-by: Deveshi Dwivedi Signed-off-by: Junio C Hamano --- contrib/coccinelle/strbuf.cocci | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/contrib/coccinelle/strbuf.cocci b/contrib/coccinelle/strbuf.cocci index 5f06105df6db7b..83bd93be5fce8d 100644 --- a/contrib/coccinelle/strbuf.cocci +++ b/contrib/coccinelle/strbuf.cocci @@ -60,3 +60,14 @@ expression E1, E2; @@ - strbuf_addstr(E1, real_path(E2)); + strbuf_add_real_path(E1, E2); + +@@ +identifier fn, param; +@@ + fn(..., +- struct strbuf param ++ struct strbuf *param + ,...) + { + ... + } From 8f8e1b080701d4b02ca3732eed2706b1a5328c5d Mon Sep 17 00:00:00 2001 From: Deveshi Dwivedi Date: Sun, 15 Mar 2026 09:44:44 +0000 Subject: [PATCH 094/236] stash: do not pass strbuf by value save_untracked_files() takes its 'files' parameter as struct strbuf by value. Passing a strbuf by value copies the struct but shares the underlying buffer between caller and callee, risking a dangling pointer and double-free if the callee reallocates. The function needs both the buffer and its length for pipe_command(), so a plain const char * is not sufficient here. Switch the parameter to struct strbuf * and update the caller to pass a pointer. Signed-off-by: Deveshi Dwivedi Signed-off-by: Junio C Hamano --- builtin/stash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/stash.c b/builtin/stash.c index e79d612e572e7c..472eebd6ed656d 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -1232,7 +1232,7 @@ static int check_changes(const struct pathspec *ps, int include_untracked, } static int save_untracked_files(struct stash_info *info, struct strbuf *msg, - struct strbuf files) + struct strbuf *files) { int ret = 0; struct strbuf untracked_msg = STRBUF_INIT; @@ -1246,7 +1246,7 @@ static int save_untracked_files(struct stash_info *info, struct strbuf *msg, stash_index_path.buf); strbuf_addf(&untracked_msg, "untracked files on %s\n", msg->buf); - if (pipe_command(&cp_upd_index, files.buf, files.len, NULL, 0, + if (pipe_command(&cp_upd_index, files->buf, files->len, NULL, 0, NULL, 0)) { ret = -1; goto done; @@ -1499,7 +1499,7 @@ static int do_create_stash(const struct pathspec *ps, struct strbuf *stash_msg_b parents = NULL; if (include_untracked) { - if (save_untracked_files(info, &msg, untracked_files)) { + if (save_untracked_files(info, &msg, &untracked_files)) { if (!quiet) fprintf_ln(stderr, _("Cannot save " "the untracked files")); From 3cf4024117f5c15361e68c5d9b0c3e9dd01ec105 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 16 Mar 2026 08:50:43 +0100 Subject: [PATCH 095/236] clar: update to fix compilation on platforms without PATH_MAX Update clar to e4172e3 (Merge pull request #134 from clar-test/ethomson/const, 2026-01-10). Besides some changes to "generate.py" which don't have any impact on us, this commit also fixes compilation on platforms that don't have PATH_MAX, like for example GNU/Hurd. Reported-by: Samuel Thibault Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/unit-tests/clar/clar.h | 4 +- t/unit-tests/clar/generate.py | 79 ++++++++++++++++++++++++++++------- 2 files changed, 68 insertions(+), 15 deletions(-) diff --git a/t/unit-tests/clar/clar.h b/t/unit-tests/clar/clar.h index f7e43630226434..9ea91d3d0e88c2 100644 --- a/t/unit-tests/clar/clar.h +++ b/t/unit-tests/clar/clar.h @@ -15,8 +15,10 @@ # define CLAR_MAX_PATH 4096 #elif defined(_WIN32) # define CLAR_MAX_PATH MAX_PATH -#else +#elif defined(PATH_MAX) # define CLAR_MAX_PATH PATH_MAX +#else +# define CLAR_MAX_PATH 4096 #endif #ifndef CLAR_SELFTEST diff --git a/t/unit-tests/clar/generate.py b/t/unit-tests/clar/generate.py index fd2f0ee83b55c5..2357b2d6d299dc 100755 --- a/t/unit-tests/clar/generate.py +++ b/t/unit-tests/clar/generate.py @@ -8,7 +8,7 @@ from __future__ import with_statement from string import Template -import re, fnmatch, os, sys, codecs, pickle +import re, fnmatch, os, sys, codecs, pickle, io class Module(object): class Template(object): @@ -147,7 +147,7 @@ def __init__(self, path, output): self.path = path self.output = output - def should_generate(self, path): + def maybe_generate(self, path): if not os.path.isfile(path): return True @@ -223,34 +223,85 @@ def callback_count(self): return sum(len(module.callbacks) for module in self.modules.values()) def write(self): - output = os.path.join(self.output, 'clar.suite') - os.makedirs(self.output, exist_ok=True) + if not os.path.exists(self.output): + os.makedirs(self.output) - if not self.should_generate(output): + wrote_suite = self.write_suite() + wrote_header = self.write_header() + + if wrote_suite or wrote_header: + self.save_cache() + return True + + return False + + def write_output(self, fn, data): + if not self.maybe_generate(fn): + return False + + current = None + + try: + with open(fn, 'r') as input: + current = input.read() + except OSError: + pass + except IOError: + pass + + if current == data: return False - with open(output, 'w') as data: + with open(fn, 'w') as output: + output.write(data) + + return True + + def write_suite(self): + suite_fn = os.path.join(self.output, 'clar.suite') + + with io.StringIO() as suite_file: modules = sorted(self.modules.values(), key=lambda module: module.name) for module in modules: t = Module.DeclarationTemplate(module) - data.write(t.render()) + suite_file.write(t.render()) for module in modules: t = Module.CallbacksTemplate(module) - data.write(t.render()) + suite_file.write(t.render()) suites = "static struct clar_suite _clar_suites[] = {" + ','.join( Module.InfoTemplate(module).render() for module in modules ) + "\n};\n" - data.write(suites) + suite_file.write(suites) - data.write("static const size_t _clar_suite_count = %d;\n" % self.suite_count()) - data.write("static const size_t _clar_callback_count = %d;\n" % self.callback_count()) + suite_file.write(u"static const size_t _clar_suite_count = %d;\n" % self.suite_count()) + suite_file.write(u"static const size_t _clar_callback_count = %d;\n" % self.callback_count()) - self.save_cache() - return True + return self.write_output(suite_fn, suite_file.getvalue()) + + return False + + def write_header(self): + header_fn = os.path.join(self.output, 'clar_suite.h') + + with io.StringIO() as header_file: + header_file.write(u"#ifndef _____clar_suite_h_____\n") + header_file.write(u"#define _____clar_suite_h_____\n") + + modules = sorted(self.modules.values(), key=lambda module: module.name) + + for module in modules: + t = Module.DeclarationTemplate(module) + header_file.write(t.render()) + + header_file.write(u"#endif\n") + + return self.write_output(header_fn, header_file.getvalue()) + + return False if __name__ == '__main__': from optparse import OptionParser @@ -275,4 +326,4 @@ def write(self): suite.load(options.force) suite.disable(options.excluded) if suite.write(): - print("Written `clar.suite` (%d tests in %d suites)" % (suite.callback_count(), suite.suite_count())) + print("Written `clar.suite`, `clar_suite.h` (%d tests in %d suites)" % (suite.callback_count(), suite.suite_count())) From 5514f146171349afd1965c13f92c786ed90f8dbe Mon Sep 17 00:00:00 2001 From: Guillaume Jacob Date: Mon, 16 Mar 2026 14:15:36 +0000 Subject: [PATCH 096/236] doc: fix git grep args order in Quick Reference The example provided has its arguments in the wrong order. The revision should follow the pattern, and not the other way around. Signed-off-by: Guillaume Jacob Signed-off-by: Junio C Hamano --- Documentation/user-manual.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/user-manual.adoc b/Documentation/user-manual.adoc index d2b478ad23221a..c6996a2141c222 100644 --- a/Documentation/user-manual.adoc +++ b/Documentation/user-manual.adoc @@ -4466,7 +4466,7 @@ $ git show # most recent commit $ git diff v2.6.15..v2.6.16 # diff between two tagged versions $ git diff v2.6.15..HEAD # diff with current head $ git grep "foo()" # search working directory for "foo()" -$ git grep v2.6.15 "foo()" # search old tree for "foo()" +$ git grep "foo()" v2.6.15 # search old tree for "foo()" $ git show v2.6.15:a.txt # look at old version of a.txt ----------------------------------------------- From 386fe44951c3d0f8eaee98809aae5f2d886bac83 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:14 +0900 Subject: [PATCH 097/236] parseopt: extract subcommand handling from parse_options_step() Move the subcommand branch out of parse_options_step() into a new handle_subcommand() helper. Also, make parse_subcommand() return a simple success/failure status. This removes the switch over impossible parse_opt_result values and makes the non-option path easier to follow and maintain. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- parse-options.c | 87 ++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/parse-options.c b/parse-options.c index c9cafc21b90355..02a4f00919f6d6 100644 --- a/parse-options.c +++ b/parse-options.c @@ -605,17 +605,44 @@ static enum parse_opt_result parse_nodash_opt(struct parse_opt_ctx_t *p, return PARSE_OPT_ERROR; } -static enum parse_opt_result parse_subcommand(const char *arg, - const struct option *options) +static int parse_subcommand(const char *arg, const struct option *options) { - for (; options->type != OPTION_END; options++) - if (options->type == OPTION_SUBCOMMAND && - !strcmp(options->long_name, arg)) { - *(parse_opt_subcommand_fn **)options->value = options->subcommand_fn; - return PARSE_OPT_SUBCOMMAND; - } + for (; options->type != OPTION_END; options++) { + parse_opt_subcommand_fn **opt_val; - return PARSE_OPT_UNKNOWN; + if (options->type != OPTION_SUBCOMMAND || + strcmp(options->long_name, arg)) + continue; + + opt_val = options->value; + *opt_val = options->subcommand_fn; + return 0; + } + + return -1; +} + +static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, + const char *arg, + const struct option *options, + const char * const usagestr[]) +{ + int err = parse_subcommand(arg, options); + + if (!err) + return PARSE_OPT_SUBCOMMAND; + + /* + * arg is neither a short or long option nor a subcommand. Since this + * command has a default operation mode, we have to treat this arg and + * all remaining args as args meant to that default operation mode. + * So we are done parsing. + */ + if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) + return PARSE_OPT_DONE; + + error(_("unknown subcommand: `%s'"), arg); + usage_with_options(usagestr, options); } static void check_typos(const char *arg, const struct option *options) @@ -990,38 +1017,16 @@ enum parse_opt_result parse_options_step(struct parse_opt_ctx_t *ctx, if (*arg != '-' || !arg[1]) { if (parse_nodash_opt(ctx, arg, options) == 0) continue; - if (!ctx->has_subcommands) { - if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) - return PARSE_OPT_NON_OPTION; - ctx->out[ctx->cpidx++] = ctx->argv[0]; - continue; - } - switch (parse_subcommand(arg, options)) { - case PARSE_OPT_SUBCOMMAND: - return PARSE_OPT_SUBCOMMAND; - case PARSE_OPT_UNKNOWN: - if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) - /* - * arg is neither a short or long - * option nor a subcommand. Since - * this command has a default - * operation mode, we have to treat - * this arg and all remaining args - * as args meant to that default - * operation mode. - * So we are done parsing. - */ - return PARSE_OPT_DONE; - error(_("unknown subcommand: `%s'"), arg); - usage_with_options(usagestr, options); - case PARSE_OPT_COMPLETE: - case PARSE_OPT_HELP: - case PARSE_OPT_ERROR: - case PARSE_OPT_DONE: - case PARSE_OPT_NON_OPTION: - /* Impossible. */ - BUG("parse_subcommand() cannot return these"); - } + + if (ctx->has_subcommands) + return handle_subcommand(ctx, arg, options, + usagestr); + + if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) + return PARSE_OPT_NON_OPTION; + + ctx->out[ctx->cpidx++] = ctx->argv[0]; + continue; } /* lone -h asks for help */ From e0245a1169b2acddd94be4da02c426419507f0b5 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:15 +0900 Subject: [PATCH 098/236] help: make autocorrect handling reusable Move config parsing and prompt/delay handling into autocorrect.c and expose them in autocorrect.h. This makes autocorrect reusable regardless of which target links against it. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- Makefile | 1 + autocorrect.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ autocorrect.h | 16 ++++++++++++ help.c | 64 +++------------------------------------------ meson.build | 1 + 5 files changed, 94 insertions(+), 60 deletions(-) create mode 100644 autocorrect.c create mode 100644 autocorrect.h diff --git a/Makefile b/Makefile index f3264d0a37cc50..6111631c2caaea 100644 --- a/Makefile +++ b/Makefile @@ -1098,6 +1098,7 @@ LIB_OBJS += archive-tar.o LIB_OBJS += archive-zip.o LIB_OBJS += archive.o LIB_OBJS += attr.o +LIB_OBJS += autocorrect.o LIB_OBJS += base85.o LIB_OBJS += bisect.o LIB_OBJS += blame.o diff --git a/autocorrect.c b/autocorrect.c new file mode 100644 index 00000000000000..97145d3a53ce07 --- /dev/null +++ b/autocorrect.c @@ -0,0 +1,72 @@ +#include "git-compat-util.h" +#include "autocorrect.h" +#include "config.h" +#include "parse.h" +#include "strbuf.h" +#include "prompt.h" +#include "gettext.h" + +static int parse_autocorrect(const char *value) +{ + switch (git_parse_maybe_bool_text(value)) { + case 1: + return AUTOCORRECT_IMMEDIATELY; + case 0: + return AUTOCORRECT_SHOW; + default: /* other random text */ + break; + } + + if (!strcmp(value, "prompt")) + return AUTOCORRECT_PROMPT; + if (!strcmp(value, "never")) + return AUTOCORRECT_NEVER; + if (!strcmp(value, "immediate")) + return AUTOCORRECT_IMMEDIATELY; + if (!strcmp(value, "show")) + return AUTOCORRECT_SHOW; + + return 0; +} + +void autocorrect_resolve_config(const char *var, const char *value, + const struct config_context *ctx, void *data) +{ + int *out = data; + + if (!strcmp(var, "help.autocorrect")) { + int v = parse_autocorrect(value); + + if (!v) { + v = git_config_int(var, value, ctx->kvi); + if (v < 0 || v == 1) + v = AUTOCORRECT_IMMEDIATELY; + } + + *out = v; + } +} + +void autocorrect_confirm(int autocorrect, const char *assumed) +{ + if (autocorrect == AUTOCORRECT_IMMEDIATELY) { + fprintf_ln(stderr, + _("Continuing under the assumption that you meant '%s'."), + assumed); + } else if (autocorrect == AUTOCORRECT_PROMPT) { + char *answer; + struct strbuf msg = STRBUF_INIT; + + strbuf_addf(&msg, _("Run '%s' instead [y/N]? "), assumed); + answer = git_prompt(msg.buf, PROMPT_ECHO); + strbuf_release(&msg); + + if (!(starts_with(answer, "y") || starts_with(answer, "Y"))) + exit(1); + } else { + fprintf_ln(stderr, + _("Continuing in %0.1f seconds, assuming that you meant '%s'."), + (float)autocorrect / 10.0, assumed); + sleep_millisec(autocorrect * 100); + } +} diff --git a/autocorrect.h b/autocorrect.h new file mode 100644 index 00000000000000..f5fadf9d96059b --- /dev/null +++ b/autocorrect.h @@ -0,0 +1,16 @@ +#ifndef AUTOCORRECT_H +#define AUTOCORRECT_H + +#define AUTOCORRECT_SHOW (-4) +#define AUTOCORRECT_PROMPT (-3) +#define AUTOCORRECT_NEVER (-2) +#define AUTOCORRECT_IMMEDIATELY (-1) + +struct config_context; + +void autocorrect_resolve_config(const char *var, const char *value, + const struct config_context *ctx, void *data); + +void autocorrect_confirm(int autocorrect, const char *assumed); + +#endif /* AUTOCORRECT_H */ diff --git a/help.c b/help.c index 95f576c5c81d9f..4acb6ca585ff8f 100644 --- a/help.c +++ b/help.c @@ -22,6 +22,7 @@ #include "repository.h" #include "alias.h" #include "utf8.h" +#include "autocorrect.h" #ifndef NO_CURL #include "git-curl-compat.h" /* For LIBCURL_VERSION only */ @@ -541,34 +542,6 @@ struct help_unknown_cmd_config { struct cmdnames aliases; }; -#define AUTOCORRECT_SHOW (-4) -#define AUTOCORRECT_PROMPT (-3) -#define AUTOCORRECT_NEVER (-2) -#define AUTOCORRECT_IMMEDIATELY (-1) - -static int parse_autocorrect(const char *value) -{ - switch (git_parse_maybe_bool_text(value)) { - case 1: - return AUTOCORRECT_IMMEDIATELY; - case 0: - return AUTOCORRECT_SHOW; - default: /* other random text */ - break; - } - - if (!strcmp(value, "prompt")) - return AUTOCORRECT_PROMPT; - if (!strcmp(value, "never")) - return AUTOCORRECT_NEVER; - if (!strcmp(value, "immediate")) - return AUTOCORRECT_IMMEDIATELY; - if (!strcmp(value, "show")) - return AUTOCORRECT_SHOW; - - return 0; -} - static int git_unknown_cmd_config(const char *var, const char *value, const struct config_context *ctx, void *cb) @@ -577,17 +550,7 @@ static int git_unknown_cmd_config(const char *var, const char *value, const char *subsection, *key; size_t subsection_len; - if (!strcmp(var, "help.autocorrect")) { - int v = parse_autocorrect(value); - - if (!v) { - v = git_config_int(var, value, ctx->kvi); - if (v < 0 || v == 1) - v = AUTOCORRECT_IMMEDIATELY; - } - - cfg->autocorrect = v; - } + autocorrect_resolve_config(var, value, ctx, &cfg->autocorrect); /* Also use aliases for command lookup */ if (!parse_config_key(var, "alias", &subsection, &subsection_len, @@ -724,27 +687,8 @@ char *help_unknown_cmd(const char *cmd) _("WARNING: You called a Git command named '%s', " "which does not exist."), cmd); - if (cfg.autocorrect == AUTOCORRECT_IMMEDIATELY) - fprintf_ln(stderr, - _("Continuing under the assumption that " - "you meant '%s'."), - assumed); - else if (cfg.autocorrect == AUTOCORRECT_PROMPT) { - char *answer; - struct strbuf msg = STRBUF_INIT; - strbuf_addf(&msg, _("Run '%s' instead [y/N]? "), assumed); - answer = git_prompt(msg.buf, PROMPT_ECHO); - strbuf_release(&msg); - if (!(starts_with(answer, "y") || - starts_with(answer, "Y"))) - exit(1); - } else { - fprintf_ln(stderr, - _("Continuing in %0.1f seconds, " - "assuming that you meant '%s'."), - (float)cfg.autocorrect/10.0, assumed); - sleep_millisec(cfg.autocorrect * 100); - } + + autocorrect_confirm(cfg.autocorrect, assumed); cmdnames_release(&cfg.aliases); cmdnames_release(&main_cmds); diff --git a/meson.build b/meson.build index 4b536e012481ca..0429e80a5c966c 100644 --- a/meson.build +++ b/meson.build @@ -283,6 +283,7 @@ libgit_sources = [ 'archive-zip.c', 'archive.c', 'attr.c', + 'autocorrect.c', 'base85.c', 'bisect.c', 'blame.c', From 916b96c0ec006216fcb9475fea37c9bc8e6b6505 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:16 +0900 Subject: [PATCH 099/236] help: move tty check for autocorrection to autocorrect.c TTY checking is the autocorrect config parser's responsibility. It must ensure the parsed value is correct and reliable. Thus, move the check to autocorrect_resolve_config(). Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 24 ++++++++++++++++-------- help.c | 6 ------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index 97145d3a53ce07..887d2396da44b9 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -33,18 +33,26 @@ void autocorrect_resolve_config(const char *var, const char *value, const struct config_context *ctx, void *data) { int *out = data; + int parsed; - if (!strcmp(var, "help.autocorrect")) { - int v = parse_autocorrect(value); + if (strcmp(var, "help.autocorrect")) + return; - if (!v) { - v = git_config_int(var, value, ctx->kvi); - if (v < 0 || v == 1) - v = AUTOCORRECT_IMMEDIATELY; - } + parsed = parse_autocorrect(value); - *out = v; + /* + * Disable autocorrection prompt in a non-interactive session + */ + if (parsed == AUTOCORRECT_PROMPT && (!isatty(0) || !isatty(2))) + parsed = AUTOCORRECT_NEVER; + + if (!parsed) { + parsed = git_config_int(var, value, ctx->kvi); + if (parsed < 0 || parsed == 1) + parsed = AUTOCORRECT_IMMEDIATELY; } + + *out = parsed; } void autocorrect_confirm(int autocorrect, const char *assumed) diff --git a/help.c b/help.c index 4acb6ca585ff8f..983057970e7c7f 100644 --- a/help.c +++ b/help.c @@ -607,12 +607,6 @@ char *help_unknown_cmd(const char *cmd) read_early_config(the_repository, git_unknown_cmd_config, &cfg); - /* - * Disable autocorrection prompt in a non-interactive session - */ - if ((cfg.autocorrect == AUTOCORRECT_PROMPT) && (!isatty(0) || !isatty(2))) - cfg.autocorrect = AUTOCORRECT_NEVER; - if (cfg.autocorrect == AUTOCORRECT_NEVER) { fprintf_ln(stderr, _("git: '%s' is not a git command. See 'git --help'."), cmd); exit(1); From a6e0ccbd38e4b274fa2360bc8a49d8049d1ded95 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:17 +0900 Subject: [PATCH 100/236] autocorrect: use mode and delay instead of magic numbers Drop magic numbers and describe autocorrect config with a mode enum and an integer delay. This reduces errors when mutating config values and makes the values easier to access. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 46 +++++++++++++++++++++++----------------------- autocorrect.h | 20 ++++++++++++++------ help.c | 9 +++++---- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index 887d2396da44b9..2484546fc731d9 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -6,7 +6,7 @@ #include "prompt.h" #include "gettext.h" -static int parse_autocorrect(const char *value) +static enum autocorrect_mode parse_autocorrect(const char *value) { switch (git_parse_maybe_bool_text(value)) { case 1: @@ -19,49 +19,49 @@ static int parse_autocorrect(const char *value) if (!strcmp(value, "prompt")) return AUTOCORRECT_PROMPT; - if (!strcmp(value, "never")) + else if (!strcmp(value, "never")) return AUTOCORRECT_NEVER; - if (!strcmp(value, "immediate")) + else if (!strcmp(value, "immediate")) return AUTOCORRECT_IMMEDIATELY; - if (!strcmp(value, "show")) + else if (!strcmp(value, "show")) return AUTOCORRECT_SHOW; - - return 0; + else + return AUTOCORRECT_DELAY; } void autocorrect_resolve_config(const char *var, const char *value, const struct config_context *ctx, void *data) { - int *out = data; - int parsed; + struct autocorrect *conf = data; if (strcmp(var, "help.autocorrect")) return; - parsed = parse_autocorrect(value); + conf->mode = parse_autocorrect(value); /* * Disable autocorrection prompt in a non-interactive session */ - if (parsed == AUTOCORRECT_PROMPT && (!isatty(0) || !isatty(2))) - parsed = AUTOCORRECT_NEVER; + if (conf->mode == AUTOCORRECT_PROMPT && (!isatty(0) || !isatty(2))) + conf->mode = AUTOCORRECT_NEVER; - if (!parsed) { - parsed = git_config_int(var, value, ctx->kvi); - if (parsed < 0 || parsed == 1) - parsed = AUTOCORRECT_IMMEDIATELY; - } + if (conf->mode == AUTOCORRECT_DELAY) { + conf->delay = git_config_int(var, value, ctx->kvi); - *out = parsed; + if (!conf->delay) + conf->mode = AUTOCORRECT_SHOW; + else if (conf->delay < 0 || conf->delay == 1) + conf->mode = AUTOCORRECT_IMMEDIATELY; + } } -void autocorrect_confirm(int autocorrect, const char *assumed) +void autocorrect_confirm(struct autocorrect *conf, const char *assumed) { - if (autocorrect == AUTOCORRECT_IMMEDIATELY) { + if (conf->mode == AUTOCORRECT_IMMEDIATELY) { fprintf_ln(stderr, _("Continuing under the assumption that you meant '%s'."), assumed); - } else if (autocorrect == AUTOCORRECT_PROMPT) { + } else if (conf->mode == AUTOCORRECT_PROMPT) { char *answer; struct strbuf msg = STRBUF_INIT; @@ -71,10 +71,10 @@ void autocorrect_confirm(int autocorrect, const char *assumed) if (!(starts_with(answer, "y") || starts_with(answer, "Y"))) exit(1); - } else { + } else if (conf->mode == AUTOCORRECT_DELAY) { fprintf_ln(stderr, _("Continuing in %0.1f seconds, assuming that you meant '%s'."), - (float)autocorrect / 10.0, assumed); - sleep_millisec(autocorrect * 100); + conf->delay / 10.0, assumed); + sleep_millisec(conf->delay * 100); } } diff --git a/autocorrect.h b/autocorrect.h index f5fadf9d96059b..5506a36f11a7cc 100644 --- a/autocorrect.h +++ b/autocorrect.h @@ -1,16 +1,24 @@ #ifndef AUTOCORRECT_H #define AUTOCORRECT_H -#define AUTOCORRECT_SHOW (-4) -#define AUTOCORRECT_PROMPT (-3) -#define AUTOCORRECT_NEVER (-2) -#define AUTOCORRECT_IMMEDIATELY (-1) - struct config_context; +enum autocorrect_mode { + AUTOCORRECT_SHOW, + AUTOCORRECT_NEVER, + AUTOCORRECT_PROMPT, + AUTOCORRECT_IMMEDIATELY, + AUTOCORRECT_DELAY, +}; + +struct autocorrect { + enum autocorrect_mode mode; + int delay; +}; + void autocorrect_resolve_config(const char *var, const char *value, const struct config_context *ctx, void *data); -void autocorrect_confirm(int autocorrect, const char *assumed); +void autocorrect_confirm(struct autocorrect *conf, const char *assumed); #endif /* AUTOCORRECT_H */ diff --git a/help.c b/help.c index 983057970e7c7f..a89ac5aced9994 100644 --- a/help.c +++ b/help.c @@ -538,7 +538,7 @@ int is_in_cmdlist(struct cmdnames *c, const char *s) } struct help_unknown_cmd_config { - int autocorrect; + struct autocorrect autocorrect; struct cmdnames aliases; }; @@ -607,7 +607,7 @@ char *help_unknown_cmd(const char *cmd) read_early_config(the_repository, git_unknown_cmd_config, &cfg); - if (cfg.autocorrect == AUTOCORRECT_NEVER) { + if (cfg.autocorrect.mode == AUTOCORRECT_NEVER) { fprintf_ln(stderr, _("git: '%s' is not a git command. See 'git --help'."), cmd); exit(1); } @@ -673,7 +673,8 @@ char *help_unknown_cmd(const char *cmd) n++) ; /* still counting */ } - if (cfg.autocorrect && cfg.autocorrect != AUTOCORRECT_SHOW && n == 1 && + + if (cfg.autocorrect.mode != AUTOCORRECT_SHOW && n == 1 && SIMILAR_ENOUGH(best_similarity)) { char *assumed = xstrdup(main_cmds.names[0]->name); @@ -682,7 +683,7 @@ char *help_unknown_cmd(const char *cmd) "which does not exist."), cmd); - autocorrect_confirm(cfg.autocorrect, assumed); + autocorrect_confirm(&cfg.autocorrect, assumed); cmdnames_release(&cfg.aliases); cmdnames_release(&main_cmds); From f06f1f043cbf58b82f1243fd09fc2c8b0202a853 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:18 +0900 Subject: [PATCH 101/236] autocorrect: rename AUTOCORRECT_SHOW to AUTOCORRECT_HINT AUTOCORRECT_SHOW is ambiguous. Its purpose is to show commands similar to the unknown one and take no other action. Rename it to fit the semantics. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 6 +++--- autocorrect.h | 2 +- help.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index 2484546fc731d9..de0fa282c934a8 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -12,7 +12,7 @@ static enum autocorrect_mode parse_autocorrect(const char *value) case 1: return AUTOCORRECT_IMMEDIATELY; case 0: - return AUTOCORRECT_SHOW; + return AUTOCORRECT_HINT; default: /* other random text */ break; } @@ -24,7 +24,7 @@ static enum autocorrect_mode parse_autocorrect(const char *value) else if (!strcmp(value, "immediate")) return AUTOCORRECT_IMMEDIATELY; else if (!strcmp(value, "show")) - return AUTOCORRECT_SHOW; + return AUTOCORRECT_HINT; else return AUTOCORRECT_DELAY; } @@ -49,7 +49,7 @@ void autocorrect_resolve_config(const char *var, const char *value, conf->delay = git_config_int(var, value, ctx->kvi); if (!conf->delay) - conf->mode = AUTOCORRECT_SHOW; + conf->mode = AUTOCORRECT_HINT; else if (conf->delay < 0 || conf->delay == 1) conf->mode = AUTOCORRECT_IMMEDIATELY; } diff --git a/autocorrect.h b/autocorrect.h index 5506a36f11a7cc..328807242c15ab 100644 --- a/autocorrect.h +++ b/autocorrect.h @@ -4,7 +4,7 @@ struct config_context; enum autocorrect_mode { - AUTOCORRECT_SHOW, + AUTOCORRECT_HINT, AUTOCORRECT_NEVER, AUTOCORRECT_PROMPT, AUTOCORRECT_IMMEDIATELY, diff --git a/help.c b/help.c index a89ac5aced9994..2d441ded3f1489 100644 --- a/help.c +++ b/help.c @@ -674,7 +674,7 @@ char *help_unknown_cmd(const char *cmd) ; /* still counting */ } - if (cfg.autocorrect.mode != AUTOCORRECT_SHOW && n == 1 && + if (cfg.autocorrect.mode != AUTOCORRECT_HINT && n == 1 && SIMILAR_ENOUGH(best_similarity)) { char *assumed = xstrdup(main_cmds.names[0]->name); From 7cd07f167d2980ad58de08a8bd7787d2ef5882b9 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:19 +0900 Subject: [PATCH 102/236] autocorrect: provide config resolution API Add autocorrect_resolve(). This resolves and populates the correct values for autocorrect config. Make autocorrect config callback internal. The API is meant to provide a high-level way to retrieve the config. Allowing access to the config callback from outside violates that intent. Additionally, in some cases, without access to the config callback, two config iterations cannot be merged into one, which can hurt performance. This is fine, as the code path that calls autocorrect_resolve() is cold. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 15 ++++++++++++--- autocorrect.h | 5 +---- help.c | 40 +++++++++++++++++----------------------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index de0fa282c934a8..b2ee9f51e8c09a 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -1,3 +1,5 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "git-compat-util.h" #include "autocorrect.h" #include "config.h" @@ -29,13 +31,13 @@ static enum autocorrect_mode parse_autocorrect(const char *value) return AUTOCORRECT_DELAY; } -void autocorrect_resolve_config(const char *var, const char *value, - const struct config_context *ctx, void *data) +static int resolve_autocorrect(const char *var, const char *value, + const struct config_context *ctx, void *data) { struct autocorrect *conf = data; if (strcmp(var, "help.autocorrect")) - return; + return 0; conf->mode = parse_autocorrect(value); @@ -53,6 +55,13 @@ void autocorrect_resolve_config(const char *var, const char *value, else if (conf->delay < 0 || conf->delay == 1) conf->mode = AUTOCORRECT_IMMEDIATELY; } + + return 0; +} + +void autocorrect_resolve(struct autocorrect *conf) +{ + read_early_config(the_repository, resolve_autocorrect, conf); } void autocorrect_confirm(struct autocorrect *conf, const char *assumed) diff --git a/autocorrect.h b/autocorrect.h index 328807242c15ab..0d3e819262edee 100644 --- a/autocorrect.h +++ b/autocorrect.h @@ -1,8 +1,6 @@ #ifndef AUTOCORRECT_H #define AUTOCORRECT_H -struct config_context; - enum autocorrect_mode { AUTOCORRECT_HINT, AUTOCORRECT_NEVER, @@ -16,8 +14,7 @@ struct autocorrect { int delay; }; -void autocorrect_resolve_config(const char *var, const char *value, - const struct config_context *ctx, void *data); +void autocorrect_resolve(struct autocorrect *conf); void autocorrect_confirm(struct autocorrect *conf, const char *assumed); diff --git a/help.c b/help.c index 2d441ded3f1489..81efdb13d4a375 100644 --- a/help.c +++ b/help.c @@ -537,32 +537,23 @@ int is_in_cmdlist(struct cmdnames *c, const char *s) return 0; } -struct help_unknown_cmd_config { - struct autocorrect autocorrect; - struct cmdnames aliases; -}; - -static int git_unknown_cmd_config(const char *var, const char *value, - const struct config_context *ctx, - void *cb) +static int resolve_aliases(const char *var, const char *value UNUSED, + const struct config_context *ctx UNUSED, void *data) { - struct help_unknown_cmd_config *cfg = cb; + struct cmdnames *aliases = data; const char *subsection, *key; size_t subsection_len; - autocorrect_resolve_config(var, value, ctx, &cfg->autocorrect); - - /* Also use aliases for command lookup */ if (!parse_config_key(var, "alias", &subsection, &subsection_len, &key)) { if (subsection) { /* [alias "name"] command = value */ if (!strcmp(key, "command")) - add_cmdname(&cfg->aliases, subsection, + add_cmdname(aliases, subsection, subsection_len); } else { /* alias.name = value */ - add_cmdname(&cfg->aliases, key, strlen(key)); + add_cmdname(aliases, key, strlen(key)); } } @@ -599,22 +590,26 @@ static const char bad_interpreter_advice[] = char *help_unknown_cmd(const char *cmd) { - struct help_unknown_cmd_config cfg = { 0 }; + struct cmdnames aliases = { 0 }; + struct autocorrect autocorrect = { 0 }; int i, n, best_similarity = 0; struct cmdnames main_cmds = { 0 }; struct cmdnames other_cmds = { 0 }; struct cmdname_help *common_cmds; - read_early_config(the_repository, git_unknown_cmd_config, &cfg); + autocorrect_resolve(&autocorrect); - if (cfg.autocorrect.mode == AUTOCORRECT_NEVER) { + if (autocorrect.mode == AUTOCORRECT_NEVER) { fprintf_ln(stderr, _("git: '%s' is not a git command. See 'git --help'."), cmd); exit(1); } load_command_list("git-", &main_cmds, &other_cmds); - add_cmd_list(&main_cmds, &cfg.aliases); + /* Also use aliases for command lookup */ + read_early_config(the_repository, resolve_aliases, &aliases); + + add_cmd_list(&main_cmds, &aliases); add_cmd_list(&main_cmds, &other_cmds); QSORT(main_cmds.names, main_cmds.cnt, cmdname_compare); uniq(&main_cmds); @@ -674,18 +669,17 @@ char *help_unknown_cmd(const char *cmd) ; /* still counting */ } - if (cfg.autocorrect.mode != AUTOCORRECT_HINT && n == 1 && + if (autocorrect.mode != AUTOCORRECT_HINT && n == 1 && SIMILAR_ENOUGH(best_similarity)) { char *assumed = xstrdup(main_cmds.names[0]->name); fprintf_ln(stderr, - _("WARNING: You called a Git command named '%s', " - "which does not exist."), + _("WARNING: You called a Git command named '%s', which does not exist."), cmd); - autocorrect_confirm(&cfg.autocorrect, assumed); + autocorrect_confirm(&autocorrect, assumed); - cmdnames_release(&cfg.aliases); + cmdnames_release(&aliases); cmdnames_release(&main_cmds); cmdnames_release(&other_cmds); return assumed; From be9df6de6e2cfd09ffc327e9d4edd451248296f9 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:20 +0900 Subject: [PATCH 103/236] parseopt: autocorrect mistyped subcommands Try to autocorrect the mistyped mandatory subcommand before showing an error and exiting. Subcommands parsed with PARSE_OPT_SUBCOMMAND_OPTIONAL are skipped. Use standard Damerau-Levenshtein distance (weights 1, 1, 1, 1) to establish a predictable, mathematically sound baseline. Scale the allowed edit distance based on input length to prevent false positives on short commands, following common practice for fuzziness thresholds (e.g., Elasticsearch's AUTO fuzziness): - Length 0-2: 0 edits allowed - Length 3-5: 1 edit allowed - Length 6+: 2 edits allowed Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- parse-options.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) diff --git a/parse-options.c b/parse-options.c index 02a4f00919f6d6..1f1b72762790c0 100644 --- a/parse-options.c +++ b/parse-options.c @@ -6,6 +6,8 @@ #include "strbuf.h" #include "string-list.h" #include "utf8.h" +#include "autocorrect.h" +#include "levenshtein.h" static int disallow_abbreviated_options; @@ -622,13 +624,77 @@ static int parse_subcommand(const char *arg, const struct option *options) return -1; } +static void find_subcommands(struct string_list *list, + const struct option *options) +{ + for (; options->type != OPTION_END; options++) { + if (options->type == OPTION_SUBCOMMAND) + string_list_append(list, options->long_name); + } +} + +static int similar_enough(const char *cmd, unsigned int edit) +{ + size_t len = strlen(cmd); + unsigned int allowed = len < 3 ? 0 : len < 6 ? 1 : 2; + + return edit <= allowed; +} + +static const char *autocorrect_subcommand(const char *cmd, + struct string_list *cmds) +{ + struct autocorrect autocorrect = { 0 }; + unsigned int min = UINT_MAX; + unsigned int ties = 0; + struct string_list_item *cand; + struct string_list_item *best = NULL; + + autocorrect_resolve(&autocorrect); + + /* + * Builtin subcommands are small enough that printing them all via + * usage_with_options() is sufficient. Therefore, AUTOCORRECT_HINT + * acts like AUTOCORRECT_NEVER. + */ + if (autocorrect.mode == AUTOCORRECT_HINT || + autocorrect.mode == AUTOCORRECT_NEVER) + return NULL; + + for_each_string_list_item(cand, cmds) { + unsigned int edit = levenshtein(cmd, cand->string, 1, 1, 1, 1); + + if (edit < min) { + min = edit; + best = cand; + ties = 0; + } else if (edit == min) { + ties++; + } + } + + if (!ties && similar_enough(cmd, min)) { + fprintf_ln(stderr, + _("WARNING: You called a subcommand named '%s', which does not exist."), + cmd); + + autocorrect_confirm(&autocorrect, best->string); + return best->string; + } + + return NULL; +} + static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, const char *arg, const struct option *options, const char * const usagestr[]) { - int err = parse_subcommand(arg, options); + int err; + const char *assumed; + struct string_list cmds = STRING_LIST_INIT_NODUP; + err = parse_subcommand(arg, options); if (!err) return PARSE_OPT_SUBCOMMAND; @@ -641,8 +707,17 @@ static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) return PARSE_OPT_DONE; - error(_("unknown subcommand: `%s'"), arg); - usage_with_options(usagestr, options); + find_subcommands(&cmds, options); + assumed = autocorrect_subcommand(arg, &cmds); + + if (!assumed) { + error(_("unknown subcommand: `%s'"), arg); + usage_with_options(usagestr, options); + } + + string_list_clear(&cmds, 0); + parse_subcommand(assumed, options); + return PARSE_OPT_SUBCOMMAND; } static void check_typos(const char *arg, const struct option *options) From ae8b7e1d200977165755c2e6d5a22e1df8ab6bf1 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:21 +0900 Subject: [PATCH 104/236] parseopt: enable subcommand autocorrection for git-remote and git-notes Add PARSE_OPT_SUBCOMMAND_AUTOCORR to enable autocorrection for subcommands parsed with PARSE_OPT_SUBCOMMAND_OPTIONAL. Use it for git-remote and git-notes, so mistyped subcommands can be automatically corrected, and builtin entry points no longer need to handle the unknown subcommand error path themselves. This is safe for these two builtins, because they either resolve to a single subcommand or take no subcommand at all. This means that if the subcommand parser encounters an unknown argument, it must be a mistyped subcommand. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- builtin/notes.c | 10 +++------- builtin/remote.c | 12 ++++-------- parse-options.c | 16 +++++++++------- parse-options.h | 1 + 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/builtin/notes.c b/builtin/notes.c index 9af602bdd7b402..087eb898a4415f 100644 --- a/builtin/notes.c +++ b/builtin/notes.c @@ -1149,14 +1149,10 @@ int cmd_notes(int argc, repo_config(the_repository, git_default_config, NULL); argc = parse_options(argc, argv, prefix, options, git_notes_usage, - PARSE_OPT_SUBCOMMAND_OPTIONAL); - if (!fn) { - if (argc) { - error(_("unknown subcommand: `%s'"), argv[0]); - usage_with_options(git_notes_usage, options); - } + PARSE_OPT_SUBCOMMAND_OPTIONAL | + PARSE_OPT_SUBCOMMAND_AUTOCORR); + if (!fn) fn = list; - } if (override_notes_ref) { struct strbuf sb = STRBUF_INIT; diff --git a/builtin/remote.c b/builtin/remote.c index 0fddaa177331f6..9415f6cb03e807 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -1953,15 +1953,11 @@ int cmd_remote(int argc, }; argc = parse_options(argc, argv, prefix, options, builtin_remote_usage, - PARSE_OPT_SUBCOMMAND_OPTIONAL); + PARSE_OPT_SUBCOMMAND_OPTIONAL | + PARSE_OPT_SUBCOMMAND_AUTOCORR); - if (fn) { + if (fn) return !!fn(argc, argv, prefix, repo); - } else { - if (argc) { - error(_("unknown subcommand: `%s'"), argv[0]); - usage_with_options(builtin_remote_usage, options); - } + else return !!show_all(); - } } diff --git a/parse-options.c b/parse-options.c index 1f1b72762790c0..0b84061a381153 100644 --- a/parse-options.c +++ b/parse-options.c @@ -698,14 +698,16 @@ static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, if (!err) return PARSE_OPT_SUBCOMMAND; - /* - * arg is neither a short or long option nor a subcommand. Since this - * command has a default operation mode, we have to treat this arg and - * all remaining args as args meant to that default operation mode. - * So we are done parsing. - */ - if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) + if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL && + !(ctx->flags & PARSE_OPT_SUBCOMMAND_AUTOCORR)) { + /* + * arg is neither a short or long option nor a subcommand. + * Since this command has a default operation mode, we have to + * treat this arg and all remaining args as args meant to that + * default operation mode. So we are done parsing. + */ return PARSE_OPT_DONE; + } find_subcommands(&cmds, options); assumed = autocorrect_subcommand(arg, &cmds); diff --git a/parse-options.h b/parse-options.h index 706de9729f6b3f..f29ac337893c92 100644 --- a/parse-options.h +++ b/parse-options.h @@ -40,6 +40,7 @@ enum parse_opt_flags { PARSE_OPT_ONE_SHOT = 1 << 5, PARSE_OPT_SHELL_EVAL = 1 << 6, PARSE_OPT_SUBCOMMAND_OPTIONAL = 1 << 7, + PARSE_OPT_SUBCOMMAND_AUTOCORR = 1 << 8, }; enum parse_opt_option_flags { From 273faabea8db47247564df092adf5a675ddfa284 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:22 +0900 Subject: [PATCH 105/236] parseopt: add tests for subcommand autocorrection These tests cover default behavior (help.autocorrect is unset), no correction, immediate correction, delayed correction, and rejection when the typo is too dissimilar. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- t/meson.build | 1 + t/t9004-autocorrect-subcommand.sh | 51 +++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100755 t/t9004-autocorrect-subcommand.sh diff --git a/t/meson.build b/t/meson.build index f66a73f8a07d93..bf0503d705a9a3 100644 --- a/t/meson.build +++ b/t/meson.build @@ -973,6 +973,7 @@ integration_tests = [ 't9001-send-email.sh', 't9002-column.sh', 't9003-help-autocorrect.sh', + 't9004-autocorrect-subcommand.sh', 't9100-git-svn-basic.sh', 't9101-git-svn-props.sh', 't9102-git-svn-deep-rmdir.sh', diff --git a/t/t9004-autocorrect-subcommand.sh b/t/t9004-autocorrect-subcommand.sh new file mode 100755 index 00000000000000..d10031659b940b --- /dev/null +++ b/t/t9004-autocorrect-subcommand.sh @@ -0,0 +1,51 @@ +#!/bin/sh + +test_description='subcommand auto-correction test + +Test autocorrection for subcommands with different +help.autocorrect mode.' + +. ./test-lib.sh + +test_expect_success 'setup' " + echo '^error: unknown subcommand: ' >grep_unknown +" + +test_expect_success 'default is not to autocorrect' ' + test_must_fail git worktree lsit 2>actual && + head -n1 actual >first && test_grep -f grep_unknown first +' + +for mode in false no off 0 show never +do + test_expect_success "'$mode' disables autocorrection" " + test_config help.autocorrect $mode && + + test_must_fail git worktree lsit 2>actual && + head -n1 actual >first && test_grep -f grep_unknown first + " +done + +for mode in -39 immediate 1 +do + test_expect_success "autocorrect immediately with '$mode'" - <<-EOT + test_config help.autocorrect $mode && + + git worktree lsit 2>actual && + test_grep "you meant 'list'\.$" actual + EOT +done + +test_expect_success 'delay path is executed' - <<-\EOT + test_config help.autocorrect 2 && + + git worktree lsit 2>actual && + test_grep '^Continuing in 0.2 seconds, ' actual +EOT + +test_expect_success 'deny if too dissimilar' - <<-\EOT + test_must_fail git remote rensnr 2>actual && + head -n1 actual >first && test_grep -f grep_unknown first +EOT + +test_done From 916b45080534e9603094dda39c4599a2f2a466d1 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:23 +0900 Subject: [PATCH 106/236] doc: document autocorrect API Explain behaviors for autocorrect_resolve(), autocorrect_confirm(), and struct autocorrect. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/autocorrect.h b/autocorrect.h index 0d3e819262edee..bfa3ba20a4fb73 100644 --- a/autocorrect.h +++ b/autocorrect.h @@ -9,13 +9,24 @@ enum autocorrect_mode { AUTOCORRECT_DELAY, }; +/** + * `mode` indicates which action will be performed by autocorrect_confirm(). + * `delay` is the timeout before autocorrect_confirm() returns, in tenths of a + * second. Use it only with AUTOCORRECT_DELAY. + */ struct autocorrect { enum autocorrect_mode mode; int delay; }; +/** + * Resolve the autocorrect configuration into `conf`. + */ void autocorrect_resolve(struct autocorrect *conf); +/** + * Interact with the user in different ways depending on `conf->mode`. + */ void autocorrect_confirm(struct autocorrect *conf, const char *assumed); #endif /* AUTOCORRECT_H */ From 48430e44ace97055567294d412dde9bb8adc0fbb Mon Sep 17 00:00:00 2001 From: Mirko Faina Date: Mon, 16 Mar 2026 02:15:42 +0100 Subject: [PATCH 107/236] t0008: improve test cleanup to fix failing test The "large exclude file ignored in tree" test fails. This is due to an additional warning message that is generated in the test. "warning: unable to access 'subdir/.gitignore': Too many levels of symbolic links", the extra warning that is not supposed to be there, happens because of some leftover files left by previous tests. To fix this we improve cleanup on "symlinks not respected in-tree", and because the tests in t0008 in general have poor cleanup, at the start of "large exclude file ignored in tree" we search for any leftover .gitignore and remove them before starting the test. Improve post-test cleanup and add pre-test cleanup to make sure that we have a workable environment for the test. Signed-off-by: Mirko Faina Signed-off-by: Junio C Hamano --- t/t0008-ignores.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/t0008-ignores.sh b/t/t0008-ignores.sh index db8bde280ecfc9..e716b5cdfa1bee 100755 --- a/t/t0008-ignores.sh +++ b/t/t0008-ignores.sh @@ -946,7 +946,7 @@ test_expect_success SYMLINKS 'symlinks respected in info/exclude' ' ' test_expect_success SYMLINKS 'symlinks not respected in-tree' ' - test_when_finished "rm .gitignore" && + test_when_finished "rm -rf subdir .gitignore err actual" && ln -s ignore .gitignore && mkdir subdir && ln -s ignore subdir/.gitignore && @@ -957,6 +957,7 @@ test_expect_success SYMLINKS 'symlinks not respected in-tree' ' test_expect_success EXPENSIVE 'large exclude file ignored in tree' ' test_when_finished "rm .gitignore" && + find . -name .gitignore -exec rm "{}" ";" && dd if=/dev/zero of=.gitignore bs=101M count=1 && git ls-files -o --exclude-standard 2>err && echo "warning: ignoring excessively large pattern file: .gitignore" >expect && From d05d84c5f507e8b973982e9cf3a27a07cd94fcb8 Mon Sep 17 00:00:00 2001 From: Mirko Faina Date: Mon, 16 Mar 2026 01:51:16 +0100 Subject: [PATCH 108/236] apply.c: fix -p argument parsing "git apply" has an option -p that takes an integer as its argument. Unfortunately the function apply_option_parse_p() in charge of parsing this argument uses atoi() to convert from string to integer, which allows a non-digit after the number (e.g. "1q") to be silently ignored. As a consequence, an argument that does not begin with a digit silently becomes a zero. Despite this command working fine when a non-positive argument is passed, it might be useful for the end user to know that their input contains non-digits that might've been unintended. Replace atoi() with strtol_i() to catch malformed inputs. Signed-off-by: Mirko Faina Signed-off-by: Junio C Hamano --- apply.c | 3 ++- t/t4120-apply-popt.sh | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/apply.c b/apply.c index 3de4aa4d2eaac5..faf75b544977ad 100644 --- a/apply.c +++ b/apply.c @@ -4961,7 +4961,8 @@ static int apply_option_parse_p(const struct option *opt, BUG_ON_OPT_NEG(unset); - state->p_value = atoi(arg); + if (strtol_i(arg, 10, &state->p_value) < 0 || state->p_value < 0) + die(_("option -p expects a non-negative integer, got '%s'"), arg); state->p_value_known = 1; return 0; } diff --git a/t/t4120-apply-popt.sh b/t/t4120-apply-popt.sh index 697e86c0ff4560..c960fdf622512f 100755 --- a/t/t4120-apply-popt.sh +++ b/t/t4120-apply-popt.sh @@ -23,6 +23,47 @@ test_expect_success setup ' rmdir süb ' +test_expect_success 'git apply -p 1 patch' ' + cat >patch <<-\EOF && + From 90ad11d5b2d437e82d4d992f72fb44c2227798b5 Mon Sep 17 00:00:00 2001 + From: Mroik + Date: Mon, 9 Mar 2026 23:25:00 +0100 + Subject: [PATCH] Test + + --- + t/test/test | 0 + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 t/test/test + + diff --git a/t/test/test b/t/test/test + new file mode 100644 + index 0000000000..e69de29bb2 + -- + 2.53.0.851.ga537e3e6e9 + EOF + test_when_finished "rm -rf t" && + git apply -p 1 patch && + test_path_is_dir t +' + +test_expect_success 'apply fails due to non-num -p' ' + test_when_finished "rm -rf t test err" && + test_must_fail git apply -p malformed patch 2>err && + test_grep "option -p expects a non-negative integer" err +' + +test_expect_success 'apply fails due to trailing non-digit in -p' ' + test_when_finished "rm -rf t test err" && + test_must_fail git apply -p 2q patch 2>err && + test_grep "option -p expects a non-negative integer" err +' + +test_expect_success 'apply fails due to negative number in -p' ' + test_when_finished "rm -rf t test err patch" && + test_must_fail git apply -p -1 patch 2> err && + test_grep "option -p expects a non-negative integer" err +' + test_expect_success 'apply git diff with -p2' ' cp file1.saved file1 && git apply -p2 patch.file From d893f3e7cc0a49ac62ff0ec16d7fdbf606399333 Mon Sep 17 00:00:00 2001 From: PRASHANT S BISHT Date: Mon, 16 Mar 2026 22:54:57 +0530 Subject: [PATCH 109/236] t4200: convert test -[df] checks to test_path_* helpers Replace old-style path existence checks in t4200-rerere.sh with the appropriate test_path_* helper functions. These helpers provide clearer diagnostic messages on failure than the raw shell test builtin. Signed-off-by: Prashant S Bisht Signed-off-by: Junio C Hamano --- t/t4200-rerere.sh | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/t/t4200-rerere.sh b/t/t4200-rerere.sh index 204325f4d53df1..1717f407c80d86 100755 --- a/t/t4200-rerere.sh +++ b/t/t4200-rerere.sh @@ -72,7 +72,7 @@ test_expect_success 'nothing recorded without rerere' ' rm -rf .git/rr-cache && git config rerere.enabled false && test_must_fail git merge first && - ! test -d .git/rr-cache + test_path_is_missing .git/rr-cache ' test_expect_success 'activate rerere, old style (conflicting merge)' ' @@ -84,8 +84,8 @@ test_expect_success 'activate rerere, old style (conflicting merge)' ' sha1=$(sed "s/ .*//" .git/MERGE_RR) && rr=.git/rr-cache/$sha1 && grep "^=======\$" $rr/preimage && - ! test -f $rr/postimage && - ! test -f $rr/thisimage + test_path_is_missing $rr/postimage && + test_path_is_missing $rr/thisimage ' test_expect_success 'rerere.enabled works, too' ' @@ -110,8 +110,8 @@ test_expect_success 'set up rr-cache' ' test_expect_success 'rr-cache looks sane' ' # no postimage or thisimage yet - ! test -f $rr/postimage && - ! test -f $rr/thisimage && + test_path_is_missing $rr/postimage && + test_path_is_missing $rr/thisimage && # preimage has right number of lines cnt=$(sed -ne "/^<<<<<<>>>>>>/p" $rr/preimage | wc -l) && @@ -167,7 +167,7 @@ test_expect_success 'first postimage wins' ' git show first:a1 | sed "s/To die: t/To die! T/" >expect && git commit -q -a -m "prefer first over second" && - test -f $rr/postimage && + test_path_is_file $rr/postimage && oldmtimepost=$(test-tool chmtime --get -60 $rr/postimage) && @@ -190,14 +190,14 @@ test_expect_success 'rerere clear' ' mv $rr/postimage .git/post-saved && echo "$sha1 a1" | tr "\012" "\000" >.git/MERGE_RR && git rerere clear && - ! test -d $rr + test_path_is_missing $rr ' test_expect_success 'leftover directory' ' git reset --hard && mkdir -p $rr && test_must_fail git merge first && - test -f $rr/preimage + test_path_is_file $rr/preimage ' test_expect_success 'missing preimage' ' @@ -205,7 +205,7 @@ test_expect_success 'missing preimage' ' mkdir -p $rr && cp .git/post-saved $rr/postimage && test_must_fail git merge first && - test -f $rr/preimage + test_path_is_file $rr/preimage ' test_expect_success 'set up for garbage collection tests' ' @@ -230,16 +230,16 @@ test_expect_success 'set up for garbage collection tests' ' test_expect_success 'gc preserves young or recently used records' ' git rerere gc && - test -f $rr/preimage && - test -f $rr2/preimage + test_path_is_file $rr/preimage && + test_path_is_file $rr2/preimage ' test_expect_success 'old records rest in peace' ' test-tool chmtime =$just_over_60_days_ago $rr/postimage && test-tool chmtime =$just_over_15_days_ago $rr2/preimage && git rerere gc && - ! test -f $rr/preimage && - ! test -f $rr2/preimage + test_path_is_missing $rr/preimage && + test_path_is_missing $rr2/preimage ' rerere_gc_custom_expiry_test () { From 2594747ad1b52f5a4739de662d5ad14621c94738 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 14 Mar 2026 12:08:14 -0400 Subject: [PATCH 110/236] transport: plug leaks in transport_color_config() We retrieve config values with repo_config_get_string(), which will allocate a new copy of the string for us. But we don't hold on to those strings, since they are just fed to git_config_colorbool() and color_parse(). But nor do we free them, which means they leak. We can fix this by using the "_tmp" form of repo_config_get_string(), which just hands us a pointer directly to the internal storage. This is OK for our purposes, since we don't need it to last for longer than our parsing calls. Two interesting side notes here: 1. Many types already have a repo_config_get_X() variant that handles this for us (e.g., repo_config_get_bool()). But neither colorbools nor colors themselves have such helpers. We might think about adding them, but converting all callers is a larger task, and out of scope for this fix. 2. As far as I can tell, this leak has been there since 960786e761 (push: colorize errors, 2018-04-21), but wasn't detected by LSan in our test suite. It started triggering when we applied dd3693eb08 (transport-helper, connect: use clean_on_exit to reap children on abnormal exit, 2026-03-12) which is mostly unrelated. Even weirder, it seems to trigger only with clang (and not gcc), and only with GIT_TEST_DEFAULT_REF_FORMAT=reftable. So I think this is another odd case where the pointers happened to be hanging around in stack memory, but changing the pattern of function calls in nearby code was enough for them to be incidentally overwritten. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- transport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/transport.c b/transport.c index e305d6bd228b45..7985b42a7431ca 100644 --- a/transport.c +++ b/transport.c @@ -47,21 +47,21 @@ static int transport_color_config(void) "color.transport.reset", "color.transport.rejected" }, *key = "color.transport"; - char *value; + const char *value; static int initialized; if (initialized) return 0; initialized = 1; - if (!repo_config_get_string(the_repository, key, &value)) + if (!repo_config_get_string_tmp(the_repository, key, &value)) transport_use_color = git_config_colorbool(key, value); if (!want_color_stderr(transport_use_color)) return 0; for (size_t i = 0; i < ARRAY_SIZE(keys); i++) - if (!repo_config_get_string(the_repository, keys[i], &value)) { + if (!repo_config_get_string_tmp(the_repository, keys[i], &value)) { if (!value) return config_error_nonbool(keys[i]); if (color_parse(value, transport_colors[i]) < 0) From daa91c693eb1fef0cd04aed8c6b37ee79d5897e0 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Mon, 16 Mar 2026 22:48:24 +0100 Subject: [PATCH 111/236] doc: interpret-trailers: convert to synopsis style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See e.g. 0ae23ab5 (doc: convert git worktree to synopsis style, 2025-10-05) for the markup rules for this style. There aren’t many subtleties to the transformation of this doc since it doesn’t use any advanced constructs. The only thing is that "`:`{nbsp}" is used instead of `': '` to refer to effective inline-verbatim with a space (␠).[1] I also use (_) for emphasis although (') gives the same result. Also prefer linking to Git commands instead of saying e.g. `git format-patch`. But for this command we can type out git-interpret- trailers(1) to avoid a self-reference. Also replace camel case `` with kebab case ``. And while doing that make sure to replace `trailer.*` with `trailer.`. † 1: Similar to "`tag:`{nbsp}" in `Documentation/pretty-formats.adoc` Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git-interpret-trailers.adoc | 145 +++++++++++----------- 1 file changed, 73 insertions(+), 72 deletions(-) diff --git a/Documentation/git-interpret-trailers.adoc b/Documentation/git-interpret-trailers.adoc index fd335fe772ab99..ea47f2f7ae5f63 100644 --- a/Documentation/git-interpret-trailers.adoc +++ b/Documentation/git-interpret-trailers.adoc @@ -7,14 +7,14 @@ git-interpret-trailers - Add or parse structured information in commit messages SYNOPSIS -------- -[verse] -'git interpret-trailers' [--in-place] [--trim-empty] +[synopsis] +git interpret-trailers [--in-place] [--trim-empty] [(--trailer (|)[(=|:)])...] [--parse] [...] DESCRIPTION ----------- -Add or parse 'trailer' lines that look similar to RFC 822 e-mail +Add or parse _trailer_ lines that look similar to RFC 822 e-mail headers, at the end of the otherwise free-form part of a commit message. For example, in the following commit message @@ -27,23 +27,24 @@ Signed-off-by: Alice Signed-off-by: Bob ------------------------------------------------ -the last two lines starting with "Signed-off-by" are trailers. +the last two lines starting with `Signed-off-by` are trailers. This command reads commit messages from either the - arguments or the standard input if no is specified. +__ arguments or the standard input if no __ is specified. If `--parse` is specified, the output consists of the parsed trailers coming from the input, without influencing them with any command line options or configuration variables. -Otherwise, this command applies `trailer.*` configuration variables -(which could potentially add new trailers, as well as reposition them), -as well as any command line arguments that can override configuration -variables (such as `--trailer=...` which could also add new trailers), -to each input file. The result is emitted on the standard output. +Otherwise, this command applies `trailer.` configuration +variables (which could potentially add new trailers, as well as +reposition them), as well as any command line arguments that can +override configuration variables (such as `--trailer=...` which could +also add new trailers), to each input file. The result is emitted on the +standard output. This command can also operate on the output of linkgit:git-format-patch[1], which is more elaborate than a plain commit message. Namely, such output -includes a commit message (as above), a "---" divider line, and a patch part. +includes a commit message (as above), a `---` divider line, and a patch part. For these inputs, the divider and patch parts are not modified by this command and are emitted as is on the output, unless `--no-divider` is specified. @@ -53,24 +54,24 @@ are applied to each input and the way any existing trailer in the input is changed. They also make it possible to automatically add some trailers. -By default, a '=' or ':' argument given +By default, a `=` or `:` argument given using `--trailer` will be appended after the existing trailers only if -the last trailer has a different (, ) pair (or if there -is no existing trailer). The and parts will be trimmed +the last trailer has a different (__, __) pair (or if there +is no existing trailer). The __ and __ parts will be trimmed to remove starting and trailing whitespace, and the resulting trimmed - and will appear in the output like this: +__ and __ will appear in the output like this: ------------------------------------------------ key: value ------------------------------------------------ -This means that the trimmed and will be separated by -`': '` (one colon followed by one space). +This means that the trimmed __ and __ will be separated by +"`:`{nbsp}" (one colon followed by one space). -For convenience, a can be configured to make using `--trailer` +For convenience, a __ can be configured to make using `--trailer` shorter to type on the command line. This can be configured using the -'trailer..key' configuration variable. The must be a prefix -of the full string, although case sensitivity does not matter. For +`trailer..key` configuration variable. The __ must be a prefix +of the full __ string, although case sensitivity does not matter. For example, if you have ------------------------------------------------ @@ -91,13 +92,13 @@ least one Git-generated or user-configured trailer and consists of at least 25% trailers. The group must be preceded by one or more empty (or whitespace-only) lines. The group must either be at the end of the input or be the last -non-whitespace lines before a line that starts with '---' (followed by a +non-whitespace lines before a line that starts with `---` (followed by a space or the end of the line). When reading trailers, there can be no whitespace before or inside the -, but any number of regular space and tab characters are allowed -between the and the separator. There can be whitespaces before, -inside or after the . The may be split over multiple lines +__, but any number of regular space and tab characters are allowed +between the __ and the separator. There can be whitespaces before, +inside or after the __. The __ may be split over multiple lines with each subsequent line starting with at least one whitespace, like the "folding" in RFC 822. Example: @@ -111,77 +112,77 @@ rules for RFC 822 headers. For example they do not follow the encoding rule. OPTIONS ------- ---in-place:: +`--in-place`:: Edit the files in place. ---trim-empty:: - If the part of any trailer contains only whitespace, +`--trim-empty`:: + If the __ part of any trailer contains only whitespace, the whole trailer will be removed from the output. This applies to existing trailers as well as new trailers. ---trailer [(=|:)]:: - Specify a (, ) pair that should be applied as a +`--trailer [(=|:)]`:: + Specify a (__, __) pair that should be applied as a trailer to the inputs. See the description of this command. ---where :: ---no-where:: +`--where `:: +`--no-where`:: Specify where all new trailers will be added. A setting - provided with '--where' overrides the `trailer.where` and any - applicable `trailer..where` configuration variables - and applies to all '--trailer' options until the next occurrence of - '--where' or '--no-where'. Upon encountering '--no-where', clear the - effect of any previous use of '--where', such that the relevant configuration + provided with `--where` overrides the `trailer.where` and any + applicable `trailer..where` configuration variables + and applies to all `--trailer` options until the next occurrence of + `--where` or `--no-where`. Upon encountering `--no-where`, clear the + effect of any previous use of `--where`, such that the relevant configuration variables are no longer overridden. Possible placements are `after`, `before`, `end` or `start`. ---if-exists :: ---no-if-exists:: +`--if-exists `:: +`--no-if-exists`:: Specify what action will be performed when there is already at - least one trailer with the same in the input. A setting - provided with '--if-exists' overrides the `trailer.ifExists` and any - applicable `trailer..ifExists` configuration variables - and applies to all '--trailer' options until the next occurrence of - '--if-exists' or '--no-if-exists'. Upon encountering '--no-if-exists', clear the - effect of any previous use of '--if-exists', such that the relevant configuration + least one trailer with the same __ in the input. A setting + provided with `--if-exists` overrides the `trailer.ifExists` and any + applicable `trailer..ifExists` configuration variables + and applies to all `--trailer` options until the next occurrence of + `--if-exists` or `--no-if-exists`. Upon encountering `--no-if-exists`, clear the + effect of any previous use of `--if-exists`, such that the relevant configuration variables are no longer overridden. Possible actions are `addIfDifferent`, `addIfDifferentNeighbor`, `add`, `replace` and `doNothing`. ---if-missing :: ---no-if-missing:: +`--if-missing `:: +`--no-if-missing`:: Specify what action will be performed when there is no other - trailer with the same in the input. A setting - provided with '--if-missing' overrides the `trailer.ifMissing` and any - applicable `trailer..ifMissing` configuration variables - and applies to all '--trailer' options until the next occurrence of - '--if-missing' or '--no-if-missing'. Upon encountering '--no-if-missing', - clear the effect of any previous use of '--if-missing', such that the relevant + trailer with the same __ in the input. A setting + provided with `--if-missing` overrides the `trailer.ifMissing` and any + applicable `trailer..ifMissing` configuration variables + and applies to all `--trailer` options until the next occurrence of + `--if-missing` or `--no-if-missing`. Upon encountering `--no-if-missing`, + clear the effect of any previous use of `--if-missing`, such that the relevant configuration variables are no longer overridden. Possible actions are `doNothing` or `add`. ---only-trailers:: +`--only-trailers`:: Output only the trailers, not any other parts of the input. ---only-input:: +`--only-input`:: Output only trailers that exist in the input; do not add any - from the command-line or by applying `trailer.*` configuration + from the command-line or by applying `trailer.` configuration variables. ---unfold:: +`--unfold`:: If a trailer has a value that runs over multiple lines (aka "folded"), reformat the value into a single line. ---parse:: +`--parse`:: A convenience alias for `--only-trailers --only-input --unfold`. This makes it easier to only see the trailers coming from the input without influencing them with any command line options or configuration variables, while also making the output machine-friendly with - --unfold. + `--unfold`. ---no-divider:: +`--no-divider`:: Do not treat `---` as the end of the commit message. Use this when you know your input contains just the commit message itself - (and not an email or the output of `git format-patch`). + (and not an email or the output of linkgit:git-format-patch[1]). CONFIGURATION VARIABLES ----------------------- @@ -193,7 +194,7 @@ include::config/trailer.adoc[] EXAMPLES -------- -* Configure a 'sign' trailer with a 'Signed-off-by' key, and then +* Configure a `sign` trailer with a `Signed-off-by` key, and then add two of these trailers to a commit message file: + ------------ @@ -230,8 +231,8 @@ Signed-off-by: Bob Acked-by: Alice ------------ -* Extract the last commit as a patch, and add a 'Cc' and a - 'Reviewed-by' trailer to it: +* Extract the last commit as a patch, and add a `Cc` and a + `Reviewed-by` trailer to it: + ------------ $ git format-patch -1 @@ -239,9 +240,9 @@ $ git format-patch -1 $ git interpret-trailers --trailer 'Cc: Alice ' --trailer 'Reviewed-by: Bob ' 0001-foo.patch >0001-bar.patch ------------ -* Configure a 'sign' trailer with a command to automatically add a - 'Signed-off-by: ' with the author information only if there is no - 'Signed-off-by: ' already, and show how it works: +* Configure a `sign` trailer with a command to automatically add a + "`Signed-off-by:`{nbsp}" with the author information only if there is no + "`Signed-off-by:`{nbsp}" already, and show how it works: + ------------ $ cat msg1.txt @@ -272,7 +273,7 @@ body text Signed-off-by: Alice ------------ -* Configure a 'fix' trailer with a key that contains a '#' and no +* Configure a `fix` trailer with a key that contains a `#` and no space after this character, and show how it works: + ------------ @@ -284,7 +285,7 @@ subject Fix #42 ------------ -* Configure a 'help' trailer with a cmd use a script `glog-find-author` +* Configure a `help` trailer with a cmd use a script `glog-find-author` which search specified author identity from git log in git repository and show how it works: + @@ -308,7 +309,7 @@ Helped-by: Junio C Hamano Helped-by: Christian Couder ------------ -* Configure a 'ref' trailer with a cmd use a script `glog-grep` +* Configure a `ref` trailer with a cmd use a script `glog-grep` to grep last relevant commit from git log in the git repository and show how it works: + @@ -331,7 +332,7 @@ body text Reference-to: 8bc9a0c769 (Add copyright notices., 2005-04-07) ------------ -* Configure a 'see' trailer with a command to show the subject of a +* Configure a `see` trailer with a command to show the subject of a commit that is related, and show how it works: + ------------ @@ -359,8 +360,8 @@ See-also: fe3187489d69c4 (subject of related commit) * Configure a commit template with some trailers with empty values (using sed to show and keep the trailing spaces at the end of the trailers), then configure a commit-msg hook that uses - 'git interpret-trailers' to remove trailers with empty values and - to add a 'git-version' trailer: + git-interpret-trailers(1) to remove trailers with empty values and to + add a `git-version` trailer: + ------------ $ cat temp.txt From bec94f79e4b7473adf70c7b0623057883c549d16 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Mon, 16 Mar 2026 22:48:25 +0100 Subject: [PATCH 112/236] doc: interpret-trailers: normalize and fill out options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some negated options are missing according to `git interpret-trailers -h`. Also normalize to the “stuck form” (see gitcli(7)) like what was done in 806337c7 (doc: notes: use stuck form throughout, 2025-05-27).[1] Also normalize the order of the regular and negated options according to the current convention.[2] Also note that `--no-trailer` will reset the list. † 1: See also https://lore.kernel.org/git/6f7d027e-088a-4d66-92af-b8d1c32d730c@app.fastmail.com/ † 2: https://lore.kernel.org/git/xmqqcyct1mtq.fsf@gitster.g/ Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git-interpret-trailers.adoc | 66 +++++++++++++++-------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/Documentation/git-interpret-trailers.adoc b/Documentation/git-interpret-trailers.adoc index ea47f2f7ae5f63..77b4f63b05cf5b 100644 --- a/Documentation/git-interpret-trailers.adoc +++ b/Documentation/git-interpret-trailers.adoc @@ -113,64 +113,80 @@ rules for RFC 822 headers. For example they do not follow the encoding rule. OPTIONS ------- `--in-place`:: - Edit the files in place. +`--no-in-place`:: + Edit the files in place. The default is `--no-in-place`. `--trim-empty`:: +`--no-trim-empty`:: If the __ part of any trailer contains only whitespace, the whole trailer will be removed from the output. This applies to existing trailers as well as new trailers. ++ +The default is `--no-trim-empty`. -`--trailer [(=|:)]`:: +`--trailer=[(=|:)]`:: +`--no-trailer`:: Specify a (__, __) pair that should be applied as a - trailer to the inputs. See the description of this - command. + trailer to the inputs. See the description of this command. Can + be given multiple times. ++ +Use `--no-trailer` to reset the list. -`--where `:: +`--where=`:: `--no-where`:: Specify where all new trailers will be added. A setting provided with `--where` overrides the `trailer.where` and any applicable `trailer..where` configuration variables and applies to all `--trailer` options until the next occurrence of - `--where` or `--no-where`. Upon encountering `--no-where`, clear the - effect of any previous use of `--where`, such that the relevant configuration - variables are no longer overridden. Possible placements are `after`, + `--where` or `--no-where`. Possible placements are `after`, `before`, `end` or `start`. ++ +Use `--no-where` to clear the effect of any previous use of `--where`, +such that the relevant configuration variables are no longer overridden. -`--if-exists `:: +`--if-exists=`:: `--no-if-exists`:: Specify what action will be performed when there is already at least one trailer with the same __ in the input. A setting provided with `--if-exists` overrides the `trailer.ifExists` and any applicable `trailer..ifExists` configuration variables and applies to all `--trailer` options until the next occurrence of - `--if-exists` or `--no-if-exists`. Upon encountering `--no-if-exists`, clear the - effect of any previous use of `--if-exists`, such that the relevant configuration - variables are no longer overridden. Possible actions are `addIfDifferent`, + `--if-exists` or `--no-if-exists`. Possible actions are `addIfDifferent`, `addIfDifferentNeighbor`, `add`, `replace` and `doNothing`. ++ +Use `--no-if-exists` to clear the effect of any previous use of +`--if-exists`, such that the relevant configuration variables are no +longer overridden. -`--if-missing `:: +`--if-missing=`:: `--no-if-missing`:: Specify what action will be performed when there is no other trailer with the same __ in the input. A setting provided with `--if-missing` overrides the `trailer.ifMissing` and any applicable `trailer..ifMissing` configuration variables and applies to all `--trailer` options until the next occurrence of - `--if-missing` or `--no-if-missing`. Upon encountering `--no-if-missing`, - clear the effect of any previous use of `--if-missing`, such that the relevant - configuration variables are no longer overridden. Possible actions are `doNothing` - or `add`. + `--if-missing` or `--no-if-missing`. Possible actions are + `doNothing` or `add`. ++ +Use `--no-if-missing` to clear the effect of any previous use of +`--if-missing`, such that the relevant configuration variables are no +longer overridden. `--only-trailers`:: - Output only the trailers, not any other parts of the input. +`--no-only-trailers`:: + Output only the trailers, not any other parts of the + input. The default is `--no-only-trailers`. `--only-input`:: +`--no-only-input`:: Output only trailers that exist in the input; do not add any from the command-line or by applying `trailer.` configuration - variables. + variables. The default is `--no-only-input`. `--unfold`:: +`--no-unfold`:: If a trailer has a value that runs over multiple lines (aka "folded"), - reformat the value into a single line. + reformat the value into a single line. The default is `--no-unfold`. `--parse`:: A convenience alias for `--only-trailers --only-input @@ -178,11 +194,15 @@ OPTIONS input without influencing them with any command line options or configuration variables, while also making the output machine-friendly with `--unfold`. ++ +There is no convenience alias to negate this alias. +`--divider`:: `--no-divider`:: - Do not treat `---` as the end of the commit message. Use this - when you know your input contains just the commit message itself - (and not an email or the output of linkgit:git-format-patch[1]). + Treat `---` as the end of the commit message. This is the default. + Use `--no-divider` when you know your input contains just the + commit message itself (and not an email or the output of + linkgit:git-format-patch[1]). CONFIGURATION VARIABLES ----------------------- From 95bd86772eae65917fe5723ed89ee86c76907ef9 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Mon, 16 Mar 2026 22:48:26 +0100 Subject: [PATCH 113/236] doc: config: convert trailers section to synopsis style Convert this part of the configuration documentation to synopsis style so that all of git-interpret-trailers(1) is consistent. See the commit message from two commits ago. Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/config/trailer.adoc | 121 +++++++++++++++--------------- 1 file changed, 61 insertions(+), 60 deletions(-) diff --git a/Documentation/config/trailer.adoc b/Documentation/config/trailer.adoc index 60bc221c88b801..1bc70192d3a547 100644 --- a/Documentation/config/trailer.adoc +++ b/Documentation/config/trailer.adoc @@ -1,21 +1,21 @@ -trailer.separators:: +`trailer.separators`:: This option tells which characters are recognized as trailer - separators. By default only ':' is recognized as a trailer - separator, except that '=' is always accepted on the command + separators. By default only `:` is recognized as a trailer + separator, except that `=` is always accepted on the command line for compatibility with other git commands. + The first character given by this option will be the default character used when another separator is not specified in the config for this trailer. + -For example, if the value for this option is "%=$", then only lines -using the format '' with containing '%', '=' -or '$' and then spaces will be considered trailers. And '%' will be +For example, if the value for this option is `%=$`, then only lines +using the format __ with __ containing `%`, `=` +or `$` and then spaces will be considered trailers. And `%` will be the default separator used, so by default trailers will appear like: -'% ' (one percent sign and one space will appear between +`% ` (one percent sign and one space will appear between the key and the value). -trailer.where:: +`trailer.where`:: This option tells where a new trailer will be added. + This can be `end`, which is the default, `start`, `after` or `before`. @@ -27,41 +27,41 @@ If it is `start`, then each new trailer will appear at the start, instead of the end, of the existing trailers. + If it is `after`, then each new trailer will appear just after the -last trailer with the same . +last trailer with the same __. + If it is `before`, then each new trailer will appear just before the -first trailer with the same . +first trailer with the same __. -trailer.ifexists:: +`trailer.ifexists`:: This option makes it possible to choose what action will be performed when there is already at least one trailer with the - same in the input. + same __ in the input. + The valid values for this option are: `addIfDifferentNeighbor` (this is the default), `addIfDifferent`, `add`, `replace` or `doNothing`. + With `addIfDifferentNeighbor`, a new trailer will be added only if no -trailer with the same (, ) pair is above or below the line +trailer with the same (__, __) pair is above or below the line where the new trailer will be added. + With `addIfDifferent`, a new trailer will be added only if no trailer -with the same (, ) pair is already in the input. +with the same (__, __) pair is already in the input. + With `add`, a new trailer will be added, even if some trailers with -the same (, ) pair are already in the input. +the same (__, __) pair are already in the input. + -With `replace`, an existing trailer with the same will be +With `replace`, an existing trailer with the same __ will be deleted and the new trailer will be added. The deleted trailer will be -the closest one (with the same ) to the place where the new one +the closest one (with the same __) to the place where the new one will be added. + With `doNothing`, nothing will be done; that is no new trailer will be -added if there is already one with the same in the input. +added if there is already one with the same __ in the input. -trailer.ifmissing:: +`trailer.ifmissing`:: This option makes it possible to choose what action will be performed when there is not yet any trailer with the same - in the input. + __ in the input. + The valid values for this option are: `add` (this is the default) and `doNothing`. @@ -70,67 +70,68 @@ With `add`, a new trailer will be added. + With `doNothing`, nothing will be done. -trailer..key:: - Defines a for the . The must be a - prefix (case does not matter) of the . For example, in `git - config trailer.ack.key "Acked-by"` the "Acked-by" is the and - the "ack" is the . This configuration allows the shorter +`trailer..key`:: + Defines a __ for the __. The __ must be a + prefix (case does not matter) of the __. For example, in `git + config trailer.ack.key "Acked-by"` the `Acked-by` is the __ and + the `ack` is the __. This configuration allows the shorter `--trailer "ack:..."` invocation on the command line using the "ack" - instead of the longer `--trailer "Acked-by:..."`. + `` instead of the longer `--trailer "Acked-by:..."`. + -At the end of the , a separator can appear and then some -space characters. By default the only valid separator is ':', +At the end of the __, a separator can appear and then some +space characters. By default the only valid separator is `:`, but this can be changed using the `trailer.separators` config variable. + If there is a separator in the key, then it overrides the default separator when adding the trailer. -trailer..where:: - This option takes the same values as the 'trailer.where' +`trailer..where`:: + This option takes the same values as the `trailer.where` configuration variable and it overrides what is specified by - that option for trailers with the specified . + that option for trailers with the specified __. -trailer..ifexists:: - This option takes the same values as the 'trailer.ifexists' +`trailer..ifexists`:: + This option takes the same values as the `trailer.ifexists` configuration variable and it overrides what is specified by - that option for trailers with the specified . + that option for trailers with the specified __. -trailer..ifmissing:: - This option takes the same values as the 'trailer.ifmissing' +`trailer..ifmissing`:: + This option takes the same values as the `trailer.ifmissing` configuration variable and it overrides what is specified by - that option for trailers with the specified . + that option for trailers with the specified __. -trailer..command:: - Deprecated in favor of 'trailer..cmd'. - This option behaves in the same way as 'trailer..cmd', except +`trailer..command`:: + Deprecated in favor of `trailer..cmd`. + This option behaves in the same way as `trailer..cmd`, except that it doesn't pass anything as argument to the specified command. - Instead the first occurrence of substring $ARG is replaced by the - that would be passed as argument. + Instead the first occurrence of substring `$ARG` is replaced by the + __ that would be passed as argument. + -Note that $ARG in the user's command is -only replaced once and that the original way of replacing $ARG is not safe. +Note that `$ARG` in the user's command is +only replaced once and that the original way of replacing `$ARG` is not safe. + -When both 'trailer..cmd' and 'trailer..command' are given -for the same , 'trailer..cmd' is used and -'trailer..command' is ignored. +When both `trailer..cmd` and `trailer..command` are given +for the same __, `trailer..cmd` is used and +`trailer..command` is ignored. -trailer..cmd:: +`trailer..cmd`:: This option can be used to specify a shell command that will be called - once to automatically add a trailer with the specified , and then - called each time a '--trailer =' argument is specified to - modify the of the trailer that this option would produce. + once to automatically add a trailer with the specified __, and then + called each time a `--trailer =` argument is specified to + modify the __ of the trailer that this option would produce. + When the specified command is first called to add a trailer -with the specified , the behavior is as if a special -'--trailer =' argument was added at the beginning -of the "git interpret-trailers" command, where -is taken to be the standard output of the command with any -leading and trailing whitespace trimmed off. +with the specified __, the behavior is as if a special +`--trailer =` argument was added at the beginning +of linkgit:git-interpret-trailers[1], where __ is taken to be the +standard output of the command with any leading and trailing whitespace +trimmed off. + -If some '--trailer =' arguments are also passed +If some `--trailer =` arguments are also passed on the command line, the command is called again once for each -of these arguments with the same . And the part +of these arguments with the same __. And the __ part of these arguments, if any, will be passed to the command as its -first argument. This way the command can produce a computed -from the passed in the '--trailer =' argument. +first argument. This way the command can produce a __ computed +from the __ passed in the `--trailer =` +argument. From 37182267a051906d7c625fd134c041297e757b3e Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Mon, 16 Mar 2026 22:48:27 +0100 Subject: [PATCH 114/236] interpret-trailers: use placeholder instead of * Use `` instead of `*` in order to be consistent with the documentation. Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- builtin/interpret-trailers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/interpret-trailers.c b/builtin/interpret-trailers.c index 41b0750e5af324..4b617c3ecb0c55 100644 --- a/builtin/interpret-trailers.c +++ b/builtin/interpret-trailers.c @@ -211,7 +211,7 @@ int cmd_interpret_trailers(int argc, N_("action if trailer is missing"), option_parse_if_missing), OPT_BOOL(0, "only-trailers", &opts.only_trailers, N_("output only the trailers")), - OPT_BOOL(0, "only-input", &opts.only_input, N_("do not apply trailer.* configuration variables")), + OPT_BOOL(0, "only-input", &opts.only_input, N_("do not apply trailer. configuration variables")), OPT_BOOL(0, "unfold", &opts.unfold, N_("reformat multiline trailer values as single-line values")), OPT_CALLBACK_F(0, "parse", &opts, NULL, N_("alias for --only-trailers --only-input --unfold"), PARSE_OPT_NOARG | PARSE_OPT_NONEG, parse_opt_parse), From 60d8c1e97d62c27ef60db0bc3d5deadd6dfdb98d Mon Sep 17 00:00:00 2001 From: Eric Ju Date: Mon, 16 Mar 2026 22:36:24 -0400 Subject: [PATCH 115/236] refs: add 'preparing' phase to the reference-transaction hook The "reference-transaction" hook is invoked multiple times during a ref transaction. Each invocation corresponds to a different phase: - The "prepared" phase indicates that references have been locked. - The "committed" phase indicates that all updates have been written to disk. - The "aborted" phase indicates that the transaction has been aborted and that all changes have been rolled back. This hook can be used to learn about the updates that Git wants to perform. For example, forges use it to coordinate reference updates across multiple nodes. However, the phases are insufficient for some specific use cases. The earliest observable phase in the "reference-transaction" hook is "prepared", at which point Git has already taken exclusive locks on every affected reference. This makes it suitable for last-chance validation, but not for serialization. So by the time a hook sees the "prepared" phase, it has no way to defer locking, and thus it cannot rearrange multiple concurrent ref transactions relative to one another. Introduce a new "preparing" phase that runs before the "prepared" phase, that is before Git acquires any reference lock on disk. This gives callers a well-defined window to perform validation, enable higher-level ordering of concurrent transactions, or reject the transaction entirely, all without interfering with the locking state. This change is strictly speaking not backwards compatible. Existing hook scripts that do not know how to handle unknown phases may treat 'preparing' as an error and return non-zero. But the hook is considered to expose internal implementation details of how Git works, and as such we have been a bit more lenient with changing its exact semantics, like for example in a8ae923f85 (refs: support symrefs in 'reference-transaction' hook, 2024-05-07). An alternative would be to introduce a "reference-transaction-v2" hook that knows about the new phase. This feels like a rather heavy-weight option though, and was thus discarded. Helped-by: Patrick Steinhardt Helped-by: Justin Tobler Helped-by: Karthik Nayak Signed-off-by: Eric Ju Signed-off-by: Junio C Hamano --- Documentation/githooks.adoc | 19 ++++++++++++------- refs.c | 12 +++++++++++- t/t1416-ref-transaction-hooks.sh | 30 ++++++++++++++++++++++++++---- t/t5510-fetch.sh | 7 ++++++- 4 files changed, 55 insertions(+), 13 deletions(-) diff --git a/Documentation/githooks.adoc b/Documentation/githooks.adoc index 056553788d4f43..ed045940d18ba5 100644 --- a/Documentation/githooks.adoc +++ b/Documentation/githooks.adoc @@ -484,13 +484,16 @@ reference-transaction ~~~~~~~~~~~~~~~~~~~~~ This hook is invoked by any Git command that performs reference -updates. It executes whenever a reference transaction is prepared, -committed or aborted and may thus get called multiple times. The hook -also supports symbolic reference updates. +updates. It executes whenever a reference transaction is preparing, +prepared, committed or aborted and may thus get called multiple times. +The hook also supports symbolic reference updates. The hook takes exactly one argument, which is the current state the given reference transaction is in: + - "preparing": All reference updates have been queued to the + transaction but references are not yet locked on disk. + - "prepared": All reference updates have been queued to the transaction and references were locked on disk. @@ -511,16 +514,18 @@ ref and `` is the full name of the ref. When force updating the reference regardless of its current value or when the reference is to be created anew, `` is the all-zeroes object name. To distinguish these cases, you can inspect the current value of -`` via `git rev-parse`. +`` via `git rev-parse`. During the "preparing" state, symbolic +references are not resolved: `` will reflect the symbolic reference +itself rather than the object it points to. For symbolic reference updates the `` and `` fields could denote references instead of objects. A reference will be denoted with a 'ref:' prefix, like `ref:`. The exit status of the hook is ignored for any state except for the -"prepared" state. In the "prepared" state, a non-zero exit status will -cause the transaction to be aborted. The hook will not be called with -"aborted" state in that case. +"preparing" and "prepared" states. In these states, a non-zero exit +status will cause the transaction to be aborted. The hook will not be +called with "aborted" state in that case. push-to-checkout ~~~~~~~~~~~~~~~~ diff --git a/refs.c b/refs.c index 6fb8f9d10c9800..e66cf4861d7ec5 100644 --- a/refs.c +++ b/refs.c @@ -64,6 +64,9 @@ const char *ref_storage_format_to_name(enum ref_storage_format ref_storage_forma return be->name; } +static const char *abort_by_ref_transaction_hook = + N_("in '%s' phase, update aborted by the reference-transaction hook"); + /* * How to handle various characters in refnames: * 0: An acceptable character for refs @@ -2655,6 +2658,13 @@ int ref_transaction_prepare(struct ref_transaction *transaction, if (ref_update_reject_duplicates(&transaction->refnames, err)) return REF_TRANSACTION_ERROR_GENERIC; + /* Preparing checks before locking references */ + ret = run_transaction_hook(transaction, "preparing"); + if (ret) { + ref_transaction_abort(transaction, err); + die(_(abort_by_ref_transaction_hook), "preparing"); + } + ret = refs->be->transaction_prepare(refs, transaction, err); if (ret) return ret; @@ -2662,7 +2672,7 @@ int ref_transaction_prepare(struct ref_transaction *transaction, ret = run_transaction_hook(transaction, "prepared"); if (ret) { ref_transaction_abort(transaction, err); - die(_("ref updates aborted by hook")); + die(_(abort_by_ref_transaction_hook), "prepared"); } return 0; diff --git a/t/t1416-ref-transaction-hooks.sh b/t/t1416-ref-transaction-hooks.sh index d91dd3a3b55e46..4fe9d9b23465ab 100755 --- a/t/t1416-ref-transaction-hooks.sh +++ b/t/t1416-ref-transaction-hooks.sh @@ -20,6 +20,7 @@ test_expect_success 'hook allows updating ref if successful' ' echo "$*" >>actual EOF cat >expect <<-EOF && + preparing prepared committed EOF @@ -27,6 +28,18 @@ test_expect_success 'hook allows updating ref if successful' ' test_cmp expect actual ' +test_expect_success 'hook aborts updating ref in preparing state' ' + git reset --hard PRE && + test_hook reference-transaction <<-\EOF && + if test "$1" = preparing + then + exit 1 + fi + EOF + test_must_fail git update-ref HEAD POST 2>err && + test_grep "in '\''preparing'\'' phase, update aborted by the reference-transaction hook" err +' + test_expect_success 'hook aborts updating ref in prepared state' ' git reset --hard PRE && test_hook reference-transaction <<-\EOF && @@ -36,7 +49,7 @@ test_expect_success 'hook aborts updating ref in prepared state' ' fi EOF test_must_fail git update-ref HEAD POST 2>err && - test_grep "ref updates aborted by hook" err + test_grep "in '\''prepared'\'' phase, update aborted by the reference-transaction hook" err ' test_expect_success 'hook gets all queued updates in prepared state' ' @@ -121,6 +134,7 @@ test_expect_success 'interleaving hook calls succeed' ' cat >expect <<-EOF && hooks/update refs/tags/PRE $ZERO_OID $PRE_OID hooks/update refs/tags/POST $ZERO_OID $POST_OID + hooks/reference-transaction preparing hooks/reference-transaction prepared hooks/reference-transaction committed EOF @@ -143,6 +157,8 @@ test_expect_success 'hook captures git-symbolic-ref updates' ' git symbolic-ref refs/heads/symref refs/heads/main && cat >expect <<-EOF && + preparing + $ZERO_OID ref:refs/heads/main refs/heads/symref prepared $ZERO_OID ref:refs/heads/main refs/heads/symref committed @@ -171,14 +187,20 @@ test_expect_success 'hook gets all queued symref updates' ' # In the files backend, "delete" also triggers an additional transaction # update on the packed-refs backend, which constitutes additional reflog # entries. + cat >expect <<-EOF && + preparing + ref:refs/heads/main $ZERO_OID refs/heads/symref + ref:refs/heads/main $ZERO_OID refs/heads/symrefd + $ZERO_OID ref:refs/heads/main refs/heads/symrefc + ref:refs/heads/main ref:refs/heads/branch refs/heads/symrefu + EOF + if test_have_prereq REFFILES then - cat >expect <<-EOF + cat >>expect <<-EOF aborted $ZERO_OID $ZERO_OID refs/heads/symrefd EOF - else - >expect fi && cat >>expect <<-EOF && diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index 5dcb4b51a47d88..6fe21e2b3a6b1b 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -469,12 +469,17 @@ test_expect_success 'fetch --atomic executes a single reference transaction only head_oid=$(git rev-parse HEAD) && cat >expected <<-EOF && + preparing + $ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-1 + $ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-2 prepared $ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-1 $ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-2 committed $ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-1 $ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-2 + preparing + $ZERO_OID ref:refs/remotes/origin/main refs/remotes/origin/HEAD EOF rm -f atomic/actual && @@ -497,7 +502,7 @@ test_expect_success 'fetch --atomic aborts all reference updates if hook aborts' head_oid=$(git rev-parse HEAD) && cat >expected <<-EOF && - prepared + preparing $ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-abort-1 $ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-abort-2 $ZERO_OID $head_oid refs/remotes/origin/atomic-hooks-abort-3 From 81cf6ccc29002467f44798ada7d74993a44c94b0 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 17 Mar 2026 02:21:32 +0000 Subject: [PATCH 116/236] line-log: fix crash when combined with pickaxe options queue_diffs() passes the caller's diff_options, which may carry user-specified pickaxe state, to diff_tree_oid() and diffcore_std() when detecting renames for line-level history tracking. When pickaxe options are present on the command line (-G and -S to filter by text pattern, --find-object to filter by object identity), diffcore_std() also runs diffcore_pickaxe(), which may discard diff pairs that are relevant for rename detection. Losing those pairs breaks rename following. Before a2bb801f6a (line-log: avoid unnecessary full tree diffs, 2019-08-21), this silently truncated history at rename boundaries. That commit moved filter_diffs_for_paths() inside the rename- detection block, so it only runs when diff_might_be_rename() returns true. When pickaxe discards a rename pair, the rename goes undetected, and a deletion pair at a subsequent commit passes through uncleaned, reaching process_diff_filepair() with an invalid filespec and triggering an assertion failure. Fix this by building a private diff_options for the rename-detection path inside queue_diffs(), following the same pattern used by blame's find_rename(). This isolates the rename machinery from unrelated user-specified options. Reported-by: Matthew Hughes Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- line-log.c | 22 ++++++++++++++---- t/t4211-line-log.sh | 55 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/line-log.c b/line-log.c index eeaf68454e2246..9d12ece18133ca 100644 --- a/line-log.c +++ b/line-log.c @@ -858,15 +858,29 @@ static void queue_diffs(struct line_log_data *range, diff_queue_clear(&diff_queued_diff); diff_tree_oid(parent_tree_oid, tree_oid, "", opt); if (opt->detect_rename && diff_might_be_rename()) { + struct diff_options rename_opts; + + /* + * Build a private diff_options for rename detection so + * that any user-specified options on the original opts + * (e.g. pickaxe) cannot discard diff pairs needed for + * rename tracking. Similar to blame's find_rename(). + */ + repo_diff_setup(opt->repo, &rename_opts); + rename_opts.flags.recursive = 1; + rename_opts.detect_rename = opt->detect_rename; + rename_opts.rename_score = opt->rename_score; + rename_opts.output_format = DIFF_FORMAT_NO_OUTPUT; + diff_setup_done(&rename_opts); + /* must look at the full tree diff to detect renames */ - clear_pathspec(&opt->pathspec); diff_queue_clear(&diff_queued_diff); - - diff_tree_oid(parent_tree_oid, tree_oid, "", opt); + diff_tree_oid(parent_tree_oid, tree_oid, "", &rename_opts); filter_diffs_for_paths(range, 1); - diffcore_std(opt); + diffcore_std(&rename_opts); filter_diffs_for_paths(range, 0); + diff_free(&rename_opts); } move_diff_queue(queue, &diff_queued_diff); } diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh index 0a7c3ca42f80fa..659a943aa196b7 100755 --- a/t/t4211-line-log.sh +++ b/t/t4211-line-log.sh @@ -367,4 +367,59 @@ test_expect_success 'show line-log with graph' ' test_cmp expect actual ' +test_expect_success 'setup for -L with -G/-S/--find-object and a merge with rename' ' + git checkout --orphan pickaxe-rename && + git reset --hard && + + echo content >file && + git add file && + git commit -m "add file" && + + git checkout -b pickaxe-rename-side && + git mv file renamed-file && + git commit -m "rename file" && + + git checkout pickaxe-rename && + git commit --allow-empty -m "diverge" && + git merge --no-edit pickaxe-rename-side && + + git mv renamed-file file && + git commit -m "rename back" +' + +test_expect_success '-L -G does not crash with merge and rename' ' + git log --format="%s" --no-patch -L 1,1:file -G "." >actual +' + +test_expect_success '-L -S does not crash with merge and rename' ' + git log --format="%s" --no-patch -L 1,1:file -S content >actual +' + +test_expect_success '-L --find-object does not crash with merge and rename' ' + git log --format="%s" --no-patch -L 1,1:file \ + --find-object=$(git rev-parse HEAD:file) >actual +' + +# Commit-level filtering with pickaxe does not yet work for -L. +# show_log() prints the commit header before diffcore_std() runs +# pickaxe, so commits cannot be suppressed even when no diff pairs +# survive filtering. Fixing this would require deferring show_log() +# until after diffcore_std(), which is a larger restructuring of the +# log-tree output pipeline. +test_expect_failure '-L -G should filter commits by pattern' ' + git log --format="%s" --no-patch -L 1,1:file -G "nomatch" >actual && + test_must_be_empty actual +' + +test_expect_failure '-L -S should filter commits by pattern' ' + git log --format="%s" --no-patch -L 1,1:file -S "nomatch" >actual && + test_must_be_empty actual +' + +test_expect_failure '-L --find-object should filter commits by object' ' + git log --format="%s" --no-patch -L 1,1:file \ + --find-object=$ZERO_OID >actual && + test_must_be_empty actual +' + test_done From 86e986f166d207e1f4b80062c2befb4f94c191c4 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 17 Mar 2026 02:21:33 +0000 Subject: [PATCH 117/236] line-log: route -L output through the standard diff pipeline `git log -L` has always bypassed the standard diff pipeline. `dump_diff_hacky()` in line-log.c hand-rolls its own diff headers and hunk output, which means most diff formatting options are silently ignored. A NEEDSWORK comment has acknowledged this since the feature was introduced: /* * NEEDSWORK: manually building a diff here is not the Right * Thing(tm). log -L should be built into the diff pipeline. */ Remove `dump_diff_hacky()` and its helpers and route -L output through `builtin_diff()` / `fn_out_consume()`, the same path used by `git diff` and `git log -p`. The mechanism is a pair of callback wrappers that sit between `xdi_diff_outf()` and `fn_out_consume()`, filtering xdiff's output to only the tracked line ranges. To ensure xdiff emits all lines within each range as context, the context length is inflated to span the largest range. Wire up the `-L` implies `--patch` default in revision setup rather than forcing it at output time, so `line_log_print()` is just `diffcore_std()` + `diff_flush()` with no format save/restore. Rename detection is a no-op since pairs are already resolved during the history walk in `queue_diffs()`, but running `diffcore_std()` means `-S`/`-G` (pickaxe), `--orderfile`, and `--diff-filter` now work with `-L`, and `diff_resolve_rename_copy()` sets pair statuses correctly without manual assignment. Switch `diff_filepair_dup()` from `xmalloc` to `xcalloc` so that new fields (including `line_ranges`) are zero-initialized by default. As a result, diff formatting options that were previously silently ignored (e.g. --word-diff, --no-prefix, -w, --color-moved) now work with -L, and output gains `index` lines, `new file mode` headers, and funcname context in `@@` headers. This is a user-visible output change: tools that parse -L output may need to handle the additional header lines. The context-length inflation means xdiff may process more output than needed for very wide line ranges, but benchmarks on files up to 7800 lines show no measurable regression. Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- diff.c | 279 +++++++++++++++++- diffcore.h | 16 + line-log.c | 174 ++--------- line-log.h | 14 +- revision.c | 2 + t/t4211-line-log.sh | 12 +- t/t4211/sha1/expect.beginning-of-file | 4 + t/t4211/sha1/expect.end-of-file | 11 +- t/t4211/sha1/expect.move-support-f | 5 + t/t4211/sha1/expect.multiple | 10 +- t/t4211/sha1/expect.multiple-overlapping | 7 + t/t4211/sha1/expect.multiple-superset | 7 + t/t4211/sha1/expect.no-assertion-error | 12 +- t/t4211/sha1/expect.parallel-change-f-to-main | 7 + t/t4211/sha1/expect.simple-f | 4 + t/t4211/sha1/expect.simple-f-to-main | 5 + t/t4211/sha1/expect.simple-main | 11 +- t/t4211/sha1/expect.simple-main-to-end | 11 +- t/t4211/sha1/expect.two-ranges | 10 +- t/t4211/sha1/expect.vanishes-early | 10 +- t/t4211/sha256/expect.beginning-of-file | 4 + t/t4211/sha256/expect.end-of-file | 11 +- t/t4211/sha256/expect.move-support-f | 5 + t/t4211/sha256/expect.multiple | 10 +- t/t4211/sha256/expect.multiple-overlapping | 7 + t/t4211/sha256/expect.multiple-superset | 7 + t/t4211/sha256/expect.no-assertion-error | 12 +- .../sha256/expect.parallel-change-f-to-main | 7 + t/t4211/sha256/expect.simple-f | 4 + t/t4211/sha256/expect.simple-f-to-main | 5 + t/t4211/sha256/expect.simple-main | 11 +- t/t4211/sha256/expect.simple-main-to-end | 11 +- t/t4211/sha256/expect.two-ranges | 10 +- t/t4211/sha256/expect.vanishes-early | 10 +- 34 files changed, 512 insertions(+), 213 deletions(-) diff --git a/diff.c b/diff.c index 501648a5c49abc..f79e37a210f9ab 100644 --- a/diff.c +++ b/diff.c @@ -608,6 +608,52 @@ struct emit_callback { struct strbuf *header; }; +/* + * State for the line-range callback wrappers that sit between + * xdi_diff_outf() and fn_out_consume(). xdiff produces a normal, + * unfiltered diff; the wrappers intercept each hunk header and line, + * track post-image position, and forward only lines that fall within + * the requested ranges. Contiguous in-range lines are collected into + * range hunks and flushed with a synthetic @@ header so that + * fn_out_consume() sees well-formed unified-diff fragments. + * + * Removal lines ('-') cannot be classified by post-image position, so + * they are buffered in pending_rm until the next '+' or ' ' line + * reveals whether they precede an in-range line (flush into range hunk) or + * an out-of-range line (discard). + */ +struct line_range_callback { + xdiff_emit_line_fn orig_line_fn; + void *orig_cb_data; + const struct range_set *ranges; /* 0-based [start, end) */ + unsigned int cur_range; /* index into the range_set */ + + /* Post/pre-image line counters (1-based, set from hunk headers) */ + long lno_post; + long lno_pre; + + /* + * Function name from most recent xdiff hunk header; + * size matches struct func_line.buf in xdiff/xemit.c. + */ + char func[80]; + long funclen; + + /* Range hunk being accumulated for the current range */ + struct strbuf rhunk; + long rhunk_old_begin, rhunk_old_count; + long rhunk_new_begin, rhunk_new_count; + int rhunk_active; + int rhunk_has_changes; /* any '+' or '-' lines? */ + + /* Removal lines not yet known to be in-range */ + struct strbuf pending_rm; + int pending_rm_count; + long pending_rm_pre_begin; /* pre-image line of first pending */ + + int ret; /* latched error from orig_line_fn */ +}; + static int count_lines(const char *data, int size) { int count, ch, completely_empty = 1, nl_just_seen = 0; @@ -2493,6 +2539,188 @@ static int quick_consume(void *priv, char *line UNUSED, unsigned long len UNUSED return 1; } +static void discard_pending_rm(struct line_range_callback *s) +{ + strbuf_reset(&s->pending_rm); + s->pending_rm_count = 0; +} + +static void flush_rhunk(struct line_range_callback *s) +{ + struct strbuf hdr = STRBUF_INIT; + const char *p, *end; + + if (!s->rhunk_active || s->ret) + return; + + /* Drain any pending removal lines into the range hunk */ + if (s->pending_rm_count) { + strbuf_addbuf(&s->rhunk, &s->pending_rm); + s->rhunk_old_count += s->pending_rm_count; + s->rhunk_has_changes = 1; + discard_pending_rm(s); + } + + /* + * Suppress context-only hunks: they contain no actual changes + * and would just be noise. This can happen when the inflated + * ctxlen causes xdiff to emit context covering a range that + * has no changes in this commit. + */ + if (!s->rhunk_has_changes) { + s->rhunk_active = 0; + strbuf_reset(&s->rhunk); + return; + } + + strbuf_addf(&hdr, "@@ -%ld,%ld +%ld,%ld @@", + s->rhunk_old_begin, s->rhunk_old_count, + s->rhunk_new_begin, s->rhunk_new_count); + if (s->funclen > 0) { + strbuf_addch(&hdr, ' '); + strbuf_add(&hdr, s->func, s->funclen); + } + strbuf_addch(&hdr, '\n'); + + s->ret = s->orig_line_fn(s->orig_cb_data, hdr.buf, hdr.len); + strbuf_release(&hdr); + + /* + * Replay buffered lines one at a time through fn_out_consume. + * The cast discards const because xdiff_emit_line_fn takes + * char *, though fn_out_consume does not modify the buffer. + */ + p = s->rhunk.buf; + end = p + s->rhunk.len; + while (!s->ret && p < end) { + const char *eol = memchr(p, '\n', end - p); + unsigned long line_len = eol ? (unsigned long)(eol - p + 1) + : (unsigned long)(end - p); + s->ret = s->orig_line_fn(s->orig_cb_data, (char *)p, line_len); + p += line_len; + } + + s->rhunk_active = 0; + strbuf_reset(&s->rhunk); +} + +static void line_range_hunk_fn(void *data, + long old_begin, long old_nr UNUSED, + long new_begin, long new_nr UNUSED, + const char *func, long funclen) +{ + struct line_range_callback *s = data; + + /* + * When count > 0, begin is 1-based. When count == 0, begin is + * adjusted down by 1 by xdl_emit_hunk_hdr(), but no lines of + * that type will arrive, so the value is unused. + * + * Any pending removal lines from the previous xdiff hunk are + * intentionally left in pending_rm: the line callback will + * flush or discard them when the next content line reveals + * whether the removals precede in-range content. + */ + s->lno_post = new_begin; + s->lno_pre = old_begin; + + if (funclen > 0) { + if (funclen > (long)sizeof(s->func)) + funclen = sizeof(s->func); + memcpy(s->func, func, funclen); + } + s->funclen = funclen; +} + +static int line_range_line_fn(void *priv, char *line, unsigned long len) +{ + struct line_range_callback *s = priv; + const struct range *cur; + long lno_0, cur_pre; + + if (s->ret) + return s->ret; + + if (line[0] == '-') { + if (!s->pending_rm_count) + s->pending_rm_pre_begin = s->lno_pre; + s->lno_pre++; + strbuf_add(&s->pending_rm, line, len); + s->pending_rm_count++; + return s->ret; + } + + if (line[0] == '\\') { + if (s->pending_rm_count) + strbuf_add(&s->pending_rm, line, len); + else if (s->rhunk_active) + strbuf_add(&s->rhunk, line, len); + /* otherwise outside tracked range; drop silently */ + return s->ret; + } + + if (line[0] != '+' && line[0] != ' ') + BUG("unexpected diff line type '%c'", line[0]); + + lno_0 = s->lno_post - 1; + cur_pre = s->lno_pre; /* save before advancing for context lines */ + s->lno_post++; + if (line[0] == ' ') + s->lno_pre++; + + /* Advance past ranges we've passed */ + while (s->cur_range < s->ranges->nr && + lno_0 >= s->ranges->ranges[s->cur_range].end) { + if (s->rhunk_active) + flush_rhunk(s); + discard_pending_rm(s); + s->cur_range++; + } + + /* Past all ranges */ + if (s->cur_range >= s->ranges->nr) { + discard_pending_rm(s); + return s->ret; + } + + cur = &s->ranges->ranges[s->cur_range]; + + /* Before current range */ + if (lno_0 < cur->start) { + discard_pending_rm(s); + return s->ret; + } + + /* In range so start a new range hunk if needed */ + if (!s->rhunk_active) { + s->rhunk_active = 1; + s->rhunk_has_changes = 0; + s->rhunk_new_begin = lno_0 + 1; + s->rhunk_old_begin = s->pending_rm_count + ? s->pending_rm_pre_begin : cur_pre; + s->rhunk_old_count = 0; + s->rhunk_new_count = 0; + strbuf_reset(&s->rhunk); + } + + /* Flush pending removals into range hunk */ + if (s->pending_rm_count) { + strbuf_addbuf(&s->rhunk, &s->pending_rm); + s->rhunk_old_count += s->pending_rm_count; + s->rhunk_has_changes = 1; + discard_pending_rm(s); + } + + strbuf_add(&s->rhunk, line, len); + s->rhunk_new_count++; + if (line[0] == '+') + s->rhunk_has_changes = 1; + else + s->rhunk_old_count++; + + return s->ret; +} + static void pprint_rename(struct strbuf *name, const char *a, const char *b) { const char *old_name = a; @@ -3596,7 +3824,8 @@ static void builtin_diff(const char *name_a, const char *xfrm_msg, int must_show_header, struct diff_options *o, - int complete_rewrite) + int complete_rewrite, + const struct range_set *line_ranges) { mmfile_t mf1, mf2; const char *lbl[2]; @@ -3837,6 +4066,52 @@ static void builtin_diff(const char *name_a, */ xdi_diff_outf(&mf1, &mf2, NULL, quick_consume, &ecbdata, &xpp, &xecfg); + } else if (line_ranges) { + struct line_range_callback lr_state; + unsigned int i; + long max_span = 0; + + memset(&lr_state, 0, sizeof(lr_state)); + lr_state.orig_line_fn = fn_out_consume; + lr_state.orig_cb_data = &ecbdata; + lr_state.ranges = line_ranges; + strbuf_init(&lr_state.rhunk, 0); + strbuf_init(&lr_state.pending_rm, 0); + + /* + * Inflate ctxlen so that all changes within + * any single range are merged into one xdiff + * hunk and the inter-change context is emitted. + * The callback clips back to range boundaries. + * + * The optimal ctxlen depends on where changes + * fall within the range, which is only known + * after xdiff runs; the max range span is the + * upper bound that guarantees correctness in a + * single pass. + */ + for (i = 0; i < line_ranges->nr; i++) { + long span = line_ranges->ranges[i].end - + line_ranges->ranges[i].start; + if (span > max_span) + max_span = span; + } + if (max_span > xecfg.ctxlen) + xecfg.ctxlen = max_span; + + if (xdi_diff_outf(&mf1, &mf2, + line_range_hunk_fn, + line_range_line_fn, + &lr_state, &xpp, &xecfg)) + die("unable to generate diff for %s", + one->path); + + flush_rhunk(&lr_state); + if (lr_state.ret) + die("unable to generate diff for %s", + one->path); + strbuf_release(&lr_state.rhunk); + strbuf_release(&lr_state.pending_rm); } else if (xdi_diff_outf(&mf1, &mf2, NULL, fn_out_consume, &ecbdata, &xpp, &xecfg)) die("unable to generate diff for %s", one->path); @@ -4678,7 +4953,7 @@ static void run_diff_cmd(const struct external_diff *pgm, builtin_diff(name, other ? other : name, one, two, xfrm_msg, must_show_header, - o, complete_rewrite); + o, complete_rewrite, p->line_ranges); if (p->status == DIFF_STATUS_COPIED || p->status == DIFF_STATUS_RENAMED) o->found_changes = 1; diff --git a/diffcore.h b/diffcore.h index 9c0a0e7aaff85a..d75038d1b3a1b2 100644 --- a/diffcore.h +++ b/diffcore.h @@ -19,6 +19,17 @@ struct userdiff_driver; * in anything else. */ +/* A range [start, end). Lines are numbered starting at 0. */ +struct range { + long start, end; +}; + +/* A set of ranges. The ranges must always be disjoint and sorted. */ +struct range_set { + unsigned int alloc, nr; + struct range *ranges; +}; + /* We internally use unsigned short as the score value, * and rely on an int capable to hold 32-bits. -B can take * -Bmerge_score/break_score format and the two scores are @@ -106,6 +117,11 @@ int diff_filespec_is_binary(struct repository *, struct diff_filespec *); struct diff_filepair { struct diff_filespec *one; struct diff_filespec *two; + /* + * Tracked line ranges for -L filtering; borrowed from + * line_log_data and must not be freed. + */ + const struct range_set *line_ranges; unsigned short int score; char status; /* M C R A D U etc. (see Documentation/diff-format.adoc or DIFF_STATUS_* in diff.h) */ unsigned broken_pair : 1; diff --git a/line-log.c b/line-log.c index 9d12ece18133ca..858a899cd2a61d 100644 --- a/line-log.c +++ b/line-log.c @@ -885,160 +885,6 @@ static void queue_diffs(struct line_log_data *range, move_diff_queue(queue, &diff_queued_diff); } -static char *get_nth_line(long line, unsigned long *ends, void *data) -{ - if (line == 0) - return (char *)data; - else - return (char *)data + ends[line] + 1; -} - -static void print_line(const char *prefix, char first, - long line, unsigned long *ends, void *data, - const char *color, const char *reset, FILE *file) -{ - char *begin = get_nth_line(line, ends, data); - char *end = get_nth_line(line+1, ends, data); - int had_nl = 0; - - if (end > begin && end[-1] == '\n') { - end--; - had_nl = 1; - } - - fputs(prefix, file); - fputs(color, file); - putc(first, file); - fwrite(begin, 1, end-begin, file); - fputs(reset, file); - putc('\n', file); - if (!had_nl) - fputs("\\ No newline at end of file\n", file); -} - -static void dump_diff_hacky_one(struct rev_info *rev, struct line_log_data *range) -{ - unsigned int i, j = 0; - long p_lines, t_lines; - unsigned long *p_ends = NULL, *t_ends = NULL; - struct diff_filepair *pair = range->pair; - struct diff_ranges *diff = &range->diff; - - struct diff_options *opt = &rev->diffopt; - const char *prefix = diff_line_prefix(opt); - const char *c_reset = diff_get_color(opt->use_color, DIFF_RESET); - const char *c_frag = diff_get_color(opt->use_color, DIFF_FRAGINFO); - const char *c_meta = diff_get_color(opt->use_color, DIFF_METAINFO); - const char *c_old = diff_get_color(opt->use_color, DIFF_FILE_OLD); - const char *c_new = diff_get_color(opt->use_color, DIFF_FILE_NEW); - const char *c_context = diff_get_color(opt->use_color, DIFF_CONTEXT); - - if (!pair || !diff) - goto out; - - if (pair->one->oid_valid) - fill_line_ends(rev->diffopt.repo, pair->one, &p_lines, &p_ends); - fill_line_ends(rev->diffopt.repo, pair->two, &t_lines, &t_ends); - - fprintf(opt->file, "%s%sdiff --git a/%s b/%s%s\n", prefix, c_meta, pair->one->path, pair->two->path, c_reset); - fprintf(opt->file, "%s%s--- %s%s%s\n", prefix, c_meta, - pair->one->oid_valid ? "a/" : "", - pair->one->oid_valid ? pair->one->path : "/dev/null", - c_reset); - fprintf(opt->file, "%s%s+++ b/%s%s\n", prefix, c_meta, pair->two->path, c_reset); - for (i = 0; i < range->ranges.nr; i++) { - long p_start, p_end; - long t_start = range->ranges.ranges[i].start; - long t_end = range->ranges.ranges[i].end; - long t_cur = t_start; - unsigned int j_last; - - /* - * If a diff range touches multiple line ranges, then all - * those line ranges should be shown, so take a step back if - * the current line range is still in the previous diff range - * (even if only partially). - */ - if (j > 0 && diff->target.ranges[j-1].end > t_start) - j--; - - while (j < diff->target.nr && diff->target.ranges[j].end < t_start) - j++; - if (j == diff->target.nr || diff->target.ranges[j].start >= t_end) - continue; - - /* Scan ahead to determine the last diff that falls in this range */ - j_last = j; - while (j_last < diff->target.nr && diff->target.ranges[j_last].start < t_end) - j_last++; - if (j_last > j) - j_last--; - - /* - * Compute parent hunk headers: we know that the diff - * has the correct line numbers (but not all hunks). - * So it suffices to shift the start/end according to - * the line numbers of the first/last hunk(s) that - * fall in this range. - */ - if (t_start < diff->target.ranges[j].start) - p_start = diff->parent.ranges[j].start - (diff->target.ranges[j].start-t_start); - else - p_start = diff->parent.ranges[j].start; - if (t_end > diff->target.ranges[j_last].end) - p_end = diff->parent.ranges[j_last].end + (t_end-diff->target.ranges[j_last].end); - else - p_end = diff->parent.ranges[j_last].end; - - if (!p_start && !p_end) { - p_start = -1; - p_end = -1; - } - - /* Now output a diff hunk for this range */ - fprintf(opt->file, "%s%s@@ -%ld,%ld +%ld,%ld @@%s\n", - prefix, c_frag, - p_start+1, p_end-p_start, t_start+1, t_end-t_start, - c_reset); - while (j < diff->target.nr && diff->target.ranges[j].start < t_end) { - int k; - for (; t_cur < diff->target.ranges[j].start; t_cur++) - print_line(prefix, ' ', t_cur, t_ends, pair->two->data, - c_context, c_reset, opt->file); - for (k = diff->parent.ranges[j].start; k < diff->parent.ranges[j].end; k++) - print_line(prefix, '-', k, p_ends, pair->one->data, - c_old, c_reset, opt->file); - for (; t_cur < diff->target.ranges[j].end && t_cur < t_end; t_cur++) - print_line(prefix, '+', t_cur, t_ends, pair->two->data, - c_new, c_reset, opt->file); - j++; - } - for (; t_cur < t_end; t_cur++) - print_line(prefix, ' ', t_cur, t_ends, pair->two->data, - c_context, c_reset, opt->file); - } - -out: - free(p_ends); - free(t_ends); -} - -/* - * NEEDSWORK: manually building a diff here is not the Right - * Thing(tm). log -L should be built into the diff pipeline. - */ -static void dump_diff_hacky(struct rev_info *rev, struct line_log_data *range) -{ - const char *prefix = diff_line_prefix(&rev->diffopt); - - fprintf(rev->diffopt.file, "%s\n", prefix); - - while (range) { - dump_diff_hacky_one(rev, range); - range = range->next; - } -} - /* * Unlike most other functions, this destructively operates on * 'range'. @@ -1102,7 +948,7 @@ static int process_diff_filepair(struct rev_info *rev, static struct diff_filepair *diff_filepair_dup(struct diff_filepair *pair) { - struct diff_filepair *new_filepair = xmalloc(sizeof(struct diff_filepair)); + struct diff_filepair *new_filepair = xcalloc(1, sizeof(struct diff_filepair)); new_filepair->one = pair->one; new_filepair->two = pair->two; new_filepair->one->count++; @@ -1160,11 +1006,25 @@ static int process_all_files(struct line_log_data **range_out, int line_log_print(struct rev_info *rev, struct commit *commit) { - show_log(rev); if (!(rev->diffopt.output_format & DIFF_FORMAT_NO_OUTPUT)) { struct line_log_data *range = lookup_line_range(rev, commit); - dump_diff_hacky(rev, range); + struct line_log_data *r; + const char *prefix = diff_line_prefix(&rev->diffopt); + + fprintf(rev->diffopt.file, "%s\n", prefix); + + for (r = range; r; r = r->next) { + if (r->pair) { + struct diff_filepair *p = + diff_filepair_dup(r->pair); + p->line_ranges = &r->ranges; + diff_q(&diff_queued_diff, p); + } + } + + diffcore_std(&rev->diffopt); + diff_flush(&rev->diffopt); } return 1; } diff --git a/line-log.h b/line-log.h index e9dadbc1a58e2c..04a6ea64d3d68f 100644 --- a/line-log.h +++ b/line-log.h @@ -1,22 +1,12 @@ #ifndef LINE_LOG_H #define LINE_LOG_H +#include "diffcore.h" /* struct range, struct range_set */ + struct rev_info; struct commit; struct string_list; -/* A range [start,end]. Lines are numbered starting at 0, and the - * ranges include start but exclude end. */ -struct range { - long start, end; -}; - -/* A set of ranges. The ranges must always be disjoint and sorted. */ -struct range_set { - unsigned int alloc, nr; - struct range *ranges; -}; - /* A diff, encoded as the set of pre- and post-image ranges where the * files differ. A pair of ranges corresponds to a hunk. */ struct diff_ranges { diff --git a/revision.c b/revision.c index 402eb1b0298214..12e04bc53ac4cf 100644 --- a/revision.c +++ b/revision.c @@ -3111,6 +3111,8 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s if (want_ancestry(revs)) revs->limited = 1; revs->topo_order = 1; + if (!revs->diffopt.output_format) + revs->diffopt.output_format = DIFF_FORMAT_PATCH; } if (revs->topo_order && !generation_numbers_enabled(the_repository)) diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh index 659a943aa196b7..6a307e911b7a42 100755 --- a/t/t4211-line-log.sh +++ b/t/t4211-line-log.sh @@ -129,7 +129,7 @@ test_expect_success '-L with --output' ' git checkout parallel-change && git log --output=log -L :main:b.c >output && test_must_be_empty output && - test_line_count = 70 log + test_line_count = 75 log ' test_expect_success 'range_set_union' ' @@ -340,13 +340,19 @@ test_expect_success 'zero-width regex .* matches any function name' ' ' test_expect_success 'show line-log with graph' ' + git checkout parent-oids && + head_blob_old=$(git rev-parse --short HEAD^:file.c) && + head_blob_new=$(git rev-parse --short HEAD:file.c) && + root_blob=$(git rev-parse --short HEAD~4:file.c) && + null_blob=$(test_oid zero | cut -c1-7) && qz_to_tab_space >expect <<-EOF && * $head_oid Modify func2() in file.c |Z | diff --git a/file.c b/file.c + | index $head_blob_old..$head_blob_new 100644 | --- a/file.c | +++ b/file.c - | @@ -6,4 +6,4 @@ + | @@ -6,4 +6,4 @@ int func1() | int func2() | { | - return F2; @@ -355,6 +361,8 @@ test_expect_success 'show line-log with graph' ' * $root_oid Add func1() and func2() in file.c ZZ diff --git a/file.c b/file.c + new file mode 100644 + index $null_blob..$root_blob --- /dev/null +++ b/file.c @@ -0,0 +6,4 @@ diff --git a/t/t4211/sha1/expect.beginning-of-file b/t/t4211/sha1/expect.beginning-of-file index 91b405489892bd..52c90afb3a7758 100644 --- a/t/t4211/sha1/expect.beginning-of-file +++ b/t/t4211/sha1/expect.beginning-of-file @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:47:40 2013 +0100 change at very beginning diff --git a/a.c b/a.c +index bdb2bb1..5e709a1 100644 --- a/a.c +++ b/a.c @@ -1,3 +1,4 @@ @@ -20,6 +21,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -1,3 +1,3 @@ @@ -35,6 +37,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +1,3 @@ diff --git a/t/t4211/sha1/expect.end-of-file b/t/t4211/sha1/expect.end-of-file index bd25bb2f591f0d..c40036899a6ec0 100644 --- a/t/t4211/sha1/expect.end-of-file +++ b/t/t4211/sha1/expect.end-of-file @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -20,3 +20,5 @@ +@@ -20,3 +20,5 @@ long f(long x) printf("%ld\n", f(15)); return 0; -} @@ -23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -20,3 +20,3 @@ +@@ -20,3 +20,3 @@ int main () printf("%ld\n", f(15)); return 0; -} @@ -39,9 +41,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c -@@ -19,3 +19,3 @@ +@@ -19,3 +19,3 @@ int f(int x) - printf("%d\n", f(15)); + printf("%ld\n", f(15)); return 0; @@ -54,6 +57,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +18,3 @@ diff --git a/t/t4211/sha1/expect.move-support-f b/t/t4211/sha1/expect.move-support-f index c905e01bc25c3e..ead6500d4d44d8 100644 --- a/t/t4211/sha1/expect.move-support-f +++ b/t/t4211/sha1/expect.move-support-f @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:49:50 2013 +0100 another simple change diff --git a/b.c b/b.c +index 5de3ea4..bf79c2f 100644 --- a/b.c +++ b/b.c @@ -4,9 +4,9 @@ @@ -26,6 +27,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -47,6 +49,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -67,6 +70,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha1/expect.multiple b/t/t4211/sha1/expect.multiple index 1eee8a7801055d..a41851a51d2a9f 100644 --- a/t/t4211/sha1/expect.multiple +++ b/t/t4211/sha1/expect.multiple @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -43,6 +45,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -71,6 +74,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -91,6 +95,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha1/expect.multiple-overlapping b/t/t4211/sha1/expect.multiple-overlapping index d930b6eec4c469..0ec9990eab32c6 100644 --- a/t/t4211/sha1/expect.multiple-overlapping +++ b/t/t4211/sha1/expect.multiple-overlapping @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,21 @@ @@ -39,6 +40,7 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,19 @@ @@ -71,6 +73,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index e51de13..bdb2bb1 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -102,6 +105,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -134,6 +138,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,18 +3,19 @@ @@ -164,6 +169,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,18 @@ diff --git a/t/t4211/sha1/expect.multiple-superset b/t/t4211/sha1/expect.multiple-superset index d930b6eec4c469..0ec9990eab32c6 100644 --- a/t/t4211/sha1/expect.multiple-superset +++ b/t/t4211/sha1/expect.multiple-superset @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,21 @@ @@ -39,6 +40,7 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,19 @@ @@ -71,6 +73,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index e51de13..bdb2bb1 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -102,6 +105,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -134,6 +138,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,18 +3,19 @@ @@ -164,6 +169,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,18 @@ diff --git a/t/t4211/sha1/expect.no-assertion-error b/t/t4211/sha1/expect.no-assertion-error index 994c37db1efec4..54c568f273a6d2 100644 --- a/t/t4211/sha1/expect.no-assertion-error +++ b/t/t4211/sha1/expect.no-assertion-error @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:50:24 2013 +0100 move within the file diff --git a/b.c b/b.c +index bf79c2f..27c829c 100644 --- a/b.c +++ b/b.c @@ -25,0 +18,9 @@ @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -45,9 +47,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -63,9 +66,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -80,6 +84,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha1/expect.parallel-change-f-to-main b/t/t4211/sha1/expect.parallel-change-f-to-main index 052def8074da29..65a8cc673a6fca 100644 --- a/t/t4211/sha1/expect.parallel-change-f-to-main +++ b/t/t4211/sha1/expect.parallel-change-f-to-main @@ -13,6 +13,7 @@ Date: Thu Feb 28 10:49:50 2013 +0100 another simple change diff --git a/b.c b/b.c +index 5de3ea4..bf79c2f 100644 --- a/b.c +++ b/b.c @@ -4,14 +4,14 @@ @@ -39,6 +40,7 @@ Date: Fri Apr 12 16:15:57 2013 +0200 change on another line of history while rename happens diff --git a/a.c b/a.c +index 5de3ea4..01b5b65 100644 --- a/a.c +++ b/a.c @@ -4,14 +4,14 @@ @@ -65,6 +67,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index e51de13..bdb2bb1 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -91,6 +94,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -117,6 +121,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,13 +3,14 @@ @@ -142,6 +147,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,13 @@ diff --git a/t/t4211/sha1/expect.simple-f b/t/t4211/sha1/expect.simple-f index a1f5bc49c879e4..b24ae40e03cbd3 100644 --- a/t/t4211/sha1/expect.simple-f +++ b/t/t4211/sha1/expect.simple-f @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -26,6 +27,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -46,6 +48,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha1/expect.simple-f-to-main b/t/t4211/sha1/expect.simple-f-to-main index a475768710b5a6..cd92100dfc468d 100644 --- a/t/t4211/sha1/expect.simple-f-to-main +++ b/t/t4211/sha1/expect.simple-f-to-main @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index e51de13..bdb2bb1 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -31,6 +32,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -57,6 +59,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,13 +3,14 @@ @@ -82,6 +85,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,13 @@ diff --git a/t/t4211/sha1/expect.simple-main b/t/t4211/sha1/expect.simple-main index 39ce39bebed75b..ff31291d348e33 100644 --- a/t/t4211/sha1/expect.simple-main +++ b/t/t4211/sha1/expect.simple-main @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -41,9 +43,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -58,6 +61,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha1/expect.simple-main-to-end b/t/t4211/sha1/expect.simple-main-to-end index 8480bd9cc45bc7..4bef21e6578f11 100644 --- a/t/t4211/sha1/expect.simple-main-to-end +++ b/t/t4211/sha1/expect.simple-main-to-end @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -43,9 +45,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -60,6 +63,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha1/expect.two-ranges b/t/t4211/sha1/expect.two-ranges index c5164f3be3a287..aed01522e3970d 100644 --- a/t/t4211/sha1/expect.two-ranges +++ b/t/t4211/sha1/expect.two-ranges @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -41,6 +43,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 3233403..e51de13 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -69,6 +72,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 444e415..3233403 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -89,6 +93,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha1/expect.vanishes-early b/t/t4211/sha1/expect.vanishes-early index 1f7cd06941495a..a413ad36598ddf 100644 --- a/t/t4211/sha1/expect.vanishes-early +++ b/t/t4211/sha1/expect.vanishes-early @@ -5,11 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index 0b9cae5..5de3ea4 100644 --- a/a.c +++ b/a.c -@@ -22,1 +24,1 @@ --} -\ No newline at end of file +@@ -23,0 +24,1 @@ int main () +/* incomplete lines are bad! */ commit 100b61a6f2f720f812620a9d10afb3a960ccb73c @@ -19,9 +18,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index 5e709a1..0b9cae5 100644 --- a/a.c +++ b/a.c -@@ -22,1 +22,1 @@ +@@ -22,1 +22,1 @@ int main () -} +} \ No newline at end of file @@ -33,6 +33,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..444e415 --- /dev/null +++ b/a.c @@ -0,0 +20,1 @@ diff --git a/t/t4211/sha256/expect.beginning-of-file b/t/t4211/sha256/expect.beginning-of-file index 5adfdfc1a12076..e8d62328cf87c5 100644 --- a/t/t4211/sha256/expect.beginning-of-file +++ b/t/t4211/sha256/expect.beginning-of-file @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:47:40 2013 +0100 change at very beginning diff --git a/a.c b/a.c +index 3a78aaf..d325124 100644 --- a/a.c +++ b/a.c @@ -1,3 +1,4 @@ @@ -20,6 +21,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -1,3 +1,3 @@ @@ -35,6 +37,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +1,3 @@ diff --git a/t/t4211/sha256/expect.end-of-file b/t/t4211/sha256/expect.end-of-file index 03ab5c1784e289..3b2e2384da7b7f 100644 --- a/t/t4211/sha256/expect.end-of-file +++ b/t/t4211/sha256/expect.end-of-file @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -20,3 +20,5 @@ +@@ -20,3 +20,5 @@ long f(long x) printf("%ld\n", f(15)); return 0; -} @@ -23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -20,3 +20,3 @@ +@@ -20,3 +20,3 @@ int main () printf("%ld\n", f(15)); return 0; -} @@ -39,9 +41,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c -@@ -19,3 +19,3 @@ +@@ -19,3 +19,3 @@ int f(int x) - printf("%d\n", f(15)); + printf("%ld\n", f(15)); return 0; @@ -54,6 +57,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +18,3 @@ diff --git a/t/t4211/sha256/expect.move-support-f b/t/t4211/sha256/expect.move-support-f index 223b4ed2a0cfca..f49abcea3e7899 100644 --- a/t/t4211/sha256/expect.move-support-f +++ b/t/t4211/sha256/expect.move-support-f @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:49:50 2013 +0100 another simple change diff --git a/b.c b/b.c +index 62c1fc2..69cb69c 100644 --- a/b.c +++ b/b.c @@ -4,9 +4,9 @@ @@ -26,6 +27,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -47,6 +49,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -67,6 +70,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha256/expect.multiple b/t/t4211/sha256/expect.multiple index dbd987b74a451f..0dee50ffb7c694 100644 --- a/t/t4211/sha256/expect.multiple +++ b/t/t4211/sha256/expect.multiple @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -43,6 +45,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -71,6 +74,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -91,6 +95,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha256/expect.multiple-overlapping b/t/t4211/sha256/expect.multiple-overlapping index 9015a45a256fdc..b8c260e8aea049 100644 --- a/t/t4211/sha256/expect.multiple-overlapping +++ b/t/t4211/sha256/expect.multiple-overlapping @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,21 @@ @@ -39,6 +40,7 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,19 @@ @@ -71,6 +73,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index 75c0119..3a78aaf 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -102,6 +105,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -134,6 +138,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,18 +3,19 @@ @@ -164,6 +169,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,18 @@ diff --git a/t/t4211/sha256/expect.multiple-superset b/t/t4211/sha256/expect.multiple-superset index 9015a45a256fdc..b8c260e8aea049 100644 --- a/t/t4211/sha256/expect.multiple-superset +++ b/t/t4211/sha256/expect.multiple-superset @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,21 @@ @@ -39,6 +40,7 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c @@ -4,19 +4,19 @@ @@ -71,6 +73,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index 75c0119..3a78aaf 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -102,6 +105,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,19 +3,19 @@ @@ -134,6 +138,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,18 +3,19 @@ @@ -164,6 +169,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,18 @@ diff --git a/t/t4211/sha256/expect.no-assertion-error b/t/t4211/sha256/expect.no-assertion-error index 36ed12aa9cb8b9..c25f2ce19c05d9 100644 --- a/t/t4211/sha256/expect.no-assertion-error +++ b/t/t4211/sha256/expect.no-assertion-error @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:50:24 2013 +0100 move within the file diff --git a/b.c b/b.c +index 69cb69c..a0d566e 100644 --- a/b.c +++ b/b.c @@ -25,0 +18,9 @@ @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -45,9 +47,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -63,9 +66,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -80,6 +84,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha256/expect.parallel-change-f-to-main b/t/t4211/sha256/expect.parallel-change-f-to-main index e68f8928ea70af..3178989253a885 100644 --- a/t/t4211/sha256/expect.parallel-change-f-to-main +++ b/t/t4211/sha256/expect.parallel-change-f-to-main @@ -13,6 +13,7 @@ Date: Thu Feb 28 10:49:50 2013 +0100 another simple change diff --git a/b.c b/b.c +index 62c1fc2..69cb69c 100644 --- a/b.c +++ b/b.c @@ -4,14 +4,14 @@ @@ -39,6 +40,7 @@ Date: Fri Apr 12 16:15:57 2013 +0200 change on another line of history while rename happens diff --git a/a.c b/a.c +index 62c1fc2..e1e8475 100644 --- a/a.c +++ b/a.c @@ -4,14 +4,14 @@ @@ -65,6 +67,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index 75c0119..3a78aaf 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -91,6 +94,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -117,6 +121,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,13 +3,14 @@ @@ -142,6 +147,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,13 @@ diff --git a/t/t4211/sha256/expect.simple-f b/t/t4211/sha256/expect.simple-f index 65508d7c0bc6ae..983c711fe3d6e5 100644 --- a/t/t4211/sha256/expect.simple-f +++ b/t/t4211/sha256/expect.simple-f @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -26,6 +27,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -46,6 +48,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha256/expect.simple-f-to-main b/t/t4211/sha256/expect.simple-f-to-main index 77b721c196b191..e67fa017a7b3bc 100644 --- a/t/t4211/sha256/expect.simple-f-to-main +++ b/t/t4211/sha256/expect.simple-f-to-main @@ -5,6 +5,7 @@ Date: Thu Feb 28 10:45:41 2013 +0100 touch comment diff --git a/a.c b/a.c +index 75c0119..3a78aaf 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -31,6 +32,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,14 +3,14 @@ @@ -57,6 +59,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,13 +3,14 @@ @@ -82,6 +85,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,13 @@ diff --git a/t/t4211/sha256/expect.simple-main b/t/t4211/sha256/expect.simple-main index d20708c9f9a0f4..0792b27cad89ac 100644 --- a/t/t4211/sha256/expect.simple-main +++ b/t/t4211/sha256/expect.simple-main @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -41,9 +43,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -58,6 +61,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha256/expect.simple-main-to-end b/t/t4211/sha256/expect.simple-main-to-end index 617cdf34819e5e..d3bd7c7bc625b5 100644 --- a/t/t4211/sha256/expect.simple-main-to-end +++ b/t/t4211/sha256/expect.simple-main-to-end @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,7 @@ +@@ -18,5 +18,7 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -25,9 +26,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -43,9 +45,10 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c -@@ -17,5 +17,5 @@ +@@ -17,5 +17,5 @@ int f(int x) int main () { - printf("%d\n", f(15)); @@ -60,6 +63,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +16,5 @@ diff --git a/t/t4211/sha256/expect.two-ranges b/t/t4211/sha256/expect.two-ranges index 6a94d3b9cba2dd..7735b1972380be 100644 --- a/t/t4211/sha256/expect.two-ranges +++ b/t/t4211/sha256/expect.two-ranges @@ -5,9 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -23,9 +24,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -18,5 +18,5 @@ +@@ -18,5 +18,5 @@ long f(long x) int main () { printf("%ld\n", f(15)); @@ -41,6 +43,7 @@ Date: Thu Feb 28 10:45:16 2013 +0100 touch both functions diff --git a/a.c b/a.c +index 7a296b9..75c0119 100644 --- a/a.c +++ b/a.c @@ -3,9 +3,9 @@ @@ -69,6 +72,7 @@ Date: Thu Feb 28 10:44:55 2013 +0100 change f() diff --git a/a.c b/a.c +index 9f550c3..7a296b9 100644 --- a/a.c +++ b/a.c @@ -3,8 +3,9 @@ @@ -89,6 +93,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +3,8 @@ diff --git a/t/t4211/sha256/expect.vanishes-early b/t/t4211/sha256/expect.vanishes-early index 11ec9bdecfcc8d..bc33b963dc8570 100644 --- a/t/t4211/sha256/expect.vanishes-early +++ b/t/t4211/sha256/expect.vanishes-early @@ -5,11 +5,10 @@ Date: Thu Feb 28 10:48:43 2013 +0100 change back to complete line diff --git a/a.c b/a.c +index e4fa1d8..62c1fc2 100644 --- a/a.c +++ b/a.c -@@ -22,1 +24,1 @@ --} -\ No newline at end of file +@@ -23,0 +24,1 @@ int main () +/* incomplete lines are bad! */ commit 29f32ac3141c48b22803e5c4127b719917b67d0f8ca8c5248bebfa2a19f7da10 @@ -19,9 +18,10 @@ Date: Thu Feb 28 10:48:10 2013 +0100 change to an incomplete line at end diff --git a/a.c b/a.c +index d325124..e4fa1d8 100644 --- a/a.c +++ b/a.c -@@ -22,1 +22,1 @@ +@@ -22,1 +22,1 @@ int main () -} +} \ No newline at end of file @@ -33,6 +33,8 @@ Date: Thu Feb 28 10:44:48 2013 +0100 initial diff --git a/a.c b/a.c +new file mode 100644 +index 0000000..9f550c3 --- /dev/null +++ b/a.c @@ -0,0 +20,1 @@ From 0e51f7a7faae18d9e6819c38cc822d6232deacae Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 17 Mar 2026 02:21:34 +0000 Subject: [PATCH 118/236] t4211: add tests for -L with standard diff options Now that -L output flows through the standard diff pipeline, verify that previously-ignored diff options work: formatting (--word-diff, --word-diff-regex, --no-prefix, --src/dst-prefix, --full-index, --abbrev), whitespace handling (-w, -b), output indicators (--output-indicator-new/old/context), direction reversal (-R), --color-moved, and pickaxe options (-S, -G). Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- t/t4211-line-log.sh | 281 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 281 insertions(+) diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh index 6a307e911b7a42..aaf197d2edc4d8 100755 --- a/t/t4211-line-log.sh +++ b/t/t4211-line-log.sh @@ -339,6 +339,92 @@ test_expect_success 'zero-width regex .* matches any function name' ' test_cmp expect actual ' +test_expect_success 'setup for diff pipeline tests' ' + git checkout parent-oids && + + head_blob_old=$(git rev-parse --short HEAD^:file.c) && + head_blob_new=$(git rev-parse --short HEAD:file.c) && + root_blob=$(git rev-parse --short HEAD~4:file.c) && + null_blob=$(test_oid zero | cut -c1-7) && + head_blob_old_full=$(git rev-parse HEAD^:file.c) && + head_blob_new_full=$(git rev-parse HEAD:file.c) && + root_blob_full=$(git rev-parse HEAD~4:file.c) && + null_blob_full=$(test_oid zero) +' + +test_expect_success '-L diff output includes index and new file mode' ' + git log -L:func2:file.c --format= >actual && + + # Output should contain index headers (not present in old code path) + grep "^index $head_blob_old\.\.$head_blob_new 100644" actual && + + # Root commit should show new file mode and null index + grep "^new file mode 100644" actual && + grep "^index $null_blob\.\.$root_blob$" actual && + + # Hunk headers should include funcname context + grep "^@@ .* @@ int func1()" actual +' + +test_expect_success '-L with --word-diff' ' + cat >expect <<-\EOF && + + diff --git a/file.c b/file.c + --- a/file.c + +++ b/file.c + @@ -6,4 +6,4 @@ int func1() + int func2() + { + return [-F2;-]{+F2 + 2;+} + } + + diff --git a/file.c b/file.c + new file mode 100644 + --- /dev/null + +++ b/file.c + @@ -0,0 +6,4 @@ + {+int func2()+} + {+{+} + {+ return F2;+} + {+}+} + EOF + git log -L:func2:file.c --word-diff --format= >actual && + grep -v "^index " actual >actual.filtered && + grep -v "^index " expect >expect.filtered && + test_cmp expect.filtered actual.filtered +' + +test_expect_success '-L with --no-prefix' ' + git log -L:func2:file.c --no-prefix --format= >actual && + grep "^diff --git file.c file.c" actual && + grep "^--- file.c" actual && + ! grep "^--- a/" actual +' + +test_expect_success '-L with --full-index' ' + git log -L:func2:file.c --full-index --format= >actual && + grep "^index $head_blob_old_full\.\.$head_blob_new_full 100644" actual && + grep "^index $null_blob_full\.\.$root_blob_full$" actual +' + +test_expect_success 'setup -L with whitespace change' ' + git checkout -b ws-change parent-oids && + sed "s/ return F2 + 2;/ return F2 + 2;/" file.c >tmp && + mv tmp file.c && + git commit -a -m "Whitespace change in func2()" +' + +test_expect_success '-L with --ignore-all-space suppresses whitespace-only diff' ' + git log -L:func2:file.c --format= >without_w && + git log -L:func2:file.c --format= -w >with_w && + + # Without -w: three commits produce diffs (whitespace, modify, root) + test $(grep -c "^diff --git" without_w) = 3 && + + # With -w: whitespace-only commit produces no hunk, so only two diffs + test $(grep -c "^diff --git" with_w) = 2 +' + test_expect_success 'show line-log with graph' ' git checkout parent-oids && head_blob_old=$(git rev-parse --short HEAD^:file.c) && @@ -430,4 +516,199 @@ test_expect_failure '-L --find-object should filter commits by object' ' test_must_be_empty actual ' +test_expect_success '-L with --word-diff-regex' ' + git checkout parent-oids && + git log -L:func2:file.c --word-diff \ + --word-diff-regex="[a-zA-Z0-9_]+" --format= >actual && + # Word-diff markers must be present + grep "{+" actual && + grep "+}" actual && + # No line-level +/- markers (word-diff replaces them); + # exclude --- header lines from the check + ! grep "^+[^+]" actual && + ! grep "^-[^-]" actual +' + +test_expect_success '-L with --src-prefix and --dst-prefix' ' + git checkout parent-oids && + git log -L:func2:file.c --src-prefix=old/ --dst-prefix=new/ \ + --format= >actual && + grep "^diff --git old/file.c new/file.c" actual && + grep "^--- old/file.c" actual && + grep "^+++ new/file.c" actual && + ! grep "^--- a/" actual +' + +test_expect_success '-L with --abbrev' ' + git checkout parent-oids && + git log -L:func2:file.c --abbrev=4 --format= -1 >actual && + # 4-char abbreviated hashes on index line + grep "^index [0-9a-f]\{4\}\.\.[0-9a-f]\{4\}" actual +' + +test_expect_success '-L with -b suppresses whitespace-only diff' ' + git checkout ws-change && + git log -L:func2:file.c --format= >without_b && + git log -L:func2:file.c --format= -b >with_b && + test $(grep -c "^diff --git" without_b) = 3 && + test $(grep -c "^diff --git" with_b) = 2 +' + +test_expect_success '-L with --output-indicator-*' ' + git checkout parent-oids && + git log -L:func2:file.c --output-indicator-new=">" \ + --output-indicator-old="<" --output-indicator-context="|" \ + --format= -1 >actual && + grep "^>" actual && + grep "^<" actual && + grep "^|" actual && + # No standard +/-/space content markers; exclude ---/+++ headers + ! grep "^+[^+]" actual && + ! grep "^-[^-]" actual && + ! grep "^ " actual +' + +test_expect_success '-L with -R reverses diff' ' + git checkout parent-oids && + git log -L:func2:file.c -R --format= -1 >actual && + grep "^diff --git b/file.c a/file.c" actual && + grep "^--- b/file.c" actual && + grep "^+++ a/file.c" actual && + # The modification added "F2 + 2", so reversed it is removed + grep "^-.*F2 + 2" actual && + grep "^+.*return F2;" actual +' + +test_expect_success 'setup for color-moved test' ' + git checkout -b color-moved-test parent-oids && + cat >big.c <<-\EOF && + int bigfunc() + { + int a = 1; + int b = 2; + int c = 3; + return a + b + c; + } + EOF + git add big.c && + git commit -m "add bigfunc" && + sed "s/ / /" big.c >tmp && mv tmp big.c && + git commit -a -m "reindent bigfunc" +' + +test_expect_success '-L with --color-moved' ' + git log -L:bigfunc:big.c --color-moved=zebra \ + --color-moved-ws=ignore-all-space \ + --color=always --format= -1 >actual.raw && + test_decode_color actual && + # Old moved lines: bold magenta; new moved lines: bold cyan + grep "BOLD;MAGENTA" actual && + grep "BOLD;CYAN" actual +' + +test_expect_success 'setup for no-newline-at-eof tests' ' + git checkout --orphan no-newline && + git reset --hard && + printf "int top()\n{\n return 1;\n}\n\nint bot()\n{\n return 2;\n}" >noeol.c && + git add noeol.c && + test_tick && + git commit -m "add noeol.c (no trailing newline)" && + sed "s/return 2/return 22/" noeol.c >tmp && mv tmp noeol.c && + git commit -a -m "modify bot()" && + printf "int top()\n{\n return 1;\n}\n\nint bot()\n{\n return 33;\n}\n" >noeol.c && + git commit -a -m "modify bot() and add trailing newline" +' + +# When the tracked function is at the end of a file with no trailing +# newline, the "\ No newline at end of file" marker should appear. +test_expect_success '-L no-newline-at-eof appears in tracked range' ' + git log -L:bot:noeol.c --format= -1 HEAD~1 >actual && + grep "No newline at end of file" actual +' + +# When tracking a function that ends before the no-newline content, +# the marker should not appear in the output. +test_expect_success '-L no-newline-at-eof suppressed outside range' ' + git log -L:top:noeol.c --format= >actual && + ! grep "No newline at end of file" actual +' + +# When a commit removes a no-newline last line and replaces it with +# a newline-terminated line, the marker should still appear (on the +# old side of the diff). +test_expect_success '-L no-newline-at-eof marker with deleted line' ' + git log -L:bot:noeol.c --format= -1 >actual && + grep "No newline at end of file" actual +' + +test_expect_success 'setup for range boundary deletion test' ' + git checkout --orphan range-boundary && + git reset --hard && + cat >boundary.c <<-\EOF && + void above() + { + return; + } + + void tracked() + { + int x = 1; + int y = 2; + } + + void below() + { + return; + } + EOF + git add boundary.c && + test_tick && + git commit -m "add boundary.c" && + cat >boundary.c <<-\EOF && + void above() + { + return; + } + + void tracked() + { + int x = 1; + int y = 2; + } + + void below_renamed() + { + return 0; + } + EOF + git commit -a -m "modify below() only" +' + +# When only a function below the tracked range is modified, the +# tracked function should not produce a diff. +test_expect_success '-L suppresses deletions outside tracked range' ' + git log -L:tracked:boundary.c --format= >actual && + test $(grep -c "^diff --git" actual) = 1 +' + +test_expect_success '-L with -S filters to string-count changes' ' + git checkout parent-oids && + git log -L:func2:file.c -S "F2 + 2" --format= >actual && + # -S searches the whole file, not just the tracked range; + # combined with the -L range walk, this selects commits that + # both touch func2 and change the count of "F2 + 2" in the file. + test $(grep -c "^diff --git" actual) = 1 && + grep "F2 + 2" actual +' + +test_expect_success '-L with -G filters to diff-text matches' ' + git checkout parent-oids && + git log -L:func2:file.c -G "F2 [+] 2" --format= >actual && + # -G greps the whole-file diff text, not just the tracked range; + # combined with -L, this selects commits that both touch func2 + # and have "F2 + 2" in their diff. + test $(grep -c "^diff --git" actual) = 1 && + grep "F2 + 2" actual +' + test_done From 512536a09ea2964e93226f219898ee0a09d85a70 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 17 Mar 2026 02:21:35 +0000 Subject: [PATCH 119/236] doc: note that -L supports patch formatting and pickaxe options Now that -L output flows through the standard diff pipeline, document that patch formatting options like --word-diff, --color-moved, --no-prefix, whitespace handling (-w, -b), and pickaxe options (-S, -G) are supported. Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- Documentation/line-range-options.adoc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/line-range-options.adoc b/Documentation/line-range-options.adoc index c44ba05320f9bf..ecb2c79fb9bde8 100644 --- a/Documentation/line-range-options.adoc +++ b/Documentation/line-range-options.adoc @@ -12,4 +12,8 @@ (namely `--raw`, `--numstat`, `--shortstat`, `--dirstat`, `--summary`, `--name-only`, `--name-status`, `--check`) are not currently implemented. + +Patch formatting options such as `--word-diff`, `--color-moved`, +`--no-prefix`, and whitespace options (`-w`, `-b`) are supported, +as are pickaxe options (`-S`, `-G`). ++ include::line-range-format.adoc[] From 4c2390067030d67c1b79fd96eb656ae63b91643b Mon Sep 17 00:00:00 2001 From: Jialong Wang Date: Mon, 16 Mar 2026 21:15:44 -0400 Subject: [PATCH 120/236] t2203: avoid suppressing git status exit code When git status is piped into grep, the exit status of the Git command is hidden by the pipeline. Capture the status output in a temporary file first, and then filter it as needed, so that any failure from git status is still noticed by the test suite. Signed-off-by: Jialong Wang Signed-off-by: Junio C Hamano --- t/t2203-add-intent.sh | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/t/t2203-add-intent.sh b/t/t2203-add-intent.sh index 192ad14b5f474f..44c1936e4d698b 100755 --- a/t/t2203-add-intent.sh +++ b/t/t2203-add-intent.sh @@ -16,7 +16,8 @@ test_expect_success 'intent to add' ' ' test_expect_success 'git status' ' - git status --porcelain | grep -v actual >actual && + git status --porcelain >actual.raw && + grep -v actual actual.raw >actual && cat >expect <<-\EOF && DA 1.t A elif @@ -26,7 +27,8 @@ test_expect_success 'git status' ' ' test_expect_success 'git status with porcelain v2' ' - git status --porcelain=v2 | grep -v "^?" >actual && + git status --porcelain=v2 >actual.raw && + grep -v "^?" actual.raw >actual && nam1=$(echo 1 | git hash-object --stdin) && nam2=$(git hash-object elif) && cat >expect <<-EOF && @@ -171,17 +173,20 @@ test_expect_success 'rename detection finds the right names' ' mv first third && git add -N third && - git status | grep -v "^?" >actual.1 && + git status >actual.raw.1 && + grep -v "^?" actual.raw.1 >actual.1 && test_grep "renamed: *first -> third" actual.1 && - git status --porcelain | grep -v "^?" >actual.2 && + git status --porcelain >actual.raw.2 && + grep -v "^?" actual.raw.2 >actual.2 && cat >expected.2 <<-\EOF && R first -> third EOF test_cmp expected.2 actual.2 && hash=$(git hash-object third) && - git status --porcelain=v2 | grep -v "^?" >actual.3 && + git status --porcelain=v2 >actual.raw.3 && + grep -v "^?" actual.raw.3 >actual.3 && cat >expected.3 <<-EOF && 2 .R N... 100644 100644 100644 $hash $hash R100 third first EOF @@ -211,11 +216,13 @@ test_expect_success 'double rename detection in status' ' mv second third && git add -N third && - git status | grep -v "^?" >actual.1 && + git status >actual.raw.1 && + grep -v "^?" actual.raw.1 >actual.1 && test_grep "renamed: *first -> second" actual.1 && test_grep "renamed: *second -> third" actual.1 && - git status --porcelain | grep -v "^?" >actual.2 && + git status --porcelain >actual.raw.2 && + grep -v "^?" actual.raw.2 >actual.2 && cat >expected.2 <<-\EOF && R first -> second R second -> third @@ -223,7 +230,8 @@ test_expect_success 'double rename detection in status' ' test_cmp expected.2 actual.2 && hash=$(git hash-object third) && - git status --porcelain=v2 | grep -v "^?" >actual.3 && + git status --porcelain=v2 >actual.raw.3 && + grep -v "^?" actual.raw.3 >actual.3 && cat >expected.3 <<-EOF && 2 R. N... 100644 100644 100644 $hash $hash R100 second first 2 .R N... 100644 100644 100644 $hash $hash R100 third second From 7cc4c0903ce1cb7bc66d62d234be4abf5cf8f887 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 17 Mar 2026 00:29:17 +0000 Subject: [PATCH 121/236] revision: include object-name.h The REV_INFO_INIT macro includes a use of the DEFAULT_ABBREV macro, which is defined in object-name.h. Include it in revision.h so consumers of REV_INFO_INIT do not need to include this hidden dependency. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- revision.h | 1 + 1 file changed, 1 insertion(+) diff --git a/revision.h b/revision.h index 69242ecb189a52..584f1338b5e323 100644 --- a/revision.h +++ b/revision.h @@ -4,6 +4,7 @@ #include "commit.h" #include "grep.h" #include "notes.h" +#include "object-name.h" #include "oidset.h" #include "pretty.h" #include "diff.h" From 5374a9c708fee31c660b6d8301862b3e8cd34698 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 17 Mar 2026 00:29:18 +0000 Subject: [PATCH 122/236] t5620: prepare branched repo for revision tests Prepare the test infrastructure for upcoming changes that teach 'git backfill' to accept revision arguments and pathspecs. Add test_tick before each commit in the setup loop so that commit dates are deterministic. This enables reliable testing with '--since'. Rename the 'd/e/' directory to 'd/f/' so that the prefix 'd/f' is ambiguous with the files 'd/file.*.txt'. This exercises the subtlety in prefix pathspec matching that will be added in a later commit. Create a branched version of the test repository (src-revs) with: - A 'side' branch merged into main, adding s/file.{1,2}.txt with two versions (4 new blobs, 52 total from main HEAD). - An unmerged 'other' branch adding o/file.{1,2}.txt (2 more blobs, 54 total reachable from --all). This structure makes --all, --first-parent, and --since produce meaningfully different results when used with 'git backfill'. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5620-backfill.sh | 52 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 58c81556e72c89..1331949be47ea8 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -15,7 +15,7 @@ test_expect_success 'setup repo for object creation' ' git init src && mkdir -p src/a/b/c && - mkdir -p src/d/e && + mkdir -p src/d/f && for i in 1 2 do @@ -26,8 +26,9 @@ test_expect_success 'setup repo for object creation' ' echo "Version $i of file a/b/$n" > src/a/b/file.$n.txt && echo "Version $i of file a/b/c/$n" > src/a/b/c/file.$n.txt && echo "Version $i of file d/$n" > src/d/file.$n.txt && - echo "Version $i of file d/e/$n" > src/d/e/file.$n.txt && + echo "Version $i of file d/f/$n" > src/d/f/file.$n.txt && git -C src add . && + test_tick && git -C src commit -m "Iteration $n" || return 1 done done @@ -41,6 +42,53 @@ test_expect_success 'setup bare clone for server' ' git -C srv.bare config --local uploadpack.allowanysha1inwant 1 ' +# Create a version of the repo with branches for testing revision +# arguments like --all, --first-parent, and --since. +# +# main: 8 commits (linear) + merge of side branch +# 48 original blobs + 4 side blobs = 52 blobs from main HEAD +# side: 2 commits adding s/file.{1,2}.txt (v1, v2), merged into main +# other: 1 commit adding o/file.{1,2}.txt (not merged) +# 54 total blobs reachable from --all +test_expect_success 'setup branched repo for revision tests' ' + git clone src src-revs && + + # Side branch from tip of main with unique files + git -C src-revs checkout -b side HEAD && + mkdir -p src-revs/s && + echo "Side version 1 of file 1" >src-revs/s/file.1.txt && + echo "Side version 1 of file 2" >src-revs/s/file.2.txt && + test_tick && + git -C src-revs add . && + git -C src-revs commit -m "Side commit 1" && + + echo "Side version 2 of file 1" >src-revs/s/file.1.txt && + echo "Side version 2 of file 2" >src-revs/s/file.2.txt && + test_tick && + git -C src-revs add . && + git -C src-revs commit -m "Side commit 2" && + + # Merge side into main + git -C src-revs checkout main && + test_tick && + git -C src-revs merge side --no-ff -m "Merge side branch" && + + # Other branch (not merged) for --all testing + git -C src-revs checkout -b other main~1 && + mkdir -p src-revs/o && + echo "Other content 1" >src-revs/o/file.1.txt && + echo "Other content 2" >src-revs/o/file.2.txt && + test_tick && + git -C src-revs add . && + git -C src-revs commit -m "Other commit" && + + git -C src-revs checkout main && + + git clone --bare "file://$(pwd)/src-revs" srv-revs.bare && + git -C srv-revs.bare config --local uploadpack.allowfilter 1 && + git -C srv-revs.bare config --local uploadpack.allowanysha1inwant 1 +' + # do basic partial clone from "srv.bare" test_expect_success 'do partial clone 1, backfill gets all objects' ' git clone --no-checkout --filter=blob:none \ From 231f1ed7a0c870888f0f79f9b3c7cc8403339b02 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 17 Mar 2026 00:29:19 +0000 Subject: [PATCH 123/236] backfill: accept revision arguments The existing implementation of 'git backfill' only includes downloading missing blobs reachable from HEAD. Advanced uses may desire more general commit limiting options, such as '--all' for all references, specifying a commit range via negative references, or specifying a recency of use such as with '--since='. All of these options are available if we use setup_revisions() to parse the unknown arguments with the revision machinery. This opens up a large number of possibilities, only a small set of which are tested here. For documentation, we avoid duplicating the option documentation and instead link to the documentation of 'git rev-list'. Note that these arguments currently allow specifying a pathspec, which modifies the commit history checks but does not limit the paths used in the backfill logic. This will be updated in a future change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-backfill.adoc | 3 + builtin/backfill.c | 19 ++-- t/t5620-backfill.sh | 156 ++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+), 6 deletions(-) diff --git a/Documentation/git-backfill.adoc b/Documentation/git-backfill.adoc index b8394dcf22b6e1..fdfe22d6232844 100644 --- a/Documentation/git-backfill.adoc +++ b/Documentation/git-backfill.adoc @@ -63,9 +63,12 @@ OPTIONS current sparse-checkout. If the sparse-checkout feature is enabled, then `--sparse` is assumed and can be disabled with `--no-sparse`. +You may also specify the commit limiting options from linkgit:git-rev-list[1]. + SEE ALSO -------- linkgit:git-clone[1]. +linkgit:git-rev-list[1]. GIT --- diff --git a/builtin/backfill.c b/builtin/backfill.c index e9a33e81be4cdb..8294c5b71ca841 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -35,6 +35,7 @@ struct backfill_context { struct oid_array current_batch; size_t min_batch_size; int sparse; + struct rev_info revs; }; static void backfill_context_clear(struct backfill_context *ctx) @@ -79,7 +80,6 @@ static int fill_missing_blobs(const char *path UNUSED, static int do_backfill(struct backfill_context *ctx) { - struct rev_info revs; struct path_walk_info info = PATH_WALK_INFO_INIT; int ret; @@ -91,13 +91,14 @@ static int do_backfill(struct backfill_context *ctx) } } - repo_init_revisions(ctx->repo, &revs, ""); - handle_revision_arg("HEAD", &revs, 0, 0); + /* Walk from HEAD if otherwise unspecified. */ + if (!ctx->revs.pending.nr) + handle_revision_arg("HEAD", &ctx->revs, 0, 0); info.blobs = 1; info.tags = info.commits = info.trees = 0; - info.revs = &revs; + info.revs = &ctx->revs; info.path_fn = fill_missing_blobs; info.path_fn_data = ctx; @@ -108,7 +109,6 @@ static int do_backfill(struct backfill_context *ctx) download_batch(ctx); path_walk_info_clear(&info); - release_revisions(&revs); return ret; } @@ -120,6 +120,7 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit .current_batch = OID_ARRAY_INIT, .min_batch_size = 50000, .sparse = 0, + .revs = REV_INFO_INIT, }; struct option options[] = { OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size, @@ -134,7 +135,12 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit builtin_backfill_usage, options); argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage, - 0); + PARSE_OPT_KEEP_UNKNOWN_OPT | + PARSE_OPT_KEEP_ARGV0 | + PARSE_OPT_KEEP_DASHDASH); + + repo_init_revisions(repo, &ctx.revs, prefix); + argc = setup_revisions(argc, argv, &ctx.revs, NULL); repo_config(repo, git_default_config, NULL); @@ -143,5 +149,6 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit result = do_backfill(&ctx); backfill_context_clear(&ctx); + release_revisions(&ctx.revs); return result; } diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 1331949be47ea8..db66d8b614dd19 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -224,6 +224,162 @@ test_expect_success 'backfill --sparse without cone mode (negative)' ' test_line_count = 12 missing ' +test_expect_success 'backfill with revision range' ' + test_when_finished rm -rf backfill-revs && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-revs && + + # No blobs yet + git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + git -C backfill-revs backfill HEAD~2..HEAD && + + # 30 objects downloaded. + git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 18 missing +' + +test_expect_success 'backfill with revisions over stdin' ' + test_when_finished rm -rf backfill-revs && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-revs && + + # No blobs yet + git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + cat >in <<-EOF && + HEAD + ^HEAD~2 + EOF + + git -C backfill-revs backfill --stdin missing && + test_line_count = 18 missing +' + +test_expect_success 'backfill with prefix pathspec' ' + test_when_finished rm -rf backfill-path && + git clone --bare --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-path && + + # No blobs yet + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + # TODO: The pathspec should limit the downloaded blobs to + # only those matching the prefix "d/f", but currently all + # blobs are downloaded. + git -C backfill-path backfill HEAD -- d/f && + + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 0 missing +' + +test_expect_success 'backfill with multiple pathspecs' ' + test_when_finished rm -rf backfill-path && + git clone --bare --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-path && + + # No blobs yet + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + # TODO: The pathspecs should limit the downloaded blobs to + # only those matching "d/f" or "a", but currently all blobs + # are downloaded. + git -C backfill-path backfill HEAD -- d/f a && + + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 0 missing +' + +test_expect_success 'backfill with wildcard pathspec' ' + test_when_finished rm -rf backfill-path && + git clone --bare --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-path && + + # No blobs yet + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + # TODO: The wildcard pathspec should limit downloaded blobs, + # but currently all blobs are downloaded. + git -C backfill-path backfill HEAD -- "d/file.*.txt" && + + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 0 missing +' + +test_expect_success 'backfill with --all' ' + test_when_finished rm -rf backfill-all && + git clone --no-checkout --filter=blob:none \ + "file://$(pwd)/srv-revs.bare" backfill-all && + + # All blobs from all refs are missing + git -C backfill-all rev-list --quiet --objects --all --missing=print >missing && + test_line_count = 54 missing && + + # Backfill from HEAD gets main blobs only + git -C backfill-all backfill HEAD && + + # Other branch blobs still missing + git -C backfill-all rev-list --quiet --objects --all --missing=print >missing && + test_line_count = 2 missing && + + # Backfill with --all gets everything + git -C backfill-all backfill --all && + + git -C backfill-all rev-list --quiet --objects --all --missing=print >missing && + test_line_count = 0 missing +' + +test_expect_success 'backfill with --first-parent' ' + test_when_finished rm -rf backfill-fp && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv-revs.bare" backfill-fp && + + git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 52 missing && + + # --first-parent skips the side branch commits, so + # s/file.{1,2}.txt v1 blobs (only in side commit 1) are missed. + git -C backfill-fp backfill --first-parent HEAD && + + git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 2 missing +' + +test_expect_success 'backfill with --since' ' + test_when_finished rm -rf backfill-since && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv-revs.bare" backfill-since && + + git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 52 missing && + + # Use a cutoff between commits 4 and 5 (between v1 and v2 + # iterations). Commits 5-8 still carry v1 of files 2-4 in + # their trees, but v1 of file.1.txt is only in commits 1-4. + SINCE=$(git -C backfill-since log --first-parent --reverse \ + --format=%ct HEAD~1 | sed -n 5p) && + git -C backfill-since backfill --since="@$((SINCE - 1))" HEAD && + + # 6 missing: v1 of file.1.txt in all 6 directories + git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 6 missing +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd From 9998486900a561410135eca60085947c5d97662d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 17 Mar 2026 00:29:20 +0000 Subject: [PATCH 124/236] backfill: work with prefix pathspecs The previous change allowed specifying revision arguments over the 'git backfill' command-line. This created the opportunity for pathspecs that specify a smaller set of starting commits, but otherwise did not restrict the blob paths that were downloaded. Update the path-walk API to accept certain kinds of pathspecs and to silently ignore anything too complex. The current behavior focuses on pathspecs that match paths exactly. This includes exact filenames, including directory names as prefixes. Pathspecs containing wildcards or magic are cleared so the path walk downloads all blobs, as before. The reason for this restriction is to allow for a faster execution by pruning the path walk to only trees that could contribute towards one of those paths as a parent directory. The test directory 'd/f/' (next to 'd/file*.txt') was prepared in a previous commit to exercise the subtlety in prefix matching. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- path-walk.c | 55 +++++++++++++++++++++++++++++++++++++++++++++ t/t5620-backfill.sh | 16 +++++-------- 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/path-walk.c b/path-walk.c index 364e4cfa19b2e4..e1ad4b02081169 100644 --- a/path-walk.c +++ b/path-walk.c @@ -206,6 +206,49 @@ static int add_tree_entries(struct path_walk_context *ctx, match != MATCHED) continue; } + if (ctx->revs->prune_data.nr) { + struct pathspec *pd = &ctx->revs->prune_data; + bool found = false; + + for (int i = 0; i < pd->nr; i++) { + struct pathspec_item *item = &pd->items[i]; + + /* + * Is this path a parent directory of + * the pathspec item? + */ + if (path.len < (size_t)item->len && + !strncmp(path.buf, item->match, path.len) && + item->match[path.len - 1] == '/') { + found = true; + break; + } + + /* + * Or, is the pathspec an exact match? + */ + if (path.len == (size_t)item->len && + !strcmp(path.buf, item->match)) { + found = true; + break; + } + + /* + * Or, is the pathspec a directory prefix + * match? + */ + if (path.len > (size_t)item->len && + !strncmp(path.buf, item->match, item->len) && + path.buf[item->len] == '/') { + found = true; + break; + } + } + + /* Skip paths that do not match the prefix. */ + if (!found) + continue; + } add_path_to_list(ctx, path.buf, type, &entry.oid, !(o->flags & UNINTERESTING)); @@ -481,6 +524,18 @@ int walk_objects_by_path(struct path_walk_info *info) if (info->tags) info->revs->tag_objects = 1; + if (ctx.revs->prune_data.nr) { + /* + * Only exact prefix pathspecs are currently supported. + * Clear any wildcard or magic pathspecs to avoid + * incorrect prefix matching. + */ + struct pathspec *pd = &ctx.revs->prune_data; + + if (pd->has_wildcard || pd->magic) + pd->nr = 0; + } + /* Insert a single list for the root tree into the paths. */ CALLOC_ARRAY(root_tree_list, 1); root_tree_list->type = OBJ_TREE; diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index db66d8b614dd19..52f6484ca18e55 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -273,13 +273,11 @@ test_expect_success 'backfill with prefix pathspec' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - # TODO: The pathspec should limit the downloaded blobs to - # only those matching the prefix "d/f", but currently all - # blobs are downloaded. - git -C backfill-path backfill HEAD -- d/f && + git -C backfill-path backfill HEAD -- d/f 2>err && + test_must_be_empty err && git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && - test_line_count = 0 missing + test_line_count = 40 missing ' test_expect_success 'backfill with multiple pathspecs' ' @@ -292,13 +290,11 @@ test_expect_success 'backfill with multiple pathspecs' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - # TODO: The pathspecs should limit the downloaded blobs to - # only those matching "d/f" or "a", but currently all blobs - # are downloaded. - git -C backfill-path backfill HEAD -- d/f a && + git -C backfill-path backfill HEAD -- d/f a 2>err && + test_must_be_empty err && git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && - test_line_count = 0 missing + test_line_count = 16 missing ' test_expect_success 'backfill with wildcard pathspec' ' From 97c1972226c29185d1420fcf9c99346b97e3f142 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 17 Mar 2026 00:29:21 +0000 Subject: [PATCH 125/236] path-walk: support wildcard pathspecs for blob filtering Previously, walk_objects_by_path() silently ignored pathspecs containing wildcards or magic by clearing them. This caused all blobs to be downloaded regardless of the given pathspec. Wildcard pathspecs like "d/file.*.txt" are useful for narrowing which blobs to process (e.g., during 'git backfill'). Support wildcard pathspecs by making three changes: 1. Add an 'exact_pathspecs' flag to path_walk_context. When the pathspec has no wildcards or magic, set this flag and use the existing fast-path prefix matching in add_tree_entries(). When wildcards are present, skip that block since prefix matching cannot handle glob patterns. 2. Disable revision-level commit pruning (revs->prune = 0) for wildcard pathspecs. The revision walk uses the pathspec to filter commits via TREESAME detection. For exact prefix pathspecs this works well, but wildcard pathspecs may fail to match through TREESAME because fnmatch with WM_PATHNAME does not cross directory boundaries. Disabling pruning ensures all commits are visited and their trees are available for the path-walk to filter. 3. Add a match_pathspec() check in walk_path() to filter out blobs whose full path does not match the pathspec. This provides the actual blob-level filtering for wildcard pathspecs. Signed-off-by: Derrick Stolee Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: Junio C Hamano --- path-walk.c | 22 ++++++++++++++-------- t/t5620-backfill.sh | 7 +++---- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/path-walk.c b/path-walk.c index e1ad4b02081169..67fb0f7572c890 100644 --- a/path-walk.c +++ b/path-walk.c @@ -62,6 +62,8 @@ struct path_walk_context { */ struct prio_queue path_stack; struct strset path_stack_pushed; + + unsigned exact_pathspecs:1; }; static int compare_by_type(const void *one, const void *two, void *cb_data) @@ -206,7 +208,7 @@ static int add_tree_entries(struct path_walk_context *ctx, match != MATCHED) continue; } - if (ctx->revs->prune_data.nr) { + if (ctx->revs->prune_data.nr && ctx->exact_pathspecs) { struct pathspec *pd = &ctx->revs->prune_data; bool found = false; @@ -317,6 +319,13 @@ static int walk_path(struct path_walk_context *ctx, return 0; } + if (list->type == OBJ_BLOB && + ctx->revs->prune_data.nr && + !match_pathspec(ctx->repo->index, &ctx->revs->prune_data, + path, strlen(path), 0, + NULL, 0)) + return 0; + /* Evaluate function pointer on this data, if requested. */ if ((list->type == OBJ_TREE && ctx->info->trees) || (list->type == OBJ_BLOB && ctx->info->blobs) || @@ -525,15 +534,12 @@ int walk_objects_by_path(struct path_walk_info *info) info->revs->tag_objects = 1; if (ctx.revs->prune_data.nr) { - /* - * Only exact prefix pathspecs are currently supported. - * Clear any wildcard or magic pathspecs to avoid - * incorrect prefix matching. - */ struct pathspec *pd = &ctx.revs->prune_data; - if (pd->has_wildcard || pd->magic) - pd->nr = 0; + if (!pd->has_wildcard && !pd->magic) + ctx.exact_pathspecs = 1; + else + ctx.revs->prune = 0; } /* Insert a single list for the root tree into the paths. */ diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 52f6484ca18e55..c6f54ee91ccc6a 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -307,12 +307,11 @@ test_expect_success 'backfill with wildcard pathspec' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - # TODO: The wildcard pathspec should limit downloaded blobs, - # but currently all blobs are downloaded. - git -C backfill-path backfill HEAD -- "d/file.*.txt" && + git -C backfill-path backfill HEAD -- "d/file.*.txt" 2>err && + test_must_be_empty err && git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && - test_line_count = 0 missing + test_line_count = 40 missing ' test_expect_success 'backfill with --all' ' From 331e1b1e9c399e0fa4b6d46b183620e1d12b0a51 Mon Sep 17 00:00:00 2001 From: Harald Nordgren Date: Tue, 17 Mar 2026 09:35:36 +0000 Subject: [PATCH 126/236] stash: add --ours-label, --theirs-label, --base-label for apply Allow callers of "git stash apply" to pass custom labels for conflict markers instead of the default "Updated upstream" and "Stashed changes". Document the new options and add a test. Signed-off-by: Harald Nordgren Signed-off-by: Junio C Hamano --- Documentation/git-stash.adoc | 11 ++++++++++- builtin/stash.c | 2 +- t/t3903-stash.sh | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/Documentation/git-stash.adoc b/Documentation/git-stash.adoc index 235d57ddd8f5d1..43650a3c1a092f 100644 --- a/Documentation/git-stash.adoc +++ b/Documentation/git-stash.adoc @@ -12,7 +12,7 @@ git stash list [] git stash show [-u | --include-untracked | --only-untracked] [] [] git stash drop [-q | --quiet] [] git stash pop [--index] [-q | --quiet] [] -git stash apply [--index] [-q | --quiet] [] +git stash apply [--index] [-q | --quiet] [--ours-label=