From 04949538203ba7aeb298a607280715d4f26dbe0a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:46 -0800 Subject: [PATCH 001/241] sideband: mask control characters The output of `git clone` is a vital component for understanding what has happened when things go wrong. However, these logs are partially under the control of the remote server (via the "sideband", which typically contains what the remote `git pack-objects` process sends to `stderr`), and is currently not sanitized by Git. This makes Git susceptible to ANSI escape sequence injection (see CWE-150, https://cwe.mitre.org/data/definitions/150.html), which allows attackers to corrupt terminal state, to hide information, and even to insert characters into the input buffer (i.e. as if the user had typed those characters). To plug this vulnerability, disallow any control character in the sideband, replacing them instead with the common `^` (e.g. `^[` for `\x1b`, `^A` for `\x01`). There is likely a need for more fine-grained controls instead of using a "heavy hammer" like this, which will be introduced subsequently. Helped-by: Phillip Wood Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- sideband.c | 17 +++++++++++++++-- t/t5409-colorize-remote-messages.sh | 12 ++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/sideband.c b/sideband.c index ea7c25211ef7e1..c1bbadccac682b 100644 --- a/sideband.c +++ b/sideband.c @@ -66,6 +66,19 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref list_config_item(list, prefix, keywords[i].keyword); } +static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) +{ + strbuf_grow(dest, n); + for (; n && *src; src++, n--) { + if (!iscntrl(*src) || *src == '\t' || *src == '\n') { + strbuf_addch(dest, *src); + } else { + strbuf_addch(dest, '^'); + strbuf_addch(dest, *src == 0x7f ? '?' 
: 0x40 + *src); + } + } +} + /* * Optionally highlight one keyword in remote output if it appears at the start * of the line. This should be called for a single line only, which is @@ -81,7 +94,7 @@ static void maybe_colorize_sideband(struct strbuf *dest, const char *src, int n) int i; if (!want_color_stderr(use_sideband_colors())) { - strbuf_add(dest, src, n); + strbuf_add_sanitized(dest, src, n); return; } @@ -114,7 +127,7 @@ static void maybe_colorize_sideband(struct strbuf *dest, const char *src, int n) } } - strbuf_add(dest, src, n); + strbuf_add_sanitized(dest, src, n); } diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index fa5de4500a4f50..aa5b57057148e0 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -98,4 +98,16 @@ test_expect_success 'fallback to color.ui' ' grep "error: error" decoded ' +test_expect_success 'disallow (color) control sequences in sideband' ' + write_script .git/color-me-surprised <<-\EOF && + printf "error: Have you \\033[31mread\\033[m this?\\n" >&2 + exec "$@" + EOF + test_config_global uploadPack.packObjectsHook ./color-me-surprised && + test_commit need-at-least-one-commit && + git clone --no-local . throw-away 2>stderr && + test_decode_color decoded && + test_grep ! RED decoded +' + test_done From 9ed1625a581a35d7ec2d851258cf4c7fc08c1ed7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:47 -0800 Subject: [PATCH 002/241] sideband: introduce an "escape hatch" to allow control characters The preceding commit fixed the vulnerability whereas sideband messages (that are under the control of the remote server) could contain ANSI escape sequences that would be sent to the terminal verbatim. However, this fix may not be desirable under all circumstances, e.g. when remote servers deliberately add coloring to their messages to increase their urgency. 
To help with those use cases, give users a way to opt-out of the protections: `sideband.allowControlCharacters`. Suggested-by: brian m. carlson Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config.adoc | 2 ++ Documentation/config/sideband.adoc | 5 +++++ sideband.c | 10 ++++++++++ t/t5409-colorize-remote-messages.sh | 8 +++++++- 4 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 Documentation/config/sideband.adoc diff --git a/Documentation/config.adoc b/Documentation/config.adoc index 62eebe7c54501c..dcea3c0c15e2a9 100644 --- a/Documentation/config.adoc +++ b/Documentation/config.adoc @@ -523,6 +523,8 @@ include::config/sequencer.adoc[] include::config/showbranch.adoc[] +include::config/sideband.adoc[] + include::config/sparse.adoc[] include::config/splitindex.adoc[] diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc new file mode 100644 index 00000000000000..3fb5045cd79581 --- /dev/null +++ b/Documentation/config/sideband.adoc @@ -0,0 +1,5 @@ +sideband.allowControlCharacters:: + By default, control characters that are delivered via the sideband + are masked, to prevent potentially unwanted ANSI escape sequences + from being sent to the terminal. Use this config setting to override + this behavior. diff --git a/sideband.c b/sideband.c index c1bbadccac682b..682f1cbbedb9b8 100644 --- a/sideband.c +++ b/sideband.c @@ -26,6 +26,8 @@ static struct keyword_entry keywords[] = { { "error", GIT_COLOR_BOLD_RED }, }; +static int allow_control_characters; + /* Returns a color setting (GIT_COLOR_NEVER, etc). 
*/ static enum git_colorbool use_sideband_colors(void) { @@ -39,6 +41,9 @@ static enum git_colorbool use_sideband_colors(void) if (use_sideband_colors_cached != GIT_COLOR_UNKNOWN) return use_sideband_colors_cached; + repo_config_get_bool(the_repository, "sideband.allowcontrolcharacters", + &allow_control_characters); + if (!repo_config_get_string_tmp(the_repository, key, &value)) use_sideband_colors_cached = git_config_colorbool(key, value); else if (!repo_config_get_string_tmp(the_repository, "color.ui", &value)) @@ -68,6 +73,11 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) { + if (allow_control_characters) { + strbuf_add(dest, src, n); + return; + } + strbuf_grow(dest, n); for (; n && *src; src++, n--) { if (!iscntrl(*src) || *src == '\t' || *src == '\n') { diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index aa5b57057148e0..9caee9a07f1556 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -105,9 +105,15 @@ test_expect_success 'disallow (color) control sequences in sideband' ' EOF test_config_global uploadPack.packObjectsHook ./color-me-surprised && test_commit need-at-least-one-commit && + git clone --no-local . throw-away 2>stderr && test_decode_color decoded && - test_grep ! RED decoded + test_grep ! RED decoded && + + rm -rf throw-away && + git -c sideband.allowControlCharacters clone --no-local . 
throw-away 2>stderr && + test_decode_color decoded && + test_grep RED decoded ' test_done From 12f0fda905b4af3a15c125f96808e49ddbe39742 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:48 -0800 Subject: [PATCH 003/241] sideband: do allow ANSI color sequences by default The preceding two commits introduced special handling of the sideband channel to neutralize ANSI escape sequences before sending the payload to the terminal, and `sideband.allowControlCharacters` to override that behavior. However, as reported by brian m. carlson, some `pre-receive` hooks that are actively used in practice want to color their messages and therefore rely on the fact that Git passes them through to the terminal, even though they have no way to determine whether the receiving side can actually handle Escape sequences (think e.g. about the practice recommended by Git that third-party applications wishing to use Git functionality parse the output of Git commands). In contrast to other ANSI escape sequences, it is highly unlikely that coloring sequences can be essential tools in attack vectors that mislead Git users e.g. by hiding crucial information. Therefore we can have both: Continue to allow ANSI coloring sequences to be passed to the terminal by default, and neutralize all other ANSI Escape sequences. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 18 ++++++-- sideband.c | 66 +++++++++++++++++++++++++++-- t/t5409-colorize-remote-messages.sh | 16 ++++++- 3 files changed, 91 insertions(+), 9 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 3fb5045cd79581..b55c73726fe2c7 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -1,5 +1,17 @@ sideband.allowControlCharacters:: By default, control characters that are delivered via the sideband - are masked, to prevent potentially unwanted ANSI escape sequences - from being sent to the terminal. Use this config setting to override - this behavior. + are masked, except ANSI color sequences. This prevents potentially + unwanted ANSI escape sequences from being sent to the terminal. Use + this config setting to override this behavior: ++ +-- + `default`:: + `color`:: + Allow ANSI color sequences, line feeds and horizontal tabs, + but mask all other control characters. This is the default. + `false`:: + Mask all control characters other than line feeds and + horizontal tabs. + `true`:: + Allow all control characters to be sent to the terminal. +-- diff --git a/sideband.c b/sideband.c index 682f1cbbedb9b8..eeba6fa2ca8dd6 100644 --- a/sideband.c +++ b/sideband.c @@ -26,7 +26,12 @@ static struct keyword_entry keywords[] = { { "error", GIT_COLOR_BOLD_RED }, }; -static int allow_control_characters; +static enum { + ALLOW_NO_CONTROL_CHARACTERS = 0, + ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, + ALLOW_ALL_CONTROL_CHARACTERS = 1<<1, +} allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; /* Returns a color setting (GIT_COLOR_NEVER, etc). 
*/ static enum git_colorbool use_sideband_colors(void) @@ -41,8 +46,26 @@ static enum git_colorbool use_sideband_colors(void) if (use_sideband_colors_cached != GIT_COLOR_UNKNOWN) return use_sideband_colors_cached; - repo_config_get_bool(the_repository, "sideband.allowcontrolcharacters", - &allow_control_characters); + switch (repo_config_get_maybe_bool(the_repository, "sideband.allowcontrolcharacters", &i)) { + case 0: /* Boolean value */ + allow_control_characters = i ? ALLOW_ALL_CONTROL_CHARACTERS : + ALLOW_NO_CONTROL_CHARACTERS; + break; + case -1: /* non-Boolean value */ + if (repo_config_get_string_tmp(the_repository, "sideband.allowcontrolcharacters", + &value)) + ; /* huh? `get_maybe_bool()` returned -1 */ + else if (!strcmp(value, "default")) + allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; + else if (!strcmp(value, "color")) + allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; + else + warning(_("unrecognized value for `sideband." + "allowControlCharacters`: '%s'"), value); + break; + default: + break; /* not configured */ + } if (!repo_config_get_string_tmp(the_repository, key, &value)) use_sideband_colors_cached = git_config_colorbool(key, value); @@ -71,9 +94,41 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref list_config_item(list, prefix, keywords[i].keyword); } +static int handle_ansi_color_sequence(struct strbuf *dest, const char *src, int n) +{ + int i; + + /* + * Valid ANSI color sequences are of the form + * + * ESC [ [ [; ]*] m + * + * These are part of the Select Graphic Rendition sequences which + * contain more than just color sequences, for more details see + * https://en.wikipedia.org/wiki/ANSI_escape_code#SGR. 
+ */ + + if (allow_control_characters != ALLOW_ANSI_COLOR_SEQUENCES || + n < 3 || src[0] != '\x1b' || src[1] != '[') + return 0; + + for (i = 2; i < n; i++) { + if (src[i] == 'm') { + strbuf_add(dest, src, i + 1); + return i; + } + if (!isdigit(src[i]) && src[i] != ';') + break; + } + + return 0; +} + static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) { - if (allow_control_characters) { + int i; + + if (allow_control_characters == ALLOW_ALL_CONTROL_CHARACTERS) { strbuf_add(dest, src, n); return; } @@ -82,6 +137,9 @@ static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) for (; n && *src; src++, n--) { if (!iscntrl(*src) || *src == '\t' || *src == '\n') { strbuf_addch(dest, *src); + } else if ((i = handle_ansi_color_sequence(dest, src, n))) { + src += i; + n -= i; } else { strbuf_addch(dest, '^'); strbuf_addch(dest, *src == 0x7f ? '?' : 0x40 + *src); diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index 9caee9a07f1556..e5092d3b426cd3 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -100,7 +100,7 @@ test_expect_success 'fallback to color.ui' ' test_expect_success 'disallow (color) control sequences in sideband' ' write_script .git/color-me-surprised <<-\EOF && - printf "error: Have you \\033[31mread\\033[m this?\\n" >&2 + printf "error: Have you \\033[31mread\\033[m this?\\a\\n" >&2 exec "$@" EOF test_config_global uploadPack.packObjectsHook ./color-me-surprised && @@ -108,12 +108,24 @@ test_expect_success 'disallow (color) control sequences in sideband' ' git clone --no-local . throw-away 2>stderr && test_decode_color decoded && + test_grep RED decoded && + test_grep "\\^G" stderr && + tr -dc "\\007" actual && + test_must_be_empty actual && + + rm -rf throw-away && + git -c sideband.allowControlCharacters=false \ + clone --no-local . throw-away 2>stderr && + test_decode_color decoded && test_grep ! 
RED decoded && + test_grep "\\^G" stderr && rm -rf throw-away && git -c sideband.allowControlCharacters clone --no-local . throw-away 2>stderr && test_decode_color decoded && - test_grep RED decoded + test_grep RED decoded && + tr -dc "\\007" actual && + test_file_not_empty actual ' test_done From 128914438a0d2d55ae34314a0881f55a797024d5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:49 -0800 Subject: [PATCH 004/241] sideband: add options to allow more control sequences to be passed through Even though control sequences that erase characters are quite juicy for attack scenarios, where attackers are eager to hide traces of suspicious activities, during the review of the side band sanitizing patch series concerns were raised that there might be some legitimate scenarios where Git server's `pre-receive` hooks use those sequences in a benign way. Control sequences to move the cursor can likewise be used to hide tracks by overwriting characters, and have been equally pointed out as having legitimate users. Let's add options to let users opt into passing through those ANSI Escape sequences: `sideband.allowControlCharacters` now supports also `cursor` and `erase`, and it parses the value as a comma-separated list. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 9 ++- sideband.c | 91 ++++++++++++++++++++++++----- t/t5409-colorize-remote-messages.sh | 38 ++++++++++++ 3 files changed, 123 insertions(+), 15 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index b55c73726fe2c7..2bf04262840b02 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -2,13 +2,20 @@ sideband.allowControlCharacters:: By default, control characters that are delivered via the sideband are masked, except ANSI color sequences. This prevents potentially unwanted ANSI escape sequences from being sent to the terminal. 
Use - this config setting to override this behavior: + this config setting to override this behavior (the value can be + a comma-separated list of the following keywords): + -- `default`:: `color`:: Allow ANSI color sequences, line feeds and horizontal tabs, but mask all other control characters. This is the default. + `cursor`:: + Allow control sequences that move the cursor. This is + disabled by default. + `erase`:: + Allow control sequences that erase characters. This is + disabled by default. `false`:: Mask all control characters other than line feeds and horizontal tabs. diff --git a/sideband.c b/sideband.c index eeba6fa2ca8dd6..0b420ca3193888 100644 --- a/sideband.c +++ b/sideband.c @@ -29,9 +29,43 @@ static struct keyword_entry keywords[] = { static enum { ALLOW_NO_CONTROL_CHARACTERS = 0, ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, + ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, + ALLOW_ANSI_ERASE = 1<<2, ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, - ALLOW_ALL_CONTROL_CHARACTERS = 1<<1, -} allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; + ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, +} allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; + +static inline int skip_prefix_in_csv(const char *value, const char *prefix, + const char **out) +{ + if (!skip_prefix(value, prefix, &value) || + (*value && *value != ',')) + return 0; + *out = value + !!*value; + return 1; +} + +static void parse_allow_control_characters(const char *value) +{ + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; + while (*value) { + if (skip_prefix_in_csv(value, "default", &value)) + allow_control_characters |= ALLOW_DEFAULT_ANSI_SEQUENCES; + else if (skip_prefix_in_csv(value, "color", &value)) + allow_control_characters |= ALLOW_ANSI_COLOR_SEQUENCES; + else if (skip_prefix_in_csv(value, "cursor", &value)) + allow_control_characters |= ALLOW_ANSI_CURSOR_MOVEMENTS; + else if (skip_prefix_in_csv(value, "erase", &value)) + allow_control_characters |= ALLOW_ANSI_ERASE; + else if 
(skip_prefix_in_csv(value, "true", &value)) + allow_control_characters = ALLOW_ALL_CONTROL_CHARACTERS; + else if (skip_prefix_in_csv(value, "false", &value)) + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; + else + warning(_("unrecognized value for `sideband." + "allowControlCharacters`: '%s'"), value); + } +} /* Returns a color setting (GIT_COLOR_NEVER, etc). */ static enum git_colorbool use_sideband_colors(void) @@ -55,13 +89,8 @@ static enum git_colorbool use_sideband_colors(void) if (repo_config_get_string_tmp(the_repository, "sideband.allowcontrolcharacters", &value)) ; /* huh? `get_maybe_bool()` returned -1 */ - else if (!strcmp(value, "default")) - allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; - else if (!strcmp(value, "color")) - allow_control_characters = ALLOW_ANSI_COLOR_SEQUENCES; else - warning(_("unrecognized value for `sideband." - "allowControlCharacters`: '%s'"), value); + parse_allow_control_characters(value); break; default: break; /* not configured */ @@ -94,7 +123,7 @@ void list_config_color_sideband_slots(struct string_list *list, const char *pref list_config_item(list, prefix, keywords[i].keyword); } -static int handle_ansi_color_sequence(struct strbuf *dest, const char *src, int n) +static int handle_ansi_sequence(struct strbuf *dest, const char *src, int n) { int i; @@ -106,14 +135,47 @@ static int handle_ansi_color_sequence(struct strbuf *dest, const char *src, int * These are part of the Select Graphic Rendition sequences which * contain more than just color sequences, for more details see * https://en.wikipedia.org/wiki/ANSI_escape_code#SGR. 
+ * + * The cursor movement sequences are: + * + * ESC [ n A - Cursor up n lines (CUU) + * ESC [ n B - Cursor down n lines (CUD) + * ESC [ n C - Cursor forward n columns (CUF) + * ESC [ n D - Cursor back n columns (CUB) + * ESC [ n E - Cursor next line, beginning (CNL) + * ESC [ n F - Cursor previous line, beginning (CPL) + * ESC [ n G - Cursor to column n (CHA) + * ESC [ n ; m H - Cursor position (row n, col m) (CUP) + * ESC [ n ; m f - Same as H (HVP) + * + * The sequences to erase characters are: + * + * + * ESC [ 0 J - Clear from cursor to end of screen (ED) + * ESC [ 1 J - Clear from cursor to beginning of screen (ED) + * ESC [ 2 J - Clear entire screen (ED) + * ESC [ 3 J - Clear entire screen + scrollback (ED) - xterm extension + * ESC [ 0 K - Clear from cursor to end of line (EL) + * ESC [ 1 K - Clear from cursor to beginning of line (EL) + * ESC [ 2 K - Clear entire line (EL) + * ESC [ n M - Delete n lines (DL) + * ESC [ n P - Delete n characters (DCH) + * ESC [ n X - Erase n characters (ECH) + * + * For a comprehensive list of common ANSI Escape sequences, see + * https://www.xfree86.org/current/ctlseqs.html */ - if (allow_control_characters != ALLOW_ANSI_COLOR_SEQUENCES || - n < 3 || src[0] != '\x1b' || src[1] != '[') + if (n < 3 || src[0] != '\x1b' || src[1] != '[') return 0; for (i = 2; i < n; i++) { - if (src[i] == 'm') { + if (((allow_control_characters & ALLOW_ANSI_COLOR_SEQUENCES) && + src[i] == 'm') || + ((allow_control_characters & ALLOW_ANSI_CURSOR_MOVEMENTS) && + strchr("ABCDEFGHf", src[i])) || + ((allow_control_characters & ALLOW_ANSI_ERASE) && + strchr("JKMPX", src[i]))) { strbuf_add(dest, src, i + 1); return i; } @@ -128,7 +190,7 @@ static void strbuf_add_sanitized(struct strbuf *dest, const char *src, int n) { int i; - if (allow_control_characters == ALLOW_ALL_CONTROL_CHARACTERS) { + if ((allow_control_characters & ALLOW_ALL_CONTROL_CHARACTERS)) { strbuf_add(dest, src, n); return; } @@ -137,7 +199,8 @@ static void strbuf_add_sanitized(struct 
strbuf *dest, const char *src, int n) for (; n && *src; src++, n--) { if (!iscntrl(*src) || *src == '\t' || *src == '\n') { strbuf_addch(dest, *src); - } else if ((i = handle_ansi_color_sequence(dest, src, n))) { + } else if (allow_control_characters != ALLOW_NO_CONTROL_CHARACTERS && + (i = handle_ansi_sequence(dest, src, n))) { src += i; n -= i; } else { diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index e5092d3b426cd3..896e790bf955cd 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -128,4 +128,42 @@ test_expect_success 'disallow (color) control sequences in sideband' ' test_file_not_empty actual ' +test_decode_csi() { + awk '{ + while (match($0, /\033/) != 0) { + printf "%sCSI ", substr($0, 1, RSTART-1); + $0 = substr($0, RSTART + RLENGTH, length($0) - RSTART - RLENGTH + 1); + } + print + }' +} + +test_expect_success 'control sequences in sideband allowed by default' ' + write_script .git/color-me-surprised <<-\EOF && + printf "error: \\033[31mcolor\\033[m\\033[Goverwrite\\033[Gerase\\033[K\\033?25l\\n" >&2 + exec "$@" + EOF + test_config_global uploadPack.packObjectsHook ./color-me-surprised && + test_commit need-at-least-one-commit-at-least && + + rm -rf throw-away && + git clone --no-local . throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep ! "CSI \\[K" decoded && + test_grep ! "CSI \\[G" decoded && + test_grep "\\^\\[?25l" decoded && + + rm -rf throw-away && + git -c sideband.allowControlCharacters=erase,cursor,color \ + clone --no-local . throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep "RED" decoded && + test_grep "CSI \\[K" decoded && + test_grep "CSI \\[G" decoded && + test_grep ! "\\^\\[\\[K" decoded && + test_grep ! 
"\\^\\[\\[G" decoded +' + test_done From 602c83f0efed46c2e86a36273673bf8776ded04e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:50 -0800 Subject: [PATCH 005/241] sideband: offer to configure sanitizing on a per-URL basis The main objection against sanitizing the sideband that was raised during the review of the sideband sanitizing patches, first on the git-security mailing list, then on the public mailing list, was that there are some setups where server-side `pre-receive` hooks want to error out, giving colorful messages to the users on the client side (if they are not redirecting the output into a file, that is). To avoid breaking such setups, the default chosen by the sideband sanitizing patches is to pass through ANSI color sequences. Still, there might be some use case out there where that is not enough. Therefore the `sideband.allowControlCharacters` config setting allows for configuring levels of sanitizing. As Junio Hamano pointed out, to keep users safe by default, we need to be able to scope this to some servers because while a user may trust their company's Git server, the same might not apply to other Git servers. To allow for this, let's imitate the way `http..*` offers to scope config settings to certain URLs, by letting users override the `sideband.allowControlCharacters` setting via `sideband..allowControlCharacters`. 
Suggested-by: Junio Hamano Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 4 ++ sideband.c | 81 ++++++++++++++++++++--------- sideband.h | 14 +++++ t/t5409-colorize-remote-messages.sh | 24 +++++++++ transport.c | 3 ++ 5 files changed, 102 insertions(+), 24 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 2bf04262840b02..32088bbf2f0a40 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -22,3 +22,7 @@ sideband.allowControlCharacters:: `true`:: Allow all control characters to be sent to the terminal. -- + +sideband..*:: + Apply the `sideband.*` option selectively to specific URLs. The + same URL matching logic applies as for `http..*` settings. diff --git a/sideband.c b/sideband.c index 0b420ca3193888..a90db9e2880cba 100644 --- a/sideband.c +++ b/sideband.c @@ -10,6 +10,7 @@ #include "help.h" #include "pkt-line.h" #include "write-or-die.h" +#include "urlmatch.h" struct keyword_entry { /* @@ -27,13 +28,14 @@ static struct keyword_entry keywords[] = { }; static enum { - ALLOW_NO_CONTROL_CHARACTERS = 0, - ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, - ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, - ALLOW_ANSI_ERASE = 1<<2, - ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, - ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, -} allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; + ALLOW_CONTROL_SEQUENCES_UNSET = -1, + ALLOW_NO_CONTROL_CHARACTERS = 0, + ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, + ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, + ALLOW_ANSI_ERASE = 1<<2, + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, + ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, +} allow_control_characters = ALLOW_CONTROL_SEQUENCES_UNSET; static inline int skip_prefix_in_csv(const char *value, const char *prefix, const char **out) @@ -45,8 +47,19 @@ static inline int skip_prefix_in_csv(const char *value, const char *prefix, return 1; } -static void 
parse_allow_control_characters(const char *value) +int sideband_allow_control_characters_config(const char *var, const char *value) { + switch (git_parse_maybe_bool(value)) { + case 0: + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; + return 0; + case 1: + allow_control_characters = ALLOW_ALL_CONTROL_CHARACTERS; + return 0; + default: + break; + } + allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; while (*value) { if (skip_prefix_in_csv(value, "default", &value)) @@ -62,9 +75,37 @@ static void parse_allow_control_characters(const char *value) else if (skip_prefix_in_csv(value, "false", &value)) allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; else - warning(_("unrecognized value for `sideband." - "allowControlCharacters`: '%s'"), value); + warning(_("unrecognized value for '%s': '%s'"), var, value); } + return 0; +} + +static int sideband_config_callback(const char *var, const char *value, + const struct config_context *ctx UNUSED, + void *data UNUSED) +{ + if (!strcmp(var, "sideband.allowcontrolcharacters")) + return sideband_allow_control_characters_config(var, value); + + return 0; +} + +void sideband_apply_url_config(const char *url) +{ + struct urlmatch_config config = URLMATCH_CONFIG_INIT; + char *normalized_url; + + if (!url) + BUG("must not call sideband_apply_url_config(NULL)"); + + config.section = "sideband"; + config.collect_fn = sideband_config_callback; + + normalized_url = url_normalize(url, &config.url); + repo_config(the_repository, urlmatch_config_entry, &config); + free(normalized_url); + string_list_clear(&config.vars, 1); + urlmatch_config_release(&config); } /* Returns a color setting (GIT_COLOR_NEVER, etc). 
*/ @@ -80,20 +121,12 @@ static enum git_colorbool use_sideband_colors(void) if (use_sideband_colors_cached != GIT_COLOR_UNKNOWN) return use_sideband_colors_cached; - switch (repo_config_get_maybe_bool(the_repository, "sideband.allowcontrolcharacters", &i)) { - case 0: /* Boolean value */ - allow_control_characters = i ? ALLOW_ALL_CONTROL_CHARACTERS : - ALLOW_NO_CONTROL_CHARACTERS; - break; - case -1: /* non-Boolean value */ - if (repo_config_get_string_tmp(the_repository, "sideband.allowcontrolcharacters", - &value)) - ; /* huh? `get_maybe_bool()` returned -1 */ - else - parse_allow_control_characters(value); - break; - default: - break; /* not configured */ + if (allow_control_characters == ALLOW_CONTROL_SEQUENCES_UNSET) { + if (!repo_config_get_value(the_repository, "sideband.allowcontrolcharacters", &value)) + sideband_allow_control_characters_config("sideband.allowcontrolcharacters", value); + + if (allow_control_characters == ALLOW_CONTROL_SEQUENCES_UNSET) + allow_control_characters = ALLOW_DEFAULT_ANSI_SEQUENCES; } if (!repo_config_get_string_tmp(the_repository, key, &value)) diff --git a/sideband.h b/sideband.h index 5a25331be55d30..d15fa4015fa0a3 100644 --- a/sideband.h +++ b/sideband.h @@ -30,4 +30,18 @@ int demultiplex_sideband(const char *me, int status, void send_sideband(int fd, int band, const char *data, ssize_t sz, int packet_max); +/* + * Apply sideband configuration for the given URL. This should be called + * when a transport is created to allow URL-specific configuration of + * sideband behavior (e.g., sideband..allowControlCharacters). + */ +void sideband_apply_url_config(const char *url); + +/* + * Parse and set the sideband allow control characters configuration. + * The var parameter should be the key name (without section prefix). + * Returns 0 if the variable was recognized and handled, non-zero otherwise. 
+ */ +int sideband_allow_control_characters_config(const char *var, const char *value); + #endif diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index 896e790bf955cd..3010913bb113e4 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -166,4 +166,28 @@ test_expect_success 'control sequences in sideband allowed by default' ' test_grep ! "\\^\\[\\[G" decoded ' +test_expect_success 'allow all control sequences for a specific URL' ' + write_script .git/eraser <<-\EOF && + printf "error: Ohai!\\r\\033[K" >&2 + exec "$@" + EOF + test_config_global uploadPack.packObjectsHook ./eraser && + test_commit one-more-please && + + rm -rf throw-away && + git clone --no-local . throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep ! "CSI \\[K" decoded && + test_grep "\\^\\[\\[K" decoded && + + rm -rf throw-away && + git -c "sideband.file://.allowControlCharacters=true" \ + clone --no-local "file://$PWD" throw-away 2>stderr && + test_decode_color color-decoded && + test_decode_csi decoded && + test_grep "CSI \\[K" decoded && + test_grep ! 
"\\^\\[\\[K" decoded +' + test_done diff --git a/transport.c b/transport.c index c7f06a7382e605..1602065953a54e 100644 --- a/transport.c +++ b/transport.c @@ -29,6 +29,7 @@ #include "object-name.h" #include "color.h" #include "bundle-uri.h" +#include "sideband.h" static enum git_colorbool transport_use_color = GIT_COLOR_UNKNOWN; static char transport_colors[][COLOR_MAXLEN] = { @@ -1245,6 +1246,8 @@ struct transport *transport_get(struct remote *remote, const char *url) ret->hash_algo = &hash_algos[GIT_HASH_SHA1_LEGACY]; + sideband_apply_url_config(ret->url); + return ret; } From 826cc4722088a02d0ae240c1267b5b74d476b153 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 5 Mar 2026 15:34:51 -0800 Subject: [PATCH 006/241] sideband: drop 'default' configuration The topic so far allows users to tweak the configuration variable sideband.allowControlCharacters to override the hardcoded default, but among which there is the value called 'default'. The plan [*] of the series is to loosen the setting by a later commit in the series and schedule it to tighten at the Git 3.0 boundary for end users, at which point, the meaning of this 'default' value will change. Which is a dubious design. A user expresses their preference by setting configuration variable in order to guard against sudden change brought in by changes to the hardcoded default behaviour, and letting them set it to 'default' that will change at the Git 3.0 boundary defeats its purpose. If a user wants to say "I am easy and can go with whatever hardcoded default Git implementors choose for me", they simply leave the configuration variable unspecified. Let's remove it from the state before Git 3.0 so that those users who set it to 'default' will not see the behaviour changed under their feet all of sudden. 
Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 1 - sideband.c | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 32088bbf2f0a40..96fade7f5fee39 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -6,7 +6,6 @@ sideband.allowControlCharacters:: a comma-separated list of the following keywords): + -- - `default`:: `color`:: Allow ANSI color sequences, line feeds and horizontal tabs, but mask all other control characters. This is the default. diff --git a/sideband.c b/sideband.c index a90db9e2880cba..04282a568edd90 100644 --- a/sideband.c +++ b/sideband.c @@ -33,8 +33,8 @@ static enum { ALLOW_ANSI_COLOR_SEQUENCES = 1<<0, ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, ALLOW_ANSI_ERASE = 1<<2, - ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES } allow_control_characters = ALLOW_CONTROL_SEQUENCES_UNSET; static inline int skip_prefix_in_csv(const char *value, const char *prefix, @@ -62,9 +62,7 @@ int sideband_allow_control_characters_config(const char *var, const char *value) allow_control_characters = ALLOW_NO_CONTROL_CHARACTERS; while (*value) { - if (skip_prefix_in_csv(value, "default", &value)) - allow_control_characters |= ALLOW_DEFAULT_ANSI_SEQUENCES; - else if (skip_prefix_in_csv(value, "color", &value)) + if (skip_prefix_in_csv(value, "color", &value)) allow_control_characters |= ALLOW_ANSI_COLOR_SEQUENCES; else if (skip_prefix_in_csv(value, "cursor", &value)) allow_control_characters |= ALLOW_ANSI_CURSOR_MOVEMENTS; From 818fbfd208f919e7a4fd9c827b65e5ce5372479b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 5 Mar 2026 15:34:52 -0800 Subject: [PATCH 007/241] sideband: delay sanitizing by default to Git v3.0 The sideband sanitization patches allow ANSI color sequences through by default, 
preserving compatibility with pre-receive hooks that provide colored output during `git push`. Even so, there is concern that changing any default behavior in a minor release may have unforeseen consequences. To accommodate this, defer the secure-by-default behavior to Git v3.0, where breaking changes are expected. This gives users and tooling time to prepare, while committing to address CVE-2024-52005 in Git v3.0. Signed-off-by: Johannes Schindelin [jc: adjusted for the removal of 'default' value] Signed-off-by: Junio C Hamano --- Documentation/config/sideband.adoc | 12 ++++++++++-- sideband.c | 6 +++++- t/t5409-colorize-remote-messages.sh | 18 +++++++++++++----- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/Documentation/config/sideband.adoc b/Documentation/config/sideband.adoc index 96fade7f5fee39..ddba93393ccadc 100644 --- a/Documentation/config/sideband.adoc +++ b/Documentation/config/sideband.adoc @@ -1,8 +1,16 @@ sideband.allowControlCharacters:: +ifdef::with-breaking-changes[] By default, control characters that are delivered via the sideband are masked, except ANSI color sequences. This prevents potentially - unwanted ANSI escape sequences from being sent to the terminal. Use - this config setting to override this behavior (the value can be + unwanted ANSI escape sequences from being sent to the terminal. +endif::with-breaking-changes[] +ifndef::with-breaking-changes[] + By default, no control characters delivered via the sideband + are masked. This is unsafe and will change in Git v3.0 to only + allow ANSI color sequences by default, preventing potentially + unwanted ANSI escape sequences from being sent to the terminal. 
+endif::with-breaking-changes[] + Use this config setting to override this behavior (the value can be a comma-separated list of the following keywords): + -- diff --git a/sideband.c b/sideband.c index 04282a568edd90..5fb60e52bf00b2 100644 --- a/sideband.c +++ b/sideband.c @@ -34,7 +34,11 @@ static enum { ALLOW_ANSI_CURSOR_MOVEMENTS = 1<<1, ALLOW_ANSI_ERASE = 1<<2, ALLOW_ALL_CONTROL_CHARACTERS = 1<<3, - ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES +#ifdef WITH_BREAKING_CHANGES + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ANSI_COLOR_SEQUENCES, +#else + ALLOW_DEFAULT_ANSI_SEQUENCES = ALLOW_ALL_CONTROL_CHARACTERS, +#endif } allow_control_characters = ALLOW_CONTROL_SEQUENCES_UNSET; static inline int skip_prefix_in_csv(const char *value, const char *prefix, diff --git a/t/t5409-colorize-remote-messages.sh b/t/t5409-colorize-remote-messages.sh index 3010913bb113e4..07cbc62736bd26 100755 --- a/t/t5409-colorize-remote-messages.sh +++ b/t/t5409-colorize-remote-messages.sh @@ -98,6 +98,13 @@ test_expect_success 'fallback to color.ui' ' grep "error: error" decoded ' +if test_have_prereq WITH_BREAKING_CHANGES +then + TURN_ON_SANITIZING=already.turned=on +else + TURN_ON_SANITIZING=sideband.allowControlCharacters=color +fi + test_expect_success 'disallow (color) control sequences in sideband' ' write_script .git/color-me-surprised <<-\EOF && printf "error: Have you \\033[31mread\\033[m this?\\a\\n" >&2 @@ -106,7 +113,7 @@ test_expect_success 'disallow (color) control sequences in sideband' ' test_config_global uploadPack.packObjectsHook ./color-me-surprised && test_commit need-at-least-one-commit && - git clone --no-local . throw-away 2>stderr && + git -c $TURN_ON_SANITIZING clone --no-local . 
throw-away 2>stderr && test_decode_color decoded && test_grep RED decoded && test_grep "\\^G" stderr && @@ -138,7 +145,7 @@ test_decode_csi() { }' } -test_expect_success 'control sequences in sideband allowed by default' ' +test_expect_success 'control sequences in sideband allowed by default (in Git v3.0)' ' write_script .git/color-me-surprised <<-\EOF && printf "error: \\033[31mcolor\\033[m\\033[Goverwrite\\033[Gerase\\033[K\\033?25l\\n" >&2 exec "$@" @@ -147,7 +154,7 @@ test_expect_success 'control sequences in sideband allowed by default' ' test_commit need-at-least-one-commit-at-least && rm -rf throw-away && - git clone --no-local . throw-away 2>stderr && + git -c $TURN_ON_SANITIZING clone --no-local . throw-away 2>stderr && test_decode_color color-decoded && test_decode_csi decoded && test_grep ! "CSI \\[K" decoded && @@ -175,14 +182,15 @@ test_expect_success 'allow all control sequences for a specific URL' ' test_commit one-more-please && rm -rf throw-away && - git clone --no-local . throw-away 2>stderr && + git -c $TURN_ON_SANITIZING clone --no-local . throw-away 2>stderr && test_decode_color color-decoded && test_decode_csi decoded && test_grep ! "CSI \\[K" decoded && test_grep "\\^\\[\\[K" decoded && rm -rf throw-away && - git -c "sideband.file://.allowControlCharacters=true" \ + git -c sideband.allowControlCharacters=false \ + -c "sideband.file://.allowControlCharacters=true" \ clone --no-local "file://$PWD" throw-away 2>stderr && test_decode_color color-decoded && test_decode_csi decoded && From f8e90b972ef9567df2d6983ae2c5f1f2659e86ad Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Thu, 5 Mar 2026 17:55:47 -0600 Subject: [PATCH 008/241] contrib/subtree: reduce function side-effects `process_subtree_split_trailer()` communicates its return value to the caller by setting a variable (`sub`) that is also defined by the calling function. This is both unclear and encourages side-effects. Invoke this function in a sub-shell instead. 
Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index 791fd8260c4703..bae5d9170bd7a3 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -373,6 +373,10 @@ try_remove_previous () { } # Usage: process_subtree_split_trailer SPLIT_HASH MAIN_HASH [REPOSITORY] +# +# Parse SPLIT_HASH as a commit. If the commit is not found, fetches +# REPOSITORY and tries again. If found, prints full commit hash. +# Otherwise, dies. process_subtree_split_trailer () { assert test $# -ge 2 assert test $# -le 3 @@ -400,6 +404,7 @@ process_subtree_split_trailer () { die "$fail_msg" fi fi + echo "${sub}" } # Usage: find_latest_squash DIR [REPOSITORY] @@ -432,7 +437,7 @@ find_latest_squash () { main="$b" ;; git-subtree-split:) - process_subtree_split_trailer "$b" "$sq" "$repository" + sub="$(process_subtree_split_trailer "$b" "$sq" "$repository")" || exit 1 ;; END) if test -n "$sub" @@ -489,7 +494,7 @@ find_existing_splits () { main="$b" ;; git-subtree-split:) - process_subtree_split_trailer "$b" "$sq" "$repository" + sub="$(process_subtree_split_trailer "$b" "$sq" "$repository")" || exit 1 ;; END) debug "Main is: '$main'" From 3b3ace4d5bb72cb1845e547439b53e00dcf49b8e Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Thu, 5 Mar 2026 17:55:48 -0600 Subject: [PATCH 009/241] contrib/subtree: functionalize split traversal `git subtree split` requires an ancestor-first history traversal. Refactor the existing rev-list traversal into its own function, `find_commits_to_split`. Pass unrevs via stdin to avoid limits on the maximum length of command-line arguments. Also remove an unnecessary `eval`. 
Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index bae5d9170bd7a3..c1756b3e74cc35 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -519,6 +519,31 @@ find_existing_splits () { done || exit $? } +# Usage: find_commits_to_split REV UNREVS [ARGS...] +# +# List each commit to split, with its parents. +# +# Specify the starting REV for the split, which is usually +# a branch tip. Populate UNREVS with the last --rejoin for +# this prefix, if any. Typically, `subtree split` ignores +# history prior to the last --rejoin... unless and if it +# becomes necessary to consider it. `find_existing_splits` is +# a convenient source of UNREVS. +# +# Remaining arguments are passed to rev-list. +# +# Outputs commits in ancestor-first order, one per line, with +# parent information. Outputs all parents before any child. +find_commits_to_split() { + assert test $# -ge 2 + rev="$1" + unrevs="$2" + shift 2 + + echo "$unrevs" | + git rev-list --topo-order --reverse --parents --stdin "$rev" "$@" +} + # Usage: copy_commit REV TREE FLAGS_STR copy_commit () { assert test $# = 3 @@ -976,12 +1001,11 @@ cmd_split () { # We can't restrict rev-list to only $dir here, because some of our # parents have the $dir contents the root, and those won't match. 
# (and rev-list --follow doesn't seem to solve this) - grl='git rev-list --topo-order --reverse --parents $rev $unrevs' - revmax=$(eval "$grl" | wc -l) + revmax="$(find_commits_to_split "$rev" "$unrevs" --count)" revcount=0 createcount=0 extracount=0 - eval "$grl" | + find_commits_to_split "$rev" "$unrevs" | while read rev parents do process_split_commit "$rev" "$parents" From c30871b91d4d01ddf24f8129e23aff9da0a57575 Mon Sep 17 00:00:00 2001 From: Colin Stagner Date: Thu, 5 Mar 2026 17:55:49 -0600 Subject: [PATCH 010/241] contrib/subtree: reduce recursion during split MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Debian-alikes, POSIX sh has a hardcoded recursion depth of 1000. This limit operates like bash's `$FUNCNEST` [1], but it does not actually respect `$FUNCNEST`. This is non-standard behavior. On other distros, the sh recursion depth is limited only by the available stack size. With certain history graphs, subtree splits are recursive—with one recursion per commit. Attempting to split complex repos that have thousands of commits, like [2], may fail on these distros. Reduce the amount of recursion required by eagerly discovering the complete range of commits to process. The recursion is a side-effect of the rejoin-finder in `find_existing_splits`. Rejoin mode, as in git subtree split --rejoin -b hax main ... improves the speed of later splits by merging the split history back into `main`. This gives the splitting algorithm a stopping point. The rejoin maps one commit on `main` to one split commit on `hax`. If we encounter this commit, we know that it maps to `hax`. But this is only a single point in the history. Many splits require history from before the rejoin. See patch content for examples. If pre-rejoin history is required, `check_parents` recursively discovers each individual parent, with one recursion per commit. The recursion deepens the entire tree, even if an older rejoin is available. 
This quickly overwhelms the Debian sh stack. Instead of recursively processing each commit, process *all* the commits back to the next obvious starting point: i.e., either the next-oldest --rejoin or the beginning of history. This is where the recursion is likely to stop anyway. While this still requires recursion, it is *considerably* less recursive. [1]: https://www.gnu.org/software/bash/manual/html_node/Bash-Variables.html#index-FUNCNEST [2]: https://github.com/christian-heusel/aur.git Signed-off-by: Colin Stagner Signed-off-by: Junio C Hamano --- contrib/subtree/git-subtree.sh | 56 ++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/contrib/subtree/git-subtree.sh b/contrib/subtree/git-subtree.sh index c1756b3e74cc35..c649a9e393a96c 100755 --- a/contrib/subtree/git-subtree.sh +++ b/contrib/subtree/git-subtree.sh @@ -315,6 +315,46 @@ cache_miss () { } # Usage: check_parents [REVS...] +# +# During a split, check that every commit in REVS has already been +# processed via `process_split_commit`. If not, deepen the history +# until it is. +# +# Commits authored by `subtree split` have to be created in the +# same order as every other git commit: ancestor-first, with new +# commits building on old commits. The traversal order normally +# ensures this is the case, but it also excludes --rejoins commits +# by default. +# +# The --rejoin tells us, "this mainline commit is equivalent to +# this split commit." The relationship is only known for that +# exact commit---and not before or after it. Frequently, commits +# prior to a rejoin are not needed... but, just as often, they +# are! Consider this history graph: +# +# --D--- +# / \ +# A--B--C--R--X--Y main +# / / +# a--b--c / split +# \ / +# --e--/ +# +# The main branch has commits A, B, and C. main is split into +# commits a, b, and c. The split history is rejoined at R. +# +# There are at least two cases where we might need the A-B-C +# history that is prior to R: +# +# 1. 
Commit D is based on history prior to R, but +# it isn't merged into mainline until after R. +# +# 2. Commit e is based on old split history. It is merged +# back into mainline with a subtree merge. Again, this +# happens after R. +# +# check_parents detects these cases and deepens the history +# to the next available rejoin. check_parents () { missed=$(cache_miss "$@") || exit $? local indent=$(($indent + 1)) @@ -322,8 +362,20 @@ check_parents () { do if ! test -r "$cachedir/notree/$miss" then - debug "incorrect order: $miss" - process_split_commit "$miss" "" + debug "found commit excluded by --rejoin: $miss. skipping to the next --rejoin..." + unrevs="$(find_existing_splits "$dir" "$miss" "$repository")" || exit 1 + + find_commits_to_split "$miss" "$unrevs" | + while read -r rev parents + do + process_split_commit "$rev" "$parents" + done + + if ! test -r "$cachedir/$miss" && + ! test -r "$cachedir/notree/$miss" + then + die "failed to deepen history at $miss" + fi fi done } From f584f9d36129f8af18251bd0f193c914ed8b0cfb Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 11 Mar 2026 14:19:38 +0000 Subject: [PATCH 011/241] promisor-remote: prevent lazy-fetch recursion in child fetch fetch_objects() spawns a child `git fetch` to lazily fill in missing objects. That child's index-pack, when it receives a thin pack containing a REF_DELTA against a still-missing base, calls promisor_remote_get_direct() -- which is fetch_objects() again. With negotiationAlgorithm=noop the client advertises no "have" lines, so a well-behaved server sends requested objects un-deltified or deltified only against objects in the same pack. A server that nevertheless sends REF_DELTA against a base the client does not have is misbehaving; however the client should not recurse unboundedly in response. 
Propagate GIT_NO_LAZY_FETCH=1 into the child fetch's environment so that if the child's index-pack encounters such a REF_DELTA, it hits the existing guard at the top of fetch_objects() and fails fast instead of recursing. Depth-1 lazy fetch (the whole point of fetch_objects()) is unaffected: only the child and its descendants see the variable. Add a test that injects a thin pack containing a REF_DELTA against a missing base via HTTP, triggering the recursion path through index-pack's promisor_remote_get_direct() call. With the fix, the child's fetch_objects() sees GIT_NO_LAZY_FETCH=1 and blocks the depth-2 fetch with a "lazy fetching disabled" warning. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- promisor-remote.c | 7 +++++ t/t5616-partial-clone.sh | 60 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/promisor-remote.c b/promisor-remote.c index 77ebf537e2b3ee..2f56e89404b19a 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -42,6 +42,13 @@ static int fetch_objects(struct repository *repo, child.in = -1; if (repo != the_repository) prepare_other_repo_env(&child.env, repo->gitdir); + /* + * Prevent the child's index-pack from recursing back into + * fetch_objects() when resolving REF_DELTA bases it does not + * have. With noop negotiation the server should never need + * to send such deltas, so a depth-2 fetch would not help. + */ + strvec_pushf(&child.env, "%s=1", NO_LAZY_FETCH_ENVIRONMENT); strvec_pushl(&child.args, "-c", "fetch.negotiationAlgorithm=noop", "fetch", remote_name, "--no-tags", "--no-write-fetch-head", "--recurse-submodules=no", diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 1e354e057fa12c..27f131c8d9ba57 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -907,6 +907,66 @@ test_expect_success PERL_TEST_HELPERS 'tolerate server sending REF_DELTA against ! 
test -e "$HTTPD_ROOT_PATH/one-time-script" ' +test_expect_success PERL_TEST_HELPERS 'lazy-fetch of REF_DELTA with missing base does not recurse' ' + SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" && + rm -rf "$SERVER" repo && + test_create_repo "$SERVER" && + test_config -C "$SERVER" uploadpack.allowfilter 1 && + test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 && + + # Create a commit with 2 blobs to be used as delta base and content. + for i in $(test_seq 10) + do + echo "this is a line" >>"$SERVER/foo.txt" && + echo "this is another line" >>"$SERVER/bar.txt" || return 1 + done && + git -C "$SERVER" add foo.txt bar.txt && + git -C "$SERVER" commit -m initial && + BLOB_FOO=$(git -C "$SERVER" rev-parse HEAD:foo.txt) && + BLOB_BAR=$(git -C "$SERVER" rev-parse HEAD:bar.txt) && + + # Partial clone with blob:none. The client has commits and + # trees but no blobs. + test_config -C "$SERVER" protocol.version 2 && + git -c protocol.version=2 clone --no-checkout \ + --filter=blob:none $HTTPD_URL/one_time_script/server repo && + + # Sanity check: client does not have either blob locally. + git -C repo rev-list --objects --ignore-missing \ + -- $BLOB_FOO >objlist && + test_line_count = 0 objlist && + + # Craft a thin pack where BLOB_FOO is a REF_DELTA against + # BLOB_BAR. Since the client has neither blob (blob:none + # filter), the delta base will be missing. This simulates a + # misbehaving server that sends REF_DELTA against an object + # the client does not have. + test-tool -C "$SERVER" pack-deltas --num-objects=1 >thin.pack <<-EOF && + REF_DELTA $BLOB_FOO $BLOB_BAR + EOF + + replace_packfile thin.pack && + + # Trigger a lazy fetch for BLOB_FOO. The child fetch spawned + # by fetch_objects() receives our crafted thin pack. Its + # index-pack encounters the missing delta base (BLOB_BAR) and + # tries to lazy-fetch it via promisor_remote_get_direct(). 
+ # + # With the fix: fetch_objects() propagates GIT_NO_LAZY_FETCH=1 + # to the child, so the depth-2 fetch is blocked and we see the + # "lazy fetching disabled" warning. The object cannot be + # resolved, so cat-file fails. + # + # Without the fix: the depth-2 fetch would proceed, potentially + # recursing unboundedly with a persistently misbehaving server. + test_must_fail git -C repo -c protocol.version=2 \ + cat-file -p $BLOB_FOO 2>err && + test_grep "lazy fetching disabled" err && + + # Ensure that the one-time-script was used. + ! test -e "$HTTPD_ROOT_PATH/one-time-script" +' + # DO NOT add non-httpd-specific tests here, because the last part of this # test script is only executed when httpd is available and enabled. From acefb71d0d4e5a16ccda9226acac0920202de771 Mon Sep 17 00:00:00 2001 From: Alan Braithwaite Date: Sun, 15 Mar 2026 05:37:02 +0000 Subject: [PATCH 012/241] clone: add clone..defaultObjectFilter config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new configuration option that lets users specify a default partial clone filter, optionally scoped by URL pattern. When cloning a repository whose URL matches a configured pattern, git-clone automatically applies the filter, equivalent to passing --filter on the command line. [clone] defaultObjectFilter = blob:limit=1m [clone "https://github.com/"] defaultObjectFilter = blob:limit=5m [clone "https://internal.corp.com/large-project/"] defaultObjectFilter = blob:none The bare clone.defaultObjectFilter applies to all clones. The URL-qualified form clone..defaultObjectFilter restricts the setting to matching URLs. URL matching uses the existing urlmatch_config_entry() infrastructure, following the same rules as http..* — a domain, namespace, or specific project can be matched, and the most specific match wins. The config only affects the initial clone. 
Once the clone completes, the filter is recorded in remote..partialCloneFilter, so subsequent fetches inherit it automatically. An explicit --filter on the command line takes precedence, and --no-filter defeats the configured default entirely. Signed-off-by: Alan Braithwaite Signed-off-by: Junio C Hamano --- Documentation/config/clone.adoc | 34 +++++++++ builtin/clone.c | 54 ++++++++++++++ t/t5616-partial-clone.sh | 126 ++++++++++++++++++++++++++++++++ 3 files changed, 214 insertions(+) diff --git a/Documentation/config/clone.adoc b/Documentation/config/clone.adoc index 0a10efd174ea4b..1d6c0957a066c5 100644 --- a/Documentation/config/clone.adoc +++ b/Documentation/config/clone.adoc @@ -21,3 +21,37 @@ endif::[] If a partial clone filter is provided (see `--filter` in linkgit:git-rev-list[1]) and `--recurse-submodules` is used, also apply the filter to submodules. + +`clone.defaultObjectFilter`:: +`clone..defaultObjectFilter`:: + When set to a filter spec string (e.g., `blob:limit=1m`, + `blob:none`, `tree:0`), linkgit:git-clone[1] will automatically + use `--filter=` to enable partial clone behavior. + Objects matching the filter are excluded from the initial + transfer and lazily fetched on demand (e.g., during checkout). + Subsequent fetches inherit the filter via the per-remote config + that is written during the clone. ++ +The bare `clone.defaultObjectFilter` applies to all clones. The +URL-qualified form `clone..defaultObjectFilter` restricts the +setting to clones whose URL matches ``, following the same +rules as `http..*` (see linkgit:git-config[1]). The most +specific URL match wins. 
You can match a domain, a namespace, or a +specific project: ++ +---- +[clone] + defaultObjectFilter = blob:limit=1m + +[clone "https://github.com/"] + defaultObjectFilter = blob:limit=5m + +[clone "https://internal.corp.com/large-project/"] + defaultObjectFilter = blob:none +---- ++ +An explicit `--filter` option on the command line takes precedence +over this config, and `--no-filter` defeats it entirely to force a +full clone. Only affects the initial clone; it has no effect on +later fetches into an existing repository. If the server does not +support object filtering, the setting is silently ignored. diff --git a/builtin/clone.c b/builtin/clone.c index fba3c9c508bc06..ed3af9325921f1 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -44,6 +44,7 @@ #include "path.h" #include "pkt-line.h" #include "list-objects-filter-options.h" +#include "urlmatch.h" #include "hook.h" #include "bundle.h" #include "bundle-uri.h" @@ -759,6 +760,51 @@ static int git_clone_config(const char *k, const char *v, return git_default_config(k, v, ctx, cb); } +static int clone_filter_collect(const char *var, const char *value, + const struct config_context *ctx UNUSED, + void *cb) +{ + char **filter_spec_p = cb; + + if (!strcmp(var, "clone.defaultobjectfilter")) { + if (!value) + return config_error_nonbool(var); + free(*filter_spec_p); + *filter_spec_p = xstrdup(value); + } + return 0; +} + +/* + * Look up clone.defaultObjectFilter or clone..defaultObjectFilter + * using the urlmatch infrastructure. A URL-qualified entry that matches + * the clone URL takes precedence over the bare form, following the same + * rules as http..* configuration variables. 
+ */ +static char *get_default_object_filter(const char *url) +{ + struct urlmatch_config config = URLMATCH_CONFIG_INIT; + char *filter_spec = NULL; + char *normalized_url; + + config.section = "clone"; + config.key = "defaultobjectfilter"; + config.collect_fn = clone_filter_collect; + config.cb = &filter_spec; + + normalized_url = url_normalize(url, &config.url); + if (!normalized_url) { + urlmatch_config_release(&config); + return NULL; + } + + repo_config(the_repository, urlmatch_config_entry, &config); + free(normalized_url); + urlmatch_config_release(&config); + + return filter_spec; +} + static int write_one_config(const char *key, const char *value, const struct config_context *ctx, void *data) @@ -1059,6 +1105,14 @@ int cmd_clone(int argc, } else die(_("repository '%s' does not exist"), repo_name); + if (!filter_options.choice && !filter_options.no_filter) { + char *config_filter = get_default_object_filter(repo); + if (config_filter) { + parse_list_objects_filter(&filter_options, config_filter); + free(config_filter); + } + } + /* no need to be strict, transport_set_option() will validate it again */ if (option_depth && atoi(option_depth) < 1) die(_("depth %s is not a positive number"), option_depth); diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 1c2805accac636..cff3e06873bdb7 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -723,6 +723,132 @@ test_expect_success 'after fetching descendants of non-promisor commits, gc work git -C partial gc --prune=now ' +# Test clone..defaultObjectFilter config + +test_expect_success 'setup for clone.defaultObjectFilter tests' ' + git init default-filter-src && + echo "small" >default-filter-src/small.txt && + git -C default-filter-src add . 
&& + git -C default-filter-src commit -m "initial" && + + git clone --bare "file://$(pwd)/default-filter-src" default-filter-srv.bare && + git -C default-filter-srv.bare config --local uploadpack.allowfilter 1 && + git -C default-filter-srv.bare config --local uploadpack.allowanysha1inwant 1 +' + +test_expect_success 'clone with clone..defaultObjectFilter applies filter' ' + test_when_finished "rm -r default-filter-clone" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:limit=1k" clone \ + "$SERVER_URL" default-filter-clone && + + echo true >expect && + git -C default-filter-clone config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "blob:limit=1024" >expect && + git -C default-filter-clone config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'clone with --filter overrides clone..defaultObjectFilter' ' + test_when_finished "rm -r default-filter-override" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:limit=1k" \ + clone --filter=blob:none "$SERVER_URL" default-filter-override && + + echo "blob:none" >expect && + git -C default-filter-override config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'clone with clone..defaultObjectFilter=blob:none works' ' + test_when_finished "rm -r default-filter-blobnone" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" clone \ + "$SERVER_URL" default-filter-blobnone && + + echo true >expect && + git -C default-filter-blobnone config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "blob:none" >expect && + git -C default-filter-blobnone config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 
'clone..defaultObjectFilter with tree:0 works' ' + test_when_finished "rm -r default-filter-tree0" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=tree:0" clone \ + "$SERVER_URL" default-filter-tree0 && + + echo true >expect && + git -C default-filter-tree0 config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "tree:0" >expect && + git -C default-filter-tree0 config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'most specific URL match wins for clone.defaultObjectFilter' ' + test_when_finished "rm -r default-filter-url-specific" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git \ + -c "clone.file://.defaultObjectFilter=blob:limit=1k" \ + -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" \ + clone "$SERVER_URL" default-filter-url-specific && + + echo "blob:none" >expect && + git -C default-filter-url-specific config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success 'non-matching URL does not apply clone.defaultObjectFilter' ' + test_when_finished "rm -r default-filter-url-nomatch" && + git \ + -c "clone.https://other.example.com/.defaultObjectFilter=blob:none" \ + clone "file://$(pwd)/default-filter-srv.bare" default-filter-url-nomatch && + + test_must_fail git -C default-filter-url-nomatch config --local remote.origin.promisor +' + +test_expect_success 'bare clone.defaultObjectFilter applies to all clones' ' + test_when_finished "rm -r default-filter-bare-key" && + git -c clone.defaultObjectFilter=blob:none \ + clone "file://$(pwd)/default-filter-srv.bare" default-filter-bare-key && + + echo true >expect && + git -C default-filter-bare-key config --local remote.origin.promisor >actual && + test_cmp expect actual && + + echo "blob:none" >expect && + git -C default-filter-bare-key config --local remote.origin.partialclonefilter >actual && + test_cmp 
expect actual +' + +test_expect_success 'URL-specific clone.defaultObjectFilter overrides bare form' ' + test_when_finished "rm -r default-filter-url-over-bare" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git \ + -c clone.defaultObjectFilter=blob:limit=1k \ + -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" \ + clone "$SERVER_URL" default-filter-url-over-bare && + + echo "blob:none" >expect && + git -C default-filter-url-over-bare config --local remote.origin.partialclonefilter >actual && + test_cmp expect actual +' + +test_expect_success '--no-filter defeats clone.defaultObjectFilter' ' + test_when_finished "rm -r default-filter-no-filter" && + SERVER_URL="file://$(pwd)/default-filter-srv.bare" && + git -c "clone.$SERVER_URL.defaultObjectFilter=blob:none" \ + clone --no-filter "$SERVER_URL" default-filter-no-filter && + + test_must_fail git -C default-filter-no-filter config --local remote.origin.promisor +' . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd From 386fe44951c3d0f8eaee98809aae5f2d886bac83 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:14 +0900 Subject: [PATCH 013/241] parseopt: extract subcommand handling from parse_options_step() Move the subcommand branch out of parse_options_step() into a new handle_subcommand() helper. Also, make parse_subcommand() return a simple success/failure status. This removes the switch over impossible parse_opt_result values and makes the non-option path easier to follow and maintain. 
Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- parse-options.c | 87 ++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/parse-options.c b/parse-options.c index c9cafc21b90355..02a4f00919f6d6 100644 --- a/parse-options.c +++ b/parse-options.c @@ -605,17 +605,44 @@ static enum parse_opt_result parse_nodash_opt(struct parse_opt_ctx_t *p, return PARSE_OPT_ERROR; } -static enum parse_opt_result parse_subcommand(const char *arg, - const struct option *options) +static int parse_subcommand(const char *arg, const struct option *options) { - for (; options->type != OPTION_END; options++) - if (options->type == OPTION_SUBCOMMAND && - !strcmp(options->long_name, arg)) { - *(parse_opt_subcommand_fn **)options->value = options->subcommand_fn; - return PARSE_OPT_SUBCOMMAND; - } + for (; options->type != OPTION_END; options++) { + parse_opt_subcommand_fn **opt_val; - return PARSE_OPT_UNKNOWN; + if (options->type != OPTION_SUBCOMMAND || + strcmp(options->long_name, arg)) + continue; + + opt_val = options->value; + *opt_val = options->subcommand_fn; + return 0; + } + + return -1; +} + +static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, + const char *arg, + const struct option *options, + const char * const usagestr[]) +{ + int err = parse_subcommand(arg, options); + + if (!err) + return PARSE_OPT_SUBCOMMAND; + + /* + * arg is neither a short or long option nor a subcommand. Since this + * command has a default operation mode, we have to treat this arg and + * all remaining args as args meant to that default operation mode. + * So we are done parsing. 
+ */ + if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) + return PARSE_OPT_DONE; + + error(_("unknown subcommand: `%s'"), arg); + usage_with_options(usagestr, options); } static void check_typos(const char *arg, const struct option *options) @@ -990,38 +1017,16 @@ enum parse_opt_result parse_options_step(struct parse_opt_ctx_t *ctx, if (*arg != '-' || !arg[1]) { if (parse_nodash_opt(ctx, arg, options) == 0) continue; - if (!ctx->has_subcommands) { - if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) - return PARSE_OPT_NON_OPTION; - ctx->out[ctx->cpidx++] = ctx->argv[0]; - continue; - } - switch (parse_subcommand(arg, options)) { - case PARSE_OPT_SUBCOMMAND: - return PARSE_OPT_SUBCOMMAND; - case PARSE_OPT_UNKNOWN: - if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) - /* - * arg is neither a short or long - * option nor a subcommand. Since - * this command has a default - * operation mode, we have to treat - * this arg and all remaining args - * as args meant to that default - * operation mode. - * So we are done parsing. - */ - return PARSE_OPT_DONE; - error(_("unknown subcommand: `%s'"), arg); - usage_with_options(usagestr, options); - case PARSE_OPT_COMPLETE: - case PARSE_OPT_HELP: - case PARSE_OPT_ERROR: - case PARSE_OPT_DONE: - case PARSE_OPT_NON_OPTION: - /* Impossible. */ - BUG("parse_subcommand() cannot return these"); - } + + if (ctx->has_subcommands) + return handle_subcommand(ctx, arg, options, + usagestr); + + if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) + return PARSE_OPT_NON_OPTION; + + ctx->out[ctx->cpidx++] = ctx->argv[0]; + continue; } /* lone -h asks for help */ From e0245a1169b2acddd94be4da02c426419507f0b5 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:15 +0900 Subject: [PATCH 014/241] help: make autocorrect handling reusable Move config parsing and prompt/delay handling into autocorrect.c and expose them in autocorrect.h. This makes autocorrect reusable regardless of which target links against it. 
Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- Makefile | 1 + autocorrect.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ autocorrect.h | 16 ++++++++++++ help.c | 64 +++------------------------------------------ meson.build | 1 + 5 files changed, 94 insertions(+), 60 deletions(-) create mode 100644 autocorrect.c create mode 100644 autocorrect.h diff --git a/Makefile b/Makefile index f3264d0a37cc50..6111631c2caaea 100644 --- a/Makefile +++ b/Makefile @@ -1098,6 +1098,7 @@ LIB_OBJS += archive-tar.o LIB_OBJS += archive-zip.o LIB_OBJS += archive.o LIB_OBJS += attr.o +LIB_OBJS += autocorrect.o LIB_OBJS += base85.o LIB_OBJS += bisect.o LIB_OBJS += blame.o diff --git a/autocorrect.c b/autocorrect.c new file mode 100644 index 00000000000000..97145d3a53ce07 --- /dev/null +++ b/autocorrect.c @@ -0,0 +1,72 @@ +#include "git-compat-util.h" +#include "autocorrect.h" +#include "config.h" +#include "parse.h" +#include "strbuf.h" +#include "prompt.h" +#include "gettext.h" + +static int parse_autocorrect(const char *value) +{ + switch (git_parse_maybe_bool_text(value)) { + case 1: + return AUTOCORRECT_IMMEDIATELY; + case 0: + return AUTOCORRECT_SHOW; + default: /* other random text */ + break; + } + + if (!strcmp(value, "prompt")) + return AUTOCORRECT_PROMPT; + if (!strcmp(value, "never")) + return AUTOCORRECT_NEVER; + if (!strcmp(value, "immediate")) + return AUTOCORRECT_IMMEDIATELY; + if (!strcmp(value, "show")) + return AUTOCORRECT_SHOW; + + return 0; +} + +void autocorrect_resolve_config(const char *var, const char *value, + const struct config_context *ctx, void *data) +{ + int *out = data; + + if (!strcmp(var, "help.autocorrect")) { + int v = parse_autocorrect(value); + + if (!v) { + v = git_config_int(var, value, ctx->kvi); + if (v < 0 || v == 1) + v = AUTOCORRECT_IMMEDIATELY; + } + + *out = v; + } +} + +void autocorrect_confirm(int autocorrect, const char *assumed) +{ + if (autocorrect == AUTOCORRECT_IMMEDIATELY) { + fprintf_ln(stderr, 
+ _("Continuing under the assumption that you meant '%s'."), + assumed); + } else if (autocorrect == AUTOCORRECT_PROMPT) { + char *answer; + struct strbuf msg = STRBUF_INIT; + + strbuf_addf(&msg, _("Run '%s' instead [y/N]? "), assumed); + answer = git_prompt(msg.buf, PROMPT_ECHO); + strbuf_release(&msg); + + if (!(starts_with(answer, "y") || starts_with(answer, "Y"))) + exit(1); + } else { + fprintf_ln(stderr, + _("Continuing in %0.1f seconds, assuming that you meant '%s'."), + (float)autocorrect / 10.0, assumed); + sleep_millisec(autocorrect * 100); + } +} diff --git a/autocorrect.h b/autocorrect.h new file mode 100644 index 00000000000000..f5fadf9d96059b --- /dev/null +++ b/autocorrect.h @@ -0,0 +1,16 @@ +#ifndef AUTOCORRECT_H +#define AUTOCORRECT_H + +#define AUTOCORRECT_SHOW (-4) +#define AUTOCORRECT_PROMPT (-3) +#define AUTOCORRECT_NEVER (-2) +#define AUTOCORRECT_IMMEDIATELY (-1) + +struct config_context; + +void autocorrect_resolve_config(const char *var, const char *value, + const struct config_context *ctx, void *data); + +void autocorrect_confirm(int autocorrect, const char *assumed); + +#endif /* AUTOCORRECT_H */ diff --git a/help.c b/help.c index 95f576c5c81d9f..4acb6ca585ff8f 100644 --- a/help.c +++ b/help.c @@ -22,6 +22,7 @@ #include "repository.h" #include "alias.h" #include "utf8.h" +#include "autocorrect.h" #ifndef NO_CURL #include "git-curl-compat.h" /* For LIBCURL_VERSION only */ @@ -541,34 +542,6 @@ struct help_unknown_cmd_config { struct cmdnames aliases; }; -#define AUTOCORRECT_SHOW (-4) -#define AUTOCORRECT_PROMPT (-3) -#define AUTOCORRECT_NEVER (-2) -#define AUTOCORRECT_IMMEDIATELY (-1) - -static int parse_autocorrect(const char *value) -{ - switch (git_parse_maybe_bool_text(value)) { - case 1: - return AUTOCORRECT_IMMEDIATELY; - case 0: - return AUTOCORRECT_SHOW; - default: /* other random text */ - break; - } - - if (!strcmp(value, "prompt")) - return AUTOCORRECT_PROMPT; - if (!strcmp(value, "never")) - return AUTOCORRECT_NEVER; - if 
(!strcmp(value, "immediate")) - return AUTOCORRECT_IMMEDIATELY; - if (!strcmp(value, "show")) - return AUTOCORRECT_SHOW; - - return 0; -} - static int git_unknown_cmd_config(const char *var, const char *value, const struct config_context *ctx, void *cb) @@ -577,17 +550,7 @@ static int git_unknown_cmd_config(const char *var, const char *value, const char *subsection, *key; size_t subsection_len; - if (!strcmp(var, "help.autocorrect")) { - int v = parse_autocorrect(value); - - if (!v) { - v = git_config_int(var, value, ctx->kvi); - if (v < 0 || v == 1) - v = AUTOCORRECT_IMMEDIATELY; - } - - cfg->autocorrect = v; - } + autocorrect_resolve_config(var, value, ctx, &cfg->autocorrect); /* Also use aliases for command lookup */ if (!parse_config_key(var, "alias", &subsection, &subsection_len, @@ -724,27 +687,8 @@ char *help_unknown_cmd(const char *cmd) _("WARNING: You called a Git command named '%s', " "which does not exist."), cmd); - if (cfg.autocorrect == AUTOCORRECT_IMMEDIATELY) - fprintf_ln(stderr, - _("Continuing under the assumption that " - "you meant '%s'."), - assumed); - else if (cfg.autocorrect == AUTOCORRECT_PROMPT) { - char *answer; - struct strbuf msg = STRBUF_INIT; - strbuf_addf(&msg, _("Run '%s' instead [y/N]? 
"), assumed); - answer = git_prompt(msg.buf, PROMPT_ECHO); - strbuf_release(&msg); - if (!(starts_with(answer, "y") || - starts_with(answer, "Y"))) - exit(1); - } else { - fprintf_ln(stderr, - _("Continuing in %0.1f seconds, " - "assuming that you meant '%s'."), - (float)cfg.autocorrect/10.0, assumed); - sleep_millisec(cfg.autocorrect * 100); - } + + autocorrect_confirm(cfg.autocorrect, assumed); cmdnames_release(&cfg.aliases); cmdnames_release(&main_cmds); diff --git a/meson.build b/meson.build index 4b536e012481ca..0429e80a5c966c 100644 --- a/meson.build +++ b/meson.build @@ -283,6 +283,7 @@ libgit_sources = [ 'archive-zip.c', 'archive.c', 'attr.c', + 'autocorrect.c', 'base85.c', 'bisect.c', 'blame.c', From 916b96c0ec006216fcb9475fea37c9bc8e6b6505 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:16 +0900 Subject: [PATCH 015/241] help: move tty check for autocorrection to autocorrect.c TTY checking is the autocorrect config parser's responsibility. It must ensure the parsed value is correct and reliable. Thus, move the check to autocorrect_resolve_config(). 
Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 24 ++++++++++++++++-------- help.c | 6 ------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index 97145d3a53ce07..887d2396da44b9 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -33,18 +33,26 @@ void autocorrect_resolve_config(const char *var, const char *value, const struct config_context *ctx, void *data) { int *out = data; + int parsed; - if (!strcmp(var, "help.autocorrect")) { - int v = parse_autocorrect(value); + if (strcmp(var, "help.autocorrect")) + return; - if (!v) { - v = git_config_int(var, value, ctx->kvi); - if (v < 0 || v == 1) - v = AUTOCORRECT_IMMEDIATELY; - } + parsed = parse_autocorrect(value); - *out = v; + /* + * Disable autocorrection prompt in a non-interactive session + */ + if (parsed == AUTOCORRECT_PROMPT && (!isatty(0) || !isatty(2))) + parsed = AUTOCORRECT_NEVER; + + if (!parsed) { + parsed = git_config_int(var, value, ctx->kvi); + if (parsed < 0 || parsed == 1) + parsed = AUTOCORRECT_IMMEDIATELY; } + + *out = parsed; } void autocorrect_confirm(int autocorrect, const char *assumed) diff --git a/help.c b/help.c index 4acb6ca585ff8f..983057970e7c7f 100644 --- a/help.c +++ b/help.c @@ -607,12 +607,6 @@ char *help_unknown_cmd(const char *cmd) read_early_config(the_repository, git_unknown_cmd_config, &cfg); - /* - * Disable autocorrection prompt in a non-interactive session - */ - if ((cfg.autocorrect == AUTOCORRECT_PROMPT) && (!isatty(0) || !isatty(2))) - cfg.autocorrect = AUTOCORRECT_NEVER; - if (cfg.autocorrect == AUTOCORRECT_NEVER) { fprintf_ln(stderr, _("git: '%s' is not a git command. 
See 'git --help'."), cmd); exit(1); From a6e0ccbd38e4b274fa2360bc8a49d8049d1ded95 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:17 +0900 Subject: [PATCH 016/241] autocorrect: use mode and delay instead of magic numbers Drop magic numbers and describe autocorrect config with a mode enum and an integer delay. This reduces errors when mutating config values and makes the values easier to access. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 46 +++++++++++++++++++++++----------------------- autocorrect.h | 20 ++++++++++++++------ help.c | 9 +++++---- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index 887d2396da44b9..2484546fc731d9 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -6,7 +6,7 @@ #include "prompt.h" #include "gettext.h" -static int parse_autocorrect(const char *value) +static enum autocorrect_mode parse_autocorrect(const char *value) { switch (git_parse_maybe_bool_text(value)) { case 1: @@ -19,49 +19,49 @@ static int parse_autocorrect(const char *value) if (!strcmp(value, "prompt")) return AUTOCORRECT_PROMPT; - if (!strcmp(value, "never")) + else if (!strcmp(value, "never")) return AUTOCORRECT_NEVER; - if (!strcmp(value, "immediate")) + else if (!strcmp(value, "immediate")) return AUTOCORRECT_IMMEDIATELY; - if (!strcmp(value, "show")) + else if (!strcmp(value, "show")) return AUTOCORRECT_SHOW; - - return 0; + else + return AUTOCORRECT_DELAY; } void autocorrect_resolve_config(const char *var, const char *value, const struct config_context *ctx, void *data) { - int *out = data; - int parsed; + struct autocorrect *conf = data; if (strcmp(var, "help.autocorrect")) return; - parsed = parse_autocorrect(value); + conf->mode = parse_autocorrect(value); /* * Disable autocorrection prompt in a non-interactive session */ - if (parsed == AUTOCORRECT_PROMPT && (!isatty(0) || !isatty(2))) - parsed = AUTOCORRECT_NEVER; + if (conf->mode == 
AUTOCORRECT_PROMPT && (!isatty(0) || !isatty(2))) + conf->mode = AUTOCORRECT_NEVER; - if (!parsed) { - parsed = git_config_int(var, value, ctx->kvi); - if (parsed < 0 || parsed == 1) - parsed = AUTOCORRECT_IMMEDIATELY; - } + if (conf->mode == AUTOCORRECT_DELAY) { + conf->delay = git_config_int(var, value, ctx->kvi); - *out = parsed; + if (!conf->delay) + conf->mode = AUTOCORRECT_SHOW; + else if (conf->delay < 0 || conf->delay == 1) + conf->mode = AUTOCORRECT_IMMEDIATELY; + } } -void autocorrect_confirm(int autocorrect, const char *assumed) +void autocorrect_confirm(struct autocorrect *conf, const char *assumed) { - if (autocorrect == AUTOCORRECT_IMMEDIATELY) { + if (conf->mode == AUTOCORRECT_IMMEDIATELY) { fprintf_ln(stderr, _("Continuing under the assumption that you meant '%s'."), assumed); - } else if (autocorrect == AUTOCORRECT_PROMPT) { + } else if (conf->mode == AUTOCORRECT_PROMPT) { char *answer; struct strbuf msg = STRBUF_INIT; @@ -71,10 +71,10 @@ void autocorrect_confirm(int autocorrect, const char *assumed) if (!(starts_with(answer, "y") || starts_with(answer, "Y"))) exit(1); - } else { + } else if (conf->mode == AUTOCORRECT_DELAY) { fprintf_ln(stderr, _("Continuing in %0.1f seconds, assuming that you meant '%s'."), - (float)autocorrect / 10.0, assumed); - sleep_millisec(autocorrect * 100); + conf->delay / 10.0, assumed); + sleep_millisec(conf->delay * 100); } } diff --git a/autocorrect.h b/autocorrect.h index f5fadf9d96059b..5506a36f11a7cc 100644 --- a/autocorrect.h +++ b/autocorrect.h @@ -1,16 +1,24 @@ #ifndef AUTOCORRECT_H #define AUTOCORRECT_H -#define AUTOCORRECT_SHOW (-4) -#define AUTOCORRECT_PROMPT (-3) -#define AUTOCORRECT_NEVER (-2) -#define AUTOCORRECT_IMMEDIATELY (-1) - struct config_context; +enum autocorrect_mode { + AUTOCORRECT_SHOW, + AUTOCORRECT_NEVER, + AUTOCORRECT_PROMPT, + AUTOCORRECT_IMMEDIATELY, + AUTOCORRECT_DELAY, +}; + +struct autocorrect { + enum autocorrect_mode mode; + int delay; +}; + void autocorrect_resolve_config(const char 
*var, const char *value, const struct config_context *ctx, void *data); -void autocorrect_confirm(int autocorrect, const char *assumed); +void autocorrect_confirm(struct autocorrect *conf, const char *assumed); #endif /* AUTOCORRECT_H */ diff --git a/help.c b/help.c index 983057970e7c7f..a89ac5aced9994 100644 --- a/help.c +++ b/help.c @@ -538,7 +538,7 @@ int is_in_cmdlist(struct cmdnames *c, const char *s) } struct help_unknown_cmd_config { - int autocorrect; + struct autocorrect autocorrect; struct cmdnames aliases; }; @@ -607,7 +607,7 @@ char *help_unknown_cmd(const char *cmd) read_early_config(the_repository, git_unknown_cmd_config, &cfg); - if (cfg.autocorrect == AUTOCORRECT_NEVER) { + if (cfg.autocorrect.mode == AUTOCORRECT_NEVER) { fprintf_ln(stderr, _("git: '%s' is not a git command. See 'git --help'."), cmd); exit(1); } @@ -673,7 +673,8 @@ char *help_unknown_cmd(const char *cmd) n++) ; /* still counting */ } - if (cfg.autocorrect && cfg.autocorrect != AUTOCORRECT_SHOW && n == 1 && + + if (cfg.autocorrect.mode != AUTOCORRECT_SHOW && n == 1 && SIMILAR_ENOUGH(best_similarity)) { char *assumed = xstrdup(main_cmds.names[0]->name); @@ -682,7 +683,7 @@ char *help_unknown_cmd(const char *cmd) "which does not exist."), cmd); - autocorrect_confirm(cfg.autocorrect, assumed); + autocorrect_confirm(&cfg.autocorrect, assumed); cmdnames_release(&cfg.aliases); cmdnames_release(&main_cmds); From f06f1f043cbf58b82f1243fd09fc2c8b0202a853 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:18 +0900 Subject: [PATCH 017/241] autocorrect: rename AUTOCORRECT_SHOW to AUTOCORRECT_HINT AUTOCORRECT_SHOW is ambiguous. Its purpose is to show commands similar to the unknown one and take no other action. Rename it to fit the semantics. 
Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 6 +++--- autocorrect.h | 2 +- help.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index 2484546fc731d9..de0fa282c934a8 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -12,7 +12,7 @@ static enum autocorrect_mode parse_autocorrect(const char *value) case 1: return AUTOCORRECT_IMMEDIATELY; case 0: - return AUTOCORRECT_SHOW; + return AUTOCORRECT_HINT; default: /* other random text */ break; } @@ -24,7 +24,7 @@ static enum autocorrect_mode parse_autocorrect(const char *value) else if (!strcmp(value, "immediate")) return AUTOCORRECT_IMMEDIATELY; else if (!strcmp(value, "show")) - return AUTOCORRECT_SHOW; + return AUTOCORRECT_HINT; else return AUTOCORRECT_DELAY; } @@ -49,7 +49,7 @@ void autocorrect_resolve_config(const char *var, const char *value, conf->delay = git_config_int(var, value, ctx->kvi); if (!conf->delay) - conf->mode = AUTOCORRECT_SHOW; + conf->mode = AUTOCORRECT_HINT; else if (conf->delay < 0 || conf->delay == 1) conf->mode = AUTOCORRECT_IMMEDIATELY; } diff --git a/autocorrect.h b/autocorrect.h index 5506a36f11a7cc..328807242c15ab 100644 --- a/autocorrect.h +++ b/autocorrect.h @@ -4,7 +4,7 @@ struct config_context; enum autocorrect_mode { - AUTOCORRECT_SHOW, + AUTOCORRECT_HINT, AUTOCORRECT_NEVER, AUTOCORRECT_PROMPT, AUTOCORRECT_IMMEDIATELY, diff --git a/help.c b/help.c index a89ac5aced9994..2d441ded3f1489 100644 --- a/help.c +++ b/help.c @@ -674,7 +674,7 @@ char *help_unknown_cmd(const char *cmd) ; /* still counting */ } - if (cfg.autocorrect.mode != AUTOCORRECT_SHOW && n == 1 && + if (cfg.autocorrect.mode != AUTOCORRECT_HINT && n == 1 && SIMILAR_ENOUGH(best_similarity)) { char *assumed = xstrdup(main_cmds.names[0]->name); From 7cd07f167d2980ad58de08a8bd7787d2ef5882b9 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:19 +0900 Subject: [PATCH 018/241] autocorrect: provide config 
resolution API Add autocorrect_resolve(). This resolves and populates the correct values for autocorrect config. Make autocorrect config callback internal. The API is meant to provide a high-level way to retrieve the config. Allowing access to the config callback from outside violates that intent. Additionally, in some cases, without access to the config callback, two config iterations cannot be merged into one, which can hurt performance. This is fine, as the code path that calls autocorrect_resolve() is cold. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.c | 15 ++++++++++++--- autocorrect.h | 5 +---- help.c | 40 +++++++++++++++++----------------------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/autocorrect.c b/autocorrect.c index de0fa282c934a8..b2ee9f51e8c09a 100644 --- a/autocorrect.c +++ b/autocorrect.c @@ -1,3 +1,5 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "git-compat-util.h" #include "autocorrect.h" #include "config.h" @@ -29,13 +31,13 @@ static enum autocorrect_mode parse_autocorrect(const char *value) return AUTOCORRECT_DELAY; } -void autocorrect_resolve_config(const char *var, const char *value, - const struct config_context *ctx, void *data) +static int resolve_autocorrect(const char *var, const char *value, + const struct config_context *ctx, void *data) { struct autocorrect *conf = data; if (strcmp(var, "help.autocorrect")) - return; + return 0; conf->mode = parse_autocorrect(value); @@ -53,6 +55,13 @@ void autocorrect_resolve_config(const char *var, const char *value, else if (conf->delay < 0 || conf->delay == 1) conf->mode = AUTOCORRECT_IMMEDIATELY; } + + return 0; +} + +void autocorrect_resolve(struct autocorrect *conf) +{ + read_early_config(the_repository, resolve_autocorrect, conf); } void autocorrect_confirm(struct autocorrect *conf, const char *assumed) diff --git a/autocorrect.h b/autocorrect.h index 328807242c15ab..0d3e819262edee 100644 --- a/autocorrect.h +++ 
b/autocorrect.h @@ -1,8 +1,6 @@ #ifndef AUTOCORRECT_H #define AUTOCORRECT_H -struct config_context; - enum autocorrect_mode { AUTOCORRECT_HINT, AUTOCORRECT_NEVER, @@ -16,8 +14,7 @@ struct autocorrect { int delay; }; -void autocorrect_resolve_config(const char *var, const char *value, - const struct config_context *ctx, void *data); +void autocorrect_resolve(struct autocorrect *conf); void autocorrect_confirm(struct autocorrect *conf, const char *assumed); diff --git a/help.c b/help.c index 2d441ded3f1489..81efdb13d4a375 100644 --- a/help.c +++ b/help.c @@ -537,32 +537,23 @@ int is_in_cmdlist(struct cmdnames *c, const char *s) return 0; } -struct help_unknown_cmd_config { - struct autocorrect autocorrect; - struct cmdnames aliases; -}; - -static int git_unknown_cmd_config(const char *var, const char *value, - const struct config_context *ctx, - void *cb) +static int resolve_aliases(const char *var, const char *value UNUSED, + const struct config_context *ctx UNUSED, void *data) { - struct help_unknown_cmd_config *cfg = cb; + struct cmdnames *aliases = data; const char *subsection, *key; size_t subsection_len; - autocorrect_resolve_config(var, value, ctx, &cfg->autocorrect); - - /* Also use aliases for command lookup */ if (!parse_config_key(var, "alias", &subsection, &subsection_len, &key)) { if (subsection) { /* [alias "name"] command = value */ if (!strcmp(key, "command")) - add_cmdname(&cfg->aliases, subsection, + add_cmdname(aliases, subsection, subsection_len); } else { /* alias.name = value */ - add_cmdname(&cfg->aliases, key, strlen(key)); + add_cmdname(aliases, key, strlen(key)); } } @@ -599,22 +590,26 @@ static const char bad_interpreter_advice[] = char *help_unknown_cmd(const char *cmd) { - struct help_unknown_cmd_config cfg = { 0 }; + struct cmdnames aliases = { 0 }; + struct autocorrect autocorrect = { 0 }; int i, n, best_similarity = 0; struct cmdnames main_cmds = { 0 }; struct cmdnames other_cmds = { 0 }; struct cmdname_help *common_cmds; - 
read_early_config(the_repository, git_unknown_cmd_config, &cfg); + autocorrect_resolve(&autocorrect); - if (cfg.autocorrect.mode == AUTOCORRECT_NEVER) { + if (autocorrect.mode == AUTOCORRECT_NEVER) { fprintf_ln(stderr, _("git: '%s' is not a git command. See 'git --help'."), cmd); exit(1); } load_command_list("git-", &main_cmds, &other_cmds); - add_cmd_list(&main_cmds, &cfg.aliases); + /* Also use aliases for command lookup */ + read_early_config(the_repository, resolve_aliases, &aliases); + + add_cmd_list(&main_cmds, &aliases); add_cmd_list(&main_cmds, &other_cmds); QSORT(main_cmds.names, main_cmds.cnt, cmdname_compare); uniq(&main_cmds); @@ -674,18 +669,17 @@ char *help_unknown_cmd(const char *cmd) ; /* still counting */ } - if (cfg.autocorrect.mode != AUTOCORRECT_HINT && n == 1 && + if (autocorrect.mode != AUTOCORRECT_HINT && n == 1 && SIMILAR_ENOUGH(best_similarity)) { char *assumed = xstrdup(main_cmds.names[0]->name); fprintf_ln(stderr, - _("WARNING: You called a Git command named '%s', " - "which does not exist."), + _("WARNING: You called a Git command named '%s', which does not exist."), cmd); - autocorrect_confirm(&cfg.autocorrect, assumed); + autocorrect_confirm(&autocorrect, assumed); - cmdnames_release(&cfg.aliases); + cmdnames_release(&aliases); cmdnames_release(&main_cmds); cmdnames_release(&other_cmds); return assumed; From be9df6de6e2cfd09ffc327e9d4edd451248296f9 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:20 +0900 Subject: [PATCH 019/241] parseopt: autocorrect mistyped subcommands Try to autocorrect the mistyped mandatory subcommand before showing an error and exiting. Subcommands parsed with PARSE_OPT_SUBCOMMAND_OPTIONAL are skipped. Use standard Damerau-Levenshtein distance (weights 1, 1, 1, 1) to establish a predictable, mathematically sound baseline. 
Scale the allowed edit distance based on input length to prevent false positives on short commands, following common practice for fuzziness thresholds (e.g., Elasticsearch's AUTO fuzziness): - Length 0-2: 0 edits allowed - Length 3-5: 1 edit allowed - Length 6+: 2 edits allowed Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- parse-options.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) diff --git a/parse-options.c b/parse-options.c index 02a4f00919f6d6..1f1b72762790c0 100644 --- a/parse-options.c +++ b/parse-options.c @@ -6,6 +6,8 @@ #include "strbuf.h" #include "string-list.h" #include "utf8.h" +#include "autocorrect.h" +#include "levenshtein.h" static int disallow_abbreviated_options; @@ -622,13 +624,77 @@ static int parse_subcommand(const char *arg, const struct option *options) return -1; } +static void find_subcommands(struct string_list *list, + const struct option *options) +{ + for (; options->type != OPTION_END; options++) { + if (options->type == OPTION_SUBCOMMAND) + string_list_append(list, options->long_name); + } +} + +static int similar_enough(const char *cmd, unsigned int edit) +{ + size_t len = strlen(cmd); + unsigned int allowed = len < 3 ? 0 : len < 6 ? 1 : 2; + + return edit <= allowed; +} + +static const char *autocorrect_subcommand(const char *cmd, + struct string_list *cmds) +{ + struct autocorrect autocorrect = { 0 }; + unsigned int min = UINT_MAX; + unsigned int ties = 0; + struct string_list_item *cand; + struct string_list_item *best = NULL; + + autocorrect_resolve(&autocorrect); + + /* + * Builtin subcommands are small enough that printing them all via + * usage_with_options() is sufficient. Therefore, AUTOCORRECT_HINT + * acts like AUTOCORRECT_NEVER. 
+ */ + if (autocorrect.mode == AUTOCORRECT_HINT || + autocorrect.mode == AUTOCORRECT_NEVER) + return NULL; + + for_each_string_list_item(cand, cmds) { + unsigned int edit = levenshtein(cmd, cand->string, 1, 1, 1, 1); + + if (edit < min) { + min = edit; + best = cand; + ties = 0; + } else if (edit == min) { + ties++; + } + } + + if (!ties && similar_enough(cmd, min)) { + fprintf_ln(stderr, + _("WARNING: You called a subcommand named '%s', which does not exist."), + cmd); + + autocorrect_confirm(&autocorrect, best->string); + return best->string; + } + + return NULL; +} + static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, const char *arg, const struct option *options, const char * const usagestr[]) { - int err = parse_subcommand(arg, options); + int err; + const char *assumed; + struct string_list cmds = STRING_LIST_INIT_NODUP; + err = parse_subcommand(arg, options); if (!err) return PARSE_OPT_SUBCOMMAND; @@ -641,8 +707,17 @@ static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) return PARSE_OPT_DONE; - error(_("unknown subcommand: `%s'"), arg); - usage_with_options(usagestr, options); + find_subcommands(&cmds, options); + assumed = autocorrect_subcommand(arg, &cmds); + + if (!assumed) { + error(_("unknown subcommand: `%s'"), arg); + usage_with_options(usagestr, options); + } + + string_list_clear(&cmds, 0); + parse_subcommand(assumed, options); + return PARSE_OPT_SUBCOMMAND; } static void check_typos(const char *arg, const struct option *options) From ae8b7e1d200977165755c2e6d5a22e1df8ab6bf1 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:21 +0900 Subject: [PATCH 020/241] parseopt: enable subcommand autocorrection for git-remote and git-notes Add PARSE_OPT_SUBCOMMAND_AUTOCORR to enable autocorrection for subcommands parsed with PARSE_OPT_SUBCOMMAND_OPTIONAL. 
Use it for git-remote and git-notes, so mistyped subcommands can be automatically corrected, and builtin entry points no longer need to handle the unknown subcommand error path themselves. This is safe for these two builtins, because they either resolve to a single subcommand or take no subcommand at all. This means that if the subcommand parser encounters an unknown argument, it must be a mistyped subcommand. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- builtin/notes.c | 10 +++------- builtin/remote.c | 12 ++++-------- parse-options.c | 16 +++++++++------- parse-options.h | 1 + 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/builtin/notes.c b/builtin/notes.c index 9af602bdd7b402..087eb898a4415f 100644 --- a/builtin/notes.c +++ b/builtin/notes.c @@ -1149,14 +1149,10 @@ int cmd_notes(int argc, repo_config(the_repository, git_default_config, NULL); argc = parse_options(argc, argv, prefix, options, git_notes_usage, - PARSE_OPT_SUBCOMMAND_OPTIONAL); - if (!fn) { - if (argc) { - error(_("unknown subcommand: `%s'"), argv[0]); - usage_with_options(git_notes_usage, options); - } + PARSE_OPT_SUBCOMMAND_OPTIONAL | + PARSE_OPT_SUBCOMMAND_AUTOCORR); + if (!fn) fn = list; - } if (override_notes_ref) { struct strbuf sb = STRBUF_INIT; diff --git a/builtin/remote.c b/builtin/remote.c index 0fddaa177331f6..9415f6cb03e807 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -1953,15 +1953,11 @@ int cmd_remote(int argc, }; argc = parse_options(argc, argv, prefix, options, builtin_remote_usage, - PARSE_OPT_SUBCOMMAND_OPTIONAL); + PARSE_OPT_SUBCOMMAND_OPTIONAL | + PARSE_OPT_SUBCOMMAND_AUTOCORR); - if (fn) { + if (fn) return !!fn(argc, argv, prefix, repo); - } else { - if (argc) { - error(_("unknown subcommand: `%s'"), argv[0]); - usage_with_options(builtin_remote_usage, options); - } + else return !!show_all(); - } } diff --git a/parse-options.c b/parse-options.c index 1f1b72762790c0..0b84061a381153 100644 --- a/parse-options.c +++ 
b/parse-options.c @@ -698,14 +698,16 @@ static enum parse_opt_result handle_subcommand(struct parse_opt_ctx_t *ctx, if (!err) return PARSE_OPT_SUBCOMMAND; - /* - * arg is neither a short or long option nor a subcommand. Since this - * command has a default operation mode, we have to treat this arg and - * all remaining args as args meant to that default operation mode. - * So we are done parsing. - */ - if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL) + if (ctx->flags & PARSE_OPT_SUBCOMMAND_OPTIONAL && + !(ctx->flags & PARSE_OPT_SUBCOMMAND_AUTOCORR)) { + /* + * arg is neither a short or long option nor a subcommand. + * Since this command has a default operation mode, we have to + * treat this arg and all remaining args as args meant to that + * default operation mode. So we are done parsing. + */ return PARSE_OPT_DONE; + } find_subcommands(&cmds, options); assumed = autocorrect_subcommand(arg, &cmds); diff --git a/parse-options.h b/parse-options.h index 706de9729f6b3f..f29ac337893c92 100644 --- a/parse-options.h +++ b/parse-options.h @@ -40,6 +40,7 @@ enum parse_opt_flags { PARSE_OPT_ONE_SHOT = 1 << 5, PARSE_OPT_SHELL_EVAL = 1 << 6, PARSE_OPT_SUBCOMMAND_OPTIONAL = 1 << 7, + PARSE_OPT_SUBCOMMAND_AUTOCORR = 1 << 8, }; enum parse_opt_option_flags { From 273faabea8db47247564df092adf5a675ddfa284 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:22 +0900 Subject: [PATCH 021/241] parseopt: add tests for subcommand autocorrection These tests cover default behavior (help.autocorrect is unset), no correction, immediate correction, delayed correction, and rejection when the typo is too dissimilar. 
Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- t/meson.build | 1 + t/t9004-autocorrect-subcommand.sh | 51 +++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100755 t/t9004-autocorrect-subcommand.sh diff --git a/t/meson.build b/t/meson.build index f66a73f8a07d93..bf0503d705a9a3 100644 --- a/t/meson.build +++ b/t/meson.build @@ -973,6 +973,7 @@ integration_tests = [ 't9001-send-email.sh', 't9002-column.sh', 't9003-help-autocorrect.sh', + 't9004-autocorrect-subcommand.sh', 't9100-git-svn-basic.sh', 't9101-git-svn-props.sh', 't9102-git-svn-deep-rmdir.sh', diff --git a/t/t9004-autocorrect-subcommand.sh b/t/t9004-autocorrect-subcommand.sh new file mode 100755 index 00000000000000..d10031659b940b --- /dev/null +++ b/t/t9004-autocorrect-subcommand.sh @@ -0,0 +1,51 @@ +#!/bin/sh + +test_description='subcommand auto-correction test + +Test autocorrection for subcommands with different +help.autocorrect mode.' + +. ./test-lib.sh + +test_expect_success 'setup' " + echo '^error: unknown subcommand: ' >grep_unknown +" + +test_expect_success 'default is not to autocorrect' ' + test_must_fail git worktree lsit 2>actual && + head -n1 actual >first && test_grep -f grep_unknown first +' + +for mode in false no off 0 show never +do + test_expect_success "'$mode' disables autocorrection" " + test_config help.autocorrect $mode && + + test_must_fail git worktree lsit 2>actual && + head -n1 actual >first && test_grep -f grep_unknown first + " +done + +for mode in -39 immediate 1 +do + test_expect_success "autocorrect immediately with '$mode'" - <<-EOT + test_config help.autocorrect $mode && + + git worktree lsit 2>actual && + test_grep "you meant 'list'\.$" actual + EOT +done + +test_expect_success 'delay path is executed' - <<-\EOT + test_config help.autocorrect 2 && + + git worktree lsit 2>actual && + test_grep '^Continuing in 0.2 seconds, ' actual +EOT + +test_expect_success 'deny if too dissimilar' - <<-\EOT + test_must_fail git 
remote rensnr 2>actual && + head -n1 actual >first && test_grep -f grep_unknown first +EOT + +test_done From 916b45080534e9603094dda39c4599a2f2a466d1 Mon Sep 17 00:00:00 2001 From: Jiamu Sun <39@barroit.sh> Date: Tue, 17 Mar 2026 00:36:23 +0900 Subject: [PATCH 022/241] doc: document autocorrect API Explain behaviors for autocorrect_resolve(), autocorrect_confirm(), and struct autocorrect. Signed-off-by: Jiamu Sun <39@barroit.sh> Signed-off-by: Junio C Hamano --- autocorrect.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/autocorrect.h b/autocorrect.h index 0d3e819262edee..bfa3ba20a4fb73 100644 --- a/autocorrect.h +++ b/autocorrect.h @@ -9,13 +9,24 @@ enum autocorrect_mode { AUTOCORRECT_DELAY, }; +/** + * `mode` indicates which action will be performed by autocorrect_confirm(). + * `delay` is the timeout before autocorrect_confirm() returns, in tenths of a + * second. Use it only with AUTOCORRECT_DELAY. + */ struct autocorrect { enum autocorrect_mode mode; int delay; }; +/** + * Resolve the autocorrect configuration into `conf`. + */ void autocorrect_resolve(struct autocorrect *conf); +/** + * Interact with the user in different ways depending on `conf->mode`. + */ void autocorrect_confirm(struct autocorrect *conf, const char *assumed); #endif /* AUTOCORRECT_H */ From 7fe5a5fc23b3aa8c302e0beca20ced5171db03cf Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Fri, 20 Mar 2026 14:09:34 +0100 Subject: [PATCH 023/241] name-rev: wrap both blocks in braces See `CodingGuidelines`: - When there are multiple arms to a conditional and some of them require braces, enclose even a single line block in braces for consistency. [...] 
Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- builtin/name-rev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 6188cf98ce0157..171e7bd0e98a46 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -466,9 +466,9 @@ static const char *get_rev_name(const struct object *o, struct strbuf *buf) if (!n) return NULL; - if (!n->generation) + if (!n->generation) { return n->tip_name; - else { + } else { strbuf_reset(buf); strbuf_addstr(buf, n->tip_name); strbuf_strip_suffix(buf, "^0"); @@ -516,9 +516,9 @@ static void name_rev_line(char *p, struct name_ref_data *data) for (p_start = p; *p; p++) { #define ishex(x) (isdigit((x)) || ((x) >= 'a' && (x) <= 'f')) - if (!ishex(*p)) + if (!ishex(*p)) { counter = 0; - else if (++counter == hexsz && + } else if (++counter == hexsz && !ishex(*(p+1))) { struct object_id oid; const char *name = NULL; From 803e7051041ca156a0efe30301787255df3f0b43 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Fri, 20 Mar 2026 14:09:35 +0100 Subject: [PATCH 024/241] name-rev: learn --format= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Teach git-name-rev(1) to format the given revisions instead of creating symbolic names. Sometimes you want to format commits. Most of the time you’re walking the graph, e.g. getting a range of commits like `master..topic`. That’s a job for git-log(1). But sometimes you might want to format commits that you encounter on demand: • Full hashes in running text that you might want to pretty-print • git-last-modified(1) outputs full hashes that you can do the same with • git-cherry(1) has `-v` for commit subject, but maybe you want something else? 
But now you can’t use git-log(1), git-show(1), or git-rev-list(1): • You can’t feed commits piecemeal to these commands, one input for one output; they block until standard in is closed • You can’t feed a list of possibly duplicate commits, like the output of git-last-modified(1); they effectively deduplicate the output Beyond these two points there’s also the input massage problem: you cannot feed mixed input (revisions mixed with arbitrary text). One might hope that git-cat-file(1) can save us. But it doesn’t support pretty formats. But there is one command that already both handles revisions as arguments, revisions on standard input, and even revisions mixed in with arbitrary text. Namely git-name-rev(1). Teach it to work in a format mode where the output for each revision is the pretty output (implies `--name-only`). This can be used to format any revision expression when given as arguments, and all full commit hashes in running text on stdin. Just bring the hashes (to the pipeline). We will pretty print them. Signed-off-by: Kristoffer Haugsbakk Signed-off-by: Junio C Hamano --- Documentation/git-name-rev.adoc | 10 +++- builtin/name-rev.c | 100 +++++++++++++++++++++++++++++--- t/t6120-describe.sh | 96 ++++++++++++++++++++++++++++++ 3 files changed, 198 insertions(+), 8 deletions(-) diff --git a/Documentation/git-name-rev.adoc b/Documentation/git-name-rev.adoc index d4f1c4d5945e8e..65348690c8cd3b 100644 --- a/Documentation/git-name-rev.adoc +++ b/Documentation/git-name-rev.adoc @@ -9,7 +9,7 @@ git-name-rev - Find symbolic names for given revs SYNOPSIS -------- [verse] -'git name-rev' [--tags] [--refs=] +'git name-rev' [--tags] [--refs=] [--format=] ( --all | --annotate-stdin | ... ) DESCRIPTION @@ -21,6 +21,14 @@ format parsable by 'git rev-parse'. OPTIONS ------- +--format=:: +--no-format:: + Format revisions instead of outputting symbolic names. The + default is `--no-format`. ++ +Implies `--name-only`. 
The negation `--no-format` implies +`--no-name-only` (the default for the command). + --tags:: Do not use branch names, but only tags to name the commits diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 171e7bd0e98a46..9a008d8b7a8128 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -18,6 +18,9 @@ #include "commit-graph.h" #include "wildmatch.h" #include "mem-pool.h" +#include "pretty.h" +#include "revision.h" +#include "notes.h" /* * One day. See the 'name a rev shortly after epoch' test in t6120 when @@ -33,6 +36,16 @@ struct rev_name { int from_tag; }; +struct pretty_format { + struct pretty_print_context ctx; + struct userformat_want want; +}; + +struct format_cb_data { + const char *format; + int *name_only; +}; + define_commit_slab(commit_rev_name, struct rev_name); static timestamp_t generation_cutoff = GENERATION_NUMBER_INFINITY; @@ -454,7 +467,9 @@ static const char *get_exact_ref_match(const struct object *o) } /* may return a constant string or use "buf" as scratch space */ -static const char *get_rev_name(const struct object *o, struct strbuf *buf) +static const char *get_rev_name(const struct object *o, + struct pretty_format *format_ctx, + struct strbuf *buf) { struct rev_name *n; const struct commit *c; @@ -462,6 +477,25 @@ static const char *get_rev_name(const struct object *o, struct strbuf *buf) if (o->type != OBJ_COMMIT) return get_exact_ref_match(o); c = (const struct commit *) o; + + if (format_ctx) { + strbuf_reset(buf); + + if (format_ctx->want.notes) { + struct strbuf notebuf = STRBUF_INIT; + + format_display_notes(&c->object.oid, ¬ebuf, + get_log_output_encoding(), + format_ctx->ctx.fmt == CMIT_FMT_USERFORMAT); + format_ctx->ctx.notes_message = strbuf_detach(¬ebuf, NULL); + } + + pretty_print_commit(&format_ctx->ctx, c, buf); + FREE_AND_NULL(format_ctx->ctx.notes_message); + + return buf->buf; + } + n = get_commit_rev_name(c); if (!n) return NULL; @@ -479,6 +513,7 @@ static const char *get_rev_name(const struct object 
*o, struct strbuf *buf) static void show_name(const struct object *obj, const char *caller_name, + struct pretty_format *format_ctx, int always, int allow_undefined, int name_only) { const char *name; @@ -487,7 +522,7 @@ static void show_name(const struct object *obj, if (!name_only) printf("%s ", caller_name ? caller_name : oid_to_hex(oid)); - name = get_rev_name(obj, &buf); + name = get_rev_name(obj, format_ctx, &buf); if (name) printf("%s\n", name); else if (allow_undefined) @@ -507,7 +542,9 @@ static char const * const name_rev_usage[] = { NULL }; -static void name_rev_line(char *p, struct name_ref_data *data) +static void name_rev_line(char *p, + struct name_ref_data *data, + struct pretty_format *format_ctx) { struct strbuf buf = STRBUF_INIT; int counter = 0; @@ -532,7 +569,7 @@ static void name_rev_line(char *p, struct name_ref_data *data) struct object *o = lookup_object(the_repository, &oid); if (o) - name = get_rev_name(o, &buf); + name = get_rev_name(o, format_ctx, &buf); } *(p+1) = c; @@ -554,6 +591,16 @@ static void name_rev_line(char *p, struct name_ref_data *data) strbuf_release(&buf); } +static int format_cb(const struct option *option, + const char *arg, + int unset) +{ + struct format_cb_data *data = option->value; + data->format = arg; + *data->name_only = !unset; + return 0; +} + int cmd_name_rev(int argc, const char **argv, const char *prefix, @@ -567,6 +614,12 @@ int cmd_name_rev(int argc, #endif int all = 0, annotate_stdin = 0, allow_undefined = 1, always = 0, peel_tag = 0; struct name_ref_data data = { 0, 0, STRING_LIST_INIT_NODUP, STRING_LIST_INIT_NODUP }; + static struct format_cb_data format_cb_data = { 0 }; + struct display_notes_opt format_notes_opt; + struct rev_info format_rev = REV_INFO_INIT; + struct pretty_format *format_ctx = NULL; + struct pretty_format format_pp = { 0 }; + struct string_list notes = STRING_LIST_INIT_NODUP; struct option opts[] = { OPT_BOOL(0, "name-only", &data.name_only, N_("print only ref-based names (no 
object names)")), OPT_BOOL(0, "tags", &data.tags_only, N_("only use tags to name the commits")), @@ -584,6 +637,10 @@ int cmd_name_rev(int argc, PARSE_OPT_HIDDEN), #endif /* WITH_BREAKING_CHANGES */ OPT_BOOL(0, "annotate-stdin", &annotate_stdin, N_("annotate text from stdin")), + OPT_CALLBACK(0, "format", &format_cb_data, N_("format"), + N_("pretty-print output instead"), format_cb), + OPT_STRING_LIST(0, "notes", ¬es, N_("notes"), + N_("display notes for --format")), OPT_BOOL(0, "undefined", &allow_undefined, N_("allow to print `undefined` names (default)")), OPT_BOOL(0, "always", &always, N_("show abbreviated commit object as fallback")), @@ -592,6 +649,8 @@ int cmd_name_rev(int argc, OPT_END(), }; + init_display_notes(&format_notes_opt); + format_cb_data.name_only = &data.name_only; mem_pool_init(&string_pool, 0); init_commit_rev_name(&rev_names); repo_config(the_repository, git_default_config, NULL); @@ -606,6 +665,31 @@ int cmd_name_rev(int argc, } #endif + if (format_cb_data.format) { + get_commit_format(format_cb_data.format, &format_rev); + format_pp.ctx.rev = &format_rev; + format_pp.ctx.fmt = format_rev.commit_format; + format_pp.ctx.abbrev = format_rev.abbrev; + format_pp.ctx.date_mode_explicit = format_rev.date_mode_explicit; + format_pp.ctx.date_mode = format_rev.date_mode; + format_pp.ctx.color = GIT_COLOR_AUTO; + + userformat_find_requirements(format_cb_data.format, + &format_pp.want); + if (format_pp.want.notes) { + int ignore_show_notes = 0; + struct string_list_item *n; + + for_each_string_list_item(n, ¬es) + enable_ref_display_notes(&format_notes_opt, + &ignore_show_notes, + n->string); + load_display_notes(&format_notes_opt); + } + + format_ctx = &format_pp; + } + if (all + annotate_stdin + !!argc > 1) { error("Specify either a list, or --all, not both!"); usage_with_options(name_rev_usage, opts); @@ -663,7 +747,7 @@ int cmd_name_rev(int argc, while (strbuf_getline(&sb, stdin) != EOF) { strbuf_addch(&sb, '\n'); - name_rev_line(sb.buf, &data); + 
name_rev_line(sb.buf, &data, format_ctx); } strbuf_release(&sb); } else if (all) { @@ -674,18 +758,20 @@ int cmd_name_rev(int argc, struct object *obj = get_indexed_object(the_repository, i); if (!obj || obj->type != OBJ_COMMIT) continue; - show_name(obj, NULL, + show_name(obj, NULL, format_ctx, always, allow_undefined, data.name_only); } } else { int i; for (i = 0; i < revs.nr; i++) - show_name(revs.objects[i].item, revs.objects[i].name, + show_name(revs.objects[i].item, revs.objects[i].name, format_ctx, always, allow_undefined, data.name_only); } string_list_clear(&data.ref_filters, 0); string_list_clear(&data.exclude_filters, 0); + string_list_clear(¬es, 0); + release_display_notes(&format_notes_opt); mem_pool_discard(&string_pool, 0); object_array_clear(&revs); return 0; diff --git a/t/t6120-describe.sh b/t/t6120-describe.sh index 2c70cc561ad5f6..0b7e9fe396dbb5 100755 --- a/t/t6120-describe.sh +++ b/t/t6120-describe.sh @@ -658,6 +658,102 @@ test_expect_success 'name-rev --annotate-stdin works with commitGraph' ' ) ' +test_expect_success 'name-rev --format setup' ' + mkdir repo-format && + git -C repo-format init && + test_commit -C repo-format first && + test_commit -C repo-format second && + test_commit -C repo-format third && + test_commit -C repo-format fourth && + test_commit -C repo-format fifth && + test_commit -C repo-format sixth && + test_commit -C repo-format seventh && + test_commit -C repo-format eighth +' + +test_expect_success 'name-rev --format --no-name-only' ' + cat >expect <<-\EOF && + HEAD~3 [fifth] + HEAD [eighth] + HEAD~5 [third] + EOF + git -C repo-format name-rev --format="[%s]" \ + --no-name-only HEAD~3 HEAD HEAD~5 >actual && + test_cmp expect actual +' + +test_expect_success 'name-rev --format --no-format is the same as regular name-rev' ' + git -C repo-format name-rev HEAD~2 HEAD~3 >expect && + test_file_not_empty expect && + git -C repo-format name-rev --format="huh?" 
\ + --no-format HEAD~2 HEAD~3 >actual && + test_cmp expect actual +' + +test_expect_success 'name-rev --format=%s for argument revs' ' + cat >expect <<-\EOF && + eighth + seventh + fifth + EOF + git -C repo-format name-rev --format=%s \ + HEAD HEAD~ HEAD~3 >actual && + test_cmp expect actual +' + +test_expect_success '--name-rev --format=reference --annotate-stdin from rev-list same as log' ' + git -C repo-format log --format=reference >expect && + test_file_not_empty expect && + git -C repo-format rev-list HEAD >list && + git -C repo-format name-rev --format=reference \ + --annotate-stdin actual && + test_cmp expect actual +' + +test_expect_success '--name-rev --format= --annotate-stdin with running text and tree oid' ' + cmit_oid=$(git -C repo-format rev-parse :/fifth) && + reference=$(git -C repo-format log -n1 --format=reference :/fifth) && + tree=$(git -C repo-format rev-parse HEAD^{tree}) && + cat >expect <<-EOF && + We thought we fixed this in ${reference}. + But look at this tree: ${tree}. + EOF + git -C repo-format name-rev --format=reference --annotate-stdin \ + >actual <<-EOF && + We thought we fixed this in ${cmit_oid}. + But look at this tree: ${tree}. 
+ EOF + test_cmp expect actual +' + +test_expect_success 'name-rev --format= with %N (note)' ' + test_when_finished "git -C repo-format notes remove" && + git -C repo-format notes add -m"Make a note" && + printf "Make a note\n\n\n" >expect && + git -C repo-format name-rev --format="tformat:%N" \ + HEAD HEAD~ >actual && + test_cmp expect actual +' + +test_expect_success 'name-rev --format= --notes' ' + # One custom notes ref + test_when_finished "git -C repo-format notes remove" && + test_when_finished "git -C repo-format notes --ref=word remove" && + git -C repo-format notes add -m"default" && + git -C repo-format notes --ref=word add -m"custom" && + printf "custom\n\n" >expect && + git -C repo-format name-rev --format="tformat:%N" \ + --notes=word \ + HEAD >actual && + test_cmp expect actual && + # Glob all + printf "default\ncustom\n\n" >expect && + git -C repo-format name-rev --format="tformat:%N" \ + --notes=* \ + HEAD >actual && + test_cmp expect actual +' + # B # o # H \ From ee832e326985499ca3f17855f69dca22a4d41406 Mon Sep 17 00:00:00 2001 From: Usman Akinyemi Date: Thu, 26 Mar 2026 00:39:05 +0530 Subject: [PATCH 025/241] remote: move remote group resolution to remote.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `get_remote_group`, `add_remote_or_group`, and the `remote_group_data` struct are currently defined as static helpers inside builtin/fetch.c. They implement generic remote group resolution that is not specific to fetch — they parse `remotes.` config entries and resolve a name to either a list of group members or a single configured remote. Move them to remote.c and declare them in remote.h so that other builtins can use the same logic without duplication. Useful for the next patch. 
Suggested-by: Junio C Hamano Signed-off-by: Usman Akinyemi Signed-off-by: Junio C Hamano --- builtin/fetch.c | 42 ------------------------------------------ remote.c | 37 +++++++++++++++++++++++++++++++++++++ remote.h | 12 ++++++++++++ 3 files changed, 49 insertions(+), 42 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 8a36cf67b5f140..966cc58f730150 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -2138,48 +2138,6 @@ static int get_one_remote_for_fetch(struct remote *remote, void *priv) return 0; } -struct remote_group_data { - const char *name; - struct string_list *list; -}; - -static int get_remote_group(const char *key, const char *value, - const struct config_context *ctx UNUSED, - void *priv) -{ - struct remote_group_data *g = priv; - - if (skip_prefix(key, "remotes.", &key) && !strcmp(key, g->name)) { - /* split list by white space */ - while (*value) { - size_t wordlen = strcspn(value, " \t\n"); - - if (wordlen >= 1) - string_list_append_nodup(g->list, - xstrndup(value, wordlen)); - value += wordlen + (value[wordlen] != '\0'); - } - } - - return 0; -} - -static int add_remote_or_group(const char *name, struct string_list *list) -{ - int prev_nr = list->nr; - struct remote_group_data g; - g.name = name; g.list = list; - - repo_config(the_repository, get_remote_group, &g); - if (list->nr == prev_nr) { - struct remote *remote = remote_get(name); - if (!remote_is_configured(remote, 0)) - return 0; - string_list_append(list, remote->name); - } - return 1; -} - static void add_options_to_argv(struct strvec *argv, const struct fetch_config *config) { diff --git a/remote.c b/remote.c index 7ca2a6501b4920..3d62384792c323 100644 --- a/remote.c +++ b/remote.c @@ -2114,6 +2114,43 @@ int get_fetch_map(const struct ref *remote_refs, return 0; } +int get_remote_group(const char *key, const char *value, + const struct config_context *ctx UNUSED, + void *priv) +{ + struct remote_group_data *g = priv; + + if (skip_prefix(key, "remotes.", &key) && 
!strcmp(key, g->name)) { + /* split list by white space */ + while (*value) { + size_t wordlen = strcspn(value, " \t\n"); + + if (wordlen >= 1) + string_list_append_nodup(g->list, + xstrndup(value, wordlen)); + value += wordlen + (value[wordlen] != '\0'); + } + } + + return 0; +} + +int add_remote_or_group(const char *name, struct string_list *list) +{ + int prev_nr = list->nr; + struct remote_group_data g; + g.name = name; g.list = list; + + repo_config(the_repository, get_remote_group, &g); + if (list->nr == prev_nr) { + struct remote *remote = remote_get(name); + if (!remote_is_configured(remote, 0)) + return 0; + string_list_append(list, remote->name); + } + return 1; +} + int resolve_remote_symref(struct ref *ref, struct ref *list) { if (!ref->symref) diff --git a/remote.h b/remote.h index fc052945ee451d..8ff2bd88fa1b29 100644 --- a/remote.h +++ b/remote.h @@ -347,6 +347,18 @@ int branch_has_merge_config(struct branch *branch); int branch_merge_matches(struct branch *, int n, const char *); +/* list of the remote in a group as configured */ +struct remote_group_data { + const char *name; + struct string_list *list; +}; + +int get_remote_group(const char *key, const char *value, + const struct config_context *ctx, + void *priv); + +int add_remote_or_group(const char *name, struct string_list *list); + /** * Return the fully-qualified refname of the tracking branch for `branch`. * I.e., what "branch@{upstream}" would give you. Returns NULL if no From aac9bf095f1a3f1e7cdc74c6d4e537f671b93a5f Mon Sep 17 00:00:00 2001 From: Usman Akinyemi Date: Thu, 26 Mar 2026 00:39:06 +0530 Subject: [PATCH 026/241] push: support pushing to a remote group MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `git fetch` accepts a remote group name (configured via `remotes.` in config) and fetches from each member remote. `git push` has no equivalent — it only accepts a single remote name. 
Teach `git push` to resolve its repository argument through `add_remote_or_group()`, which was made public in the previous patch, so that a user can push to all remotes in a group with: git push When the argument resolves to a single remote, the behaviour is identical to before. When it resolves to a group, each member remote is pushed in sequence. The group push path rebuilds the refspec list (`rs`) from scratch for each member remote so that per-remote push mappings configured via `remote..push` are resolved correctly against each specific remote. Without this, refspec entries would accumulate across iterations and each subsequent remote would receive a growing list of duplicated entries. Mirror detection (`remote->mirror`) is also evaluated per remote using a copy of the flags, so that a mirror remote in the group cannot set TRANSPORT_PUSH_FORCE on subsequent non-mirror remotes in the same group. Suggested-by: Junio C Hamano Signed-off-by: Usman Akinyemi Signed-off-by: Junio C Hamano --- Documentation/git-push.adoc | 73 ++++++++++++++++-- builtin/push.c | 123 +++++++++++++++++++++-------- t/meson.build | 1 + t/t5566-push-group.sh | 150 ++++++++++++++++++++++++++++++++++++ 4 files changed, 306 insertions(+), 41 deletions(-) create mode 100755 t/t5566-push-group.sh diff --git a/Documentation/git-push.adoc b/Documentation/git-push.adoc index e5ba3a67421edc..b7f617a290592b 100644 --- a/Documentation/git-push.adoc +++ b/Documentation/git-push.adoc @@ -18,17 +18,28 @@ git push [--all | --branches | --mirror | --tags] [--follow-tags] [--atomic] [-n DESCRIPTION ----------- - -Updates one or more branches, tags, or other references in a remote -repository from your local repository, and sends all necessary data -that isn't already on the remote. +Updates one or more branches, tags, or other references in one or more +remote repositories from your local repository, and sends all necessary +data that isn't already on the remote. The simplest way to push is `git push `. 
`git push origin main` will push the local `main` branch to the `main` branch on the remote named `origin`. -The `` argument defaults to the upstream for the current branch, -or `origin` if there's no configured upstream. +You can also push to multiple remotes at once by using a remote group. +A remote group is a named list of remotes configured via `remotes.` +in your git config: + + $ git config remotes.all-remotes "origin gitlab backup" + +Then `git push all-remotes` will push to `origin`, `gitlab`, and +`backup` in turn, as if you had run `git push` against each one +individually. Each remote is pushed independently using its own +push mapping configuration. There is a `remotes.` entry in +the configuration file. (See linkgit:git-config[1]). + +The `` argument defaults to the upstream for the current +branch, or `origin` if there's no configured upstream. To decide which branches, tags, or other refs to push, Git uses (in order of precedence): @@ -55,8 +66,10 @@ OPTIONS __:: The "remote" repository that is the destination of a push operation. This parameter can be either a URL - (see the section <> below) or the name - of a remote (see the section <> below). + (see the section <> below), the name + of a remote (see the section <> below), + or the name of a remote group + (see the section <> below). `...`:: Specify what destination ref to update with what source object. @@ -430,6 +443,50 @@ further recursion will occur. In this case, `only` is treated as `on-demand`. include::urls-remotes.adoc[] +[[REMOTE-GROUPS]] +REMOTE GROUPS +------------- + +A remote group is a named list of remotes configured via `remotes.` +in your git config: + + $ git config remotes.all-remotes "r1 r2 r3" + +When a group name is given as the `` argument, the push is +performed to each member remote in turn. The defining principle is: + + git push all-remotes + +is exactly equivalent to: + + git push r1 + git push r2 + ... 
+ git push rN + +where r1, r2, ..., rN are the members of `all-remotes`. No special +behaviour is added or removed — the group is purely a shorthand for +running the same push command against each member remote individually. + +The behaviour upon failure depends on the kind of error encountered: + +If a member remote rejects the push, for example due to a +non-fast-forward update, force needed but not given, an existing tag, +or a server-side hook refusing a ref, Git reports the error and continues +pushing to the remaining remotes in the group. The overall exit code is +non-zero if any member push fails. + +If a member remote cannot be contacted at all, for example because the +repository does not exist, authentication fails, or the network is +unreachable, the push stops at that point and the remaining remotes +are not attempted. + +This means the user is responsible for ensuring that the sequence of +individual pushes makes sense. If `git push r1`` would fail for a given +set of options and arguments, then `git push all-remotes` will fail in +the same way when it reaches r1. The group push does not do anything +special to make a failing individual push succeed. 
+ OUTPUT ------ diff --git a/builtin/push.c b/builtin/push.c index 7100ffba5da17e..ed292c48fc45e6 100644 --- a/builtin/push.c +++ b/builtin/push.c @@ -552,12 +552,13 @@ int cmd_push(int argc, int flags = 0; int tags = 0; int push_cert = -1; - int rc; + int rc = 0; + int base_flags; const char *repo = NULL; /* default repository */ struct string_list push_options_cmdline = STRING_LIST_INIT_DUP; + struct string_list remote_group = STRING_LIST_INIT_DUP; struct string_list *push_options; const struct string_list_item *item; - struct remote *remote; struct option options[] = { OPT__VERBOSITY(&verbosity), @@ -620,39 +621,45 @@ int cmd_push(int argc, else if (recurse_submodules == RECURSE_SUBMODULES_ONLY) flags |= TRANSPORT_RECURSE_SUBMODULES_ONLY; - if (tags) - refspec_append(&rs, "refs/tags/*"); - if (argc > 0) repo = argv[0]; - remote = pushremote_get(repo); - if (!remote) { - if (repo) - die(_("bad repository '%s'"), repo); - die(_("No configured push destination.\n" - "Either specify the URL from the command-line or configure a remote repository using\n" - "\n" - " git remote add \n" - "\n" - "and then push using the remote name\n" - "\n" - " git push \n")); - } - - if (argc > 0) - set_refspecs(argv + 1, argc - 1, remote); - - if (remote->mirror) - flags |= (TRANSPORT_PUSH_MIRROR|TRANSPORT_PUSH_FORCE); - - if (flags & TRANSPORT_PUSH_ALL) { - if (argc >= 2) - die(_("--all can't be combined with refspecs")); - } - if (flags & TRANSPORT_PUSH_MIRROR) { - if (argc >= 2) - die(_("--mirror can't be combined with refspecs")); + if (repo) { + if (!add_remote_or_group(repo, &remote_group)) { + /* + * Not a configured remote name or group name. + * Try treating it as a direct URL or path, e.g. + * git push /tmp/foo.git + * git push https://github.com/user/repo.git + * pushremote_get() creates an anonymous remote + * from the URL so the loop below can handle it + * identically to a named remote. 
+ */ + struct remote *r = pushremote_get(repo); + if (!r) + die(_("bad repository '%s'"), repo); + string_list_append(&remote_group, r->name); + } + } else { + struct remote *r = pushremote_get(NULL); + if (!r) + die(_("No configured push destination.\n" + "Either specify the URL from the command-line or configure a remote repository using\n" + "\n" + " git remote add \n" + "\n" + "and then push using the remote name\n" + "\n" + " git push \n" + "\n" + "To push to multiple remotes at once, configure a remote group using\n" + "\n" + " git config remotes. \" \"\n" + "\n" + "and then push using the group name\n" + "\n" + " git push \n")); + string_list_append(&remote_group, r->name); } if (!is_empty_cas(&cas) && (flags & TRANSPORT_PUSH_FORCE_IF_INCLUDES)) @@ -662,10 +669,60 @@ int cmd_push(int argc, if (strchr(item->string, '\n')) die(_("push options must not have new line characters")); - rc = do_push(flags, push_options, remote); + /* + * Push to each remote in remote_group. For a plain "git push " + * or a default push, remote_group has exactly one entry and the loop + * runs once — there is nothing structurally special about that case. + * For a group, the loop runs once per member remote. + * + * Mirror detection and the --mirror/--all + refspec conflict checks + * are done per remote inside the loop. A remote configured with + * remote.NAME.mirror=true implies mirror mode for that remote only — + * other non-mirror remotes in the same group are unaffected. + * + * rs is rebuilt from scratch for each remote so that per-remote push + * mappings (remote.NAME.push config) are resolved against the correct + * remote. iter_flags is derived from a clean snapshot of flags taken + * before the loop so that a mirror remote cannot bleed + * TRANSPORT_PUSH_FORCE into subsequent non-mirror remotes in the + * same group. 
+ */ + base_flags = flags; + for (size_t i = 0; i < remote_group.nr; i++) { + int iter_flags = base_flags; + struct remote *r = pushremote_get(remote_group.items[i].string); + if (!r) + die(_("no such remote or remote group: %s"), + remote_group.items[i].string); + + if (r->mirror) + iter_flags |= (TRANSPORT_PUSH_MIRROR|TRANSPORT_PUSH_FORCE); + + if (iter_flags & TRANSPORT_PUSH_ALL) { + if (argc >= 2) + die(_("--all can't be combined with refspecs")); + } + if (iter_flags & TRANSPORT_PUSH_MIRROR) { + if (argc >= 2) + die(_("--mirror can't be combined with refspecs")); + } + + refspec_clear(&rs); + rs = (struct refspec) REFSPEC_INIT_PUSH; + + if (tags) + refspec_append(&rs, "refs/tags/*"); + if (argc > 0) + set_refspecs(argv + 1, argc - 1, r); + + rc |= do_push(iter_flags, push_options, r); + } + string_list_clear(&push_options_cmdline, 0); string_list_clear(&push_options_config, 0); + string_list_clear(&remote_group, 0); clear_cas_option(&cas); + if (rc == -1) usage_with_options(push_usage, options); else diff --git a/t/meson.build b/t/meson.build index 9b2fa4dee807d6..215df033e07e32 100644 --- a/t/meson.build +++ b/t/meson.build @@ -700,6 +700,7 @@ integration_tests = [ 't5563-simple-http-auth.sh', 't5564-http-proxy.sh', 't5565-push-multiple.sh', + 't5566-push-group.sh', 't5570-git-daemon.sh', 't5571-pre-push-hook.sh', 't5572-pull-submodule.sh', diff --git a/t/t5566-push-group.sh b/t/t5566-push-group.sh new file mode 100755 index 00000000000000..b9962946c7656b --- /dev/null +++ b/t/t5566-push-group.sh @@ -0,0 +1,150 @@ +#!/bin/sh + +test_description='push to remote group' + +. 
./test-lib.sh + +test_expect_success 'setup' ' + for i in 1 2 3 + do + git init --bare dest-$i.git && + git -C dest-$i.git symbolic-ref HEAD refs/heads/not-a-branch || + return 1 + done && + test_tick && + git commit --allow-empty -m "initial" && + git config set remote.remote-1.url "file://$(pwd)/dest-1.git" && + git config set remote.remote-1.fetch "+refs/heads/*:refs/remotes/remote-1/*" && + git config set remote.remote-2.url "file://$(pwd)/dest-2.git" && + git config set remote.remote-2.fetch "+refs/heads/*:refs/remotes/remote-2/*" && + git config set remote.remote-3.url "file://$(pwd)/dest-3.git" && + git config set remote.remote-3.fetch "+refs/heads/*:refs/remotes/remote-3/*" && + git config set remotes.all-remotes "remote-1 remote-2 remote-3" +' + +test_expect_success 'push to remote group updates all members correctly' ' + git push all-remotes HEAD:refs/heads/main && + git rev-parse HEAD >expect && + for i in 1 2 3 + do + git -C dest-$i.git rev-parse refs/heads/main >actual || + return 1 + test_cmp expect actual || return 1 + done +' + +test_expect_success 'push second commit to group updates all members' ' + test_tick && + git commit --allow-empty -m "second" && + git push all-remotes HEAD:refs/heads/main && + git rev-parse HEAD >expect && + for i in 1 2 3 + do + git -C dest-$i.git rev-parse refs/heads/main >actual || + return 1 + test_cmp expect actual || return 1 + done +' + +test_expect_success 'push to single remote in group does not affect others' ' + test_tick && + git commit --allow-empty -m "third" && + git push remote-1 HEAD:refs/heads/main && + git -C dest-1.git rev-parse refs/heads/main >hash-after-1 && + git -C dest-2.git rev-parse refs/heads/main >hash-after-2 && + ! 
test_cmp hash-after-1 hash-after-2 +' + +test_expect_success 'mirror remote in group with refspec fails' ' + git config set remote.remote-1.mirror true && + test_must_fail git push all-remotes HEAD:refs/heads/main 2>err && + test_grep "mirror" err && + git config unset remote.remote-1.mirror +' +test_expect_success 'push.default=current works with group push' ' + git config set push.default current && + test_tick && + git commit --allow-empty -m "fifth" && + git push all-remotes && + git config unset push.default +' + +test_expect_success 'push continues past rejection to remaining remotes' ' + for i in c1 c2 c3 + do + git init --bare dest-$i.git || return 1 + done && + git config set remote.c1.url "file://$(pwd)/dest-c1.git" && + git config set remote.c2.url "file://$(pwd)/dest-c2.git" && + git config set remote.c3.url "file://$(pwd)/dest-c3.git" && + git config set remotes.continue-group "c1 c2 c3" && + + test_tick && + git commit --allow-empty -m "base for continue test" && + + # initial sync + git push continue-group HEAD:refs/heads/main && + + # advance c2 independently + git clone dest-c2.git tmp-c2 && + ( + cd tmp-c2 && + git checkout -b main origin/main && + test_commit c2_independent && + git push origin HEAD:refs/heads/main + ) && + rm -rf tmp-c2 && + + test_tick && + git commit --allow-empty -m "local diverging commit" && + + # push: c2 rejects, others succeed + test_must_fail git push continue-group HEAD:refs/heads/main && + + git rev-parse HEAD >expect && + git -C dest-c1.git rev-parse refs/heads/main >actual-c1 && + git -C dest-c3.git rev-parse refs/heads/main >actual-c3 && + test_cmp expect actual-c1 && + test_cmp expect actual-c3 && + + # c2 should not have the new commit + git -C dest-c2.git rev-parse refs/heads/main >actual-c2 && + ! 
test_cmp expect actual-c2 +' + +test_expect_success 'fatal connection error stops remaining remotes' ' + for i in f1 f2 f3 + do + git init --bare dest-$i.git || return 1 + done && + git config set remote.f1.url "file://$(pwd)/dest-f1.git" && + git config set remote.f2.url "file://$(pwd)/dest-f2.git" && + git config set remote.f3.url "file://$(pwd)/dest-f3.git" && + git config set remotes.fatal-group "f1 f2 f3" && + + test_tick && + git commit --allow-empty -m "base for fatal test" && + + # initial sync + git push fatal-group HEAD:refs/heads/main && + + # break f2 + git config set remote.f2.url "file:///tmp/does-not-exist-$$" && + + test_tick && + git commit --allow-empty -m "after fatal setup" && + + test_must_fail git push fatal-group HEAD:refs/heads/main && + + git rev-parse HEAD >expect && + git -C dest-f1.git rev-parse refs/heads/main >actual-f1 && + test_cmp expect actual-f1 && + + # f3 should not be updated + git -C dest-f3.git rev-parse refs/heads/main >actual-f3 && + ! test_cmp expect actual-f3 && + + git config set remote.f2.url "file://$(pwd)/dest-f2.git" +' + +test_done From 5f82f4be85231b3ea099fb56af6984bd8afced48 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 27 Mar 2026 15:18:50 -0700 Subject: [PATCH 027/241] SQUASH??? - futureproof against the attack of the "main" --- t/t5566-push-group.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/t/t5566-push-group.sh b/t/t5566-push-group.sh index b9962946c7656b..32b8c82cea23a6 100755 --- a/t/t5566-push-group.sh +++ b/t/t5566-push-group.sh @@ -2,6 +2,9 @@ test_description='push to remote group' +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=default +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + . 
./test-lib.sh test_expect_success 'setup' ' From 01fba10c96f10a6132f89a9d13060c418336f5e0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:40:53 -0400 Subject: [PATCH 028/241] midx-write: handle noop writes when converting incremental chains When updating a MIDX, we optimize out writes that will result in an identical MIDX as the one we already have on disk. See b3bab9d2729 (midx-write: extract function to test whether MIDX needs updating, 2025-12-10) for more details on exactly which writes are optimized out. If `midx_needs_update()` can't rule out any of the obvious cases (e.g., the checksum is invalid, we're requesting a different version, or performing compaction which always requires an update), then we compare the packs we're writing to the packs we already know about. If there are an equal number of packs being written as there are in any existing MIDX layer(s), then we compare the packs by their name. This comparison fails when we have an incremental MIDX chain with at least two layers, since we do not recursively peel through earlier layers, instead treating the `->pack_names` array of the tip MIDX layer as containing all `m->num_packs + m->num_packs_in_base` packs. Adjust this to instead look through the MIDX layers one by one when comparing pack names. While we're at it, fix a typo above in the same function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 18 ++++++++++-------- t/t5334-incremental-multi-pack-index.sh | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/midx-write.c b/midx-write.c index 0ff2e45aa7abdd..d0351bde49311e 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1152,7 +1152,7 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c /* * Ensure that we have a valid checksum before consulting the - * exisiting MIDX in order to determine if we can avoid an + * existing MIDX in order to determine if we can avoid an * update. 
* * This is necessary because the given MIDX is loaded directly @@ -1208,14 +1208,16 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c BUG("same pack added twice?"); } - for (uint32_t i = 0; i < ctx->nr; i++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, midx->pack_names[i]); - strbuf_strip_suffix(&buf, ".idx"); + for (struct multi_pack_index *m = midx; m; m = m->base_midx) { + for (uint32_t i = 0; i < m->num_packs; i++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, m->pack_names[i]); + strbuf_strip_suffix(&buf, ".idx"); - if (!strset_contains(&packs, buf.buf)) - goto out; - strset_remove(&packs, buf.buf); + if (!strset_contains(&packs, buf.buf)) + goto out; + strset_remove(&packs, buf.buf); + } } needed = false; diff --git a/t/t5334-incremental-multi-pack-index.sh b/t/t5334-incremental-multi-pack-index.sh index 99c7d44d8e9d34..c9f5b4e87aa035 100755 --- a/t/t5334-incremental-multi-pack-index.sh +++ b/t/t5334-incremental-multi-pack-index.sh @@ -132,4 +132,20 @@ test_expect_success 'relink existing MIDX layer' ' ' +test_expect_success 'non-incremental write with existing incremental chain' ' + git init non-incremental-write-with-existing && + test_when_finished "rm -fr non-incremental-write-with-existing" && + + ( + cd non-incremental-write-with-existing && + + git config set maintenance.auto false && + + write_midx_layer && + write_midx_layer && + + git multi-pack-index write + ) +' + test_done From bdb2f8a521b2d8b7174744081e3397ce8cdb7921 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:40:55 -0400 Subject: [PATCH 029/241] midx: use `string_list` for retained MIDX files Both `clear_midx_files_ext()` and `clear_incremental_midx_files_ext()` build a list of filenames to keep while pruning stale MIDX files. Today they hand-roll an array instead of using a `string_list`, thus requiring us to pass an additional length parameter, and makes lookups linear. 
Replace the bare array with a `string_list` which can be passed around as a single parameter. Though it improves lookup performance, the difference is likely immeasurable given how small the keep_hashes array typically is. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 53 +++++++++++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/midx.c b/midx.c index 81d6ab11e6eb0e..d322743f2db655 100644 --- a/midx.c +++ b/midx.c @@ -758,8 +758,7 @@ int midx_checksum_valid(struct multi_pack_index *m) } struct clear_midx_data { - char **keep; - uint32_t keep_nr; + struct string_list keep; const char *ext; }; @@ -767,15 +766,12 @@ static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUS const char *file_name, void *_data) { struct clear_midx_data *data = _data; - uint32_t i; if (!(starts_with(file_name, "multi-pack-index-") && ends_with(file_name, data->ext))) return; - for (i = 0; i < data->keep_nr; i++) { - if (!strcmp(data->keep[i], file_name)) - return; - } + if (string_list_has_string(&data->keep, file_name)) + return; if (unlink(full_path)) die_errno(_("failed to remove %s"), full_path); } @@ -783,48 +779,45 @@ static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUS void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash) { - struct clear_midx_data data; - memset(&data, 0, sizeof(struct clear_midx_data)); + struct clear_midx_data data = { + .keep = STRING_LIST_INIT_DUP, + .ext = ext, + }; if (keep_hash) { - ALLOC_ARRAY(data.keep, 1); + struct strbuf buf = STRBUF_INIT; + strbuf_addf(&buf, "multi-pack-index-%s.%s", keep_hash, ext); - data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext); - data.keep_nr = 1; + string_list_insert(&data.keep, buf.buf); + + strbuf_release(&buf); } - data.ext = ext; - for_each_file_in_pack_dir(source->path, - clear_midx_file_ext, - &data); + 
for_each_file_in_pack_dir(source->path, clear_midx_file_ext, &data); - if (keep_hash) - free(data.keep[0]); - free(data.keep); + string_list_clear(&data.keep, 0); } void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, char **keep_hashes, uint32_t hashes_nr) { - struct clear_midx_data data; + struct clear_midx_data data = { + .keep = STRING_LIST_INIT_NODUP, + .ext = ext, + }; uint32_t i; - memset(&data, 0, sizeof(struct clear_midx_data)); - - ALLOC_ARRAY(data.keep, hashes_nr); for (i = 0; i < hashes_nr; i++) - data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i], - ext); - data.keep_nr = hashes_nr; - data.ext = ext; + string_list_append(&data.keep, + xstrfmt("multi-pack-index-%s.%s", + keep_hashes[i], ext)); + string_list_sort(&data.keep); for_each_file_in_pack_subdir(source->path, "multi-pack-index.d", clear_midx_file_ext, &data); - for (i = 0; i < hashes_nr; i++) - free(data.keep[i]); - free(data.keep); + string_list_clear(&data.keep, 0); } void clear_midx_file(struct repository *r) From cd45bf0f3e4a8105984df55d5dcdb69d972e8048 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:40:58 -0400 Subject: [PATCH 030/241] strvec: introduce `strvec_init_alloc()` When the caller knows upfront how many elements will be pushed onto a `strvec`, it is useful to pre-allocate enough space in the array to fit that many elements (and one additional slot to store NULL, indicating the end of the list.) Introduce `strvec_init_alloc()`, which allocates the backing array large enough to hold `alloc` elements and the termination marker without further reallocation. Reimplement `strvec_init()` as a special case of `strvec_init_alloc()`, namely when `alloc` is zero. 
Helped-by: Junio C Hamano Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- strvec.c | 15 +++++++++++++-- strvec.h | 5 +++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/strvec.c b/strvec.c index f8de79f5579b49..15371980b84101 100644 --- a/strvec.c +++ b/strvec.c @@ -6,8 +6,19 @@ const char *empty_strvec[] = { NULL }; void strvec_init(struct strvec *array) { - struct strvec blank = STRVEC_INIT; - memcpy(array, &blank, sizeof(*array)); + strvec_init_alloc(array, 0); +} + +void strvec_init_alloc(struct strvec *array, size_t alloc) +{ + if (!alloc) { + struct strvec blank = STRVEC_INIT; + memcpy(array, &blank, sizeof(*array)); + } else { + CALLOC_ARRAY(array->v, st_add(alloc, 1)); + array->nr = 0; + array->alloc = alloc + 1; + } } void strvec_push_nodup(struct strvec *array, char *value) diff --git a/strvec.h b/strvec.h index f74e061e1419bc..34cb1f939f04d9 100644 --- a/strvec.h +++ b/strvec.h @@ -43,6 +43,11 @@ struct strvec { */ void strvec_init(struct strvec *); +/* + * Initializes an array large enough to store `alloc` elements. + */ +void strvec_init_alloc(struct strvec *, size_t alloc); + /* Push a copy of a string onto the end of the array. */ const char *strvec_push(struct strvec *, const char *); From bd90472c1e3b39225a0c35f8e77c35e67c92c8a4 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:00 -0400 Subject: [PATCH 031/241] midx: use `strvec` for `keep_hashes` The `keep_hashes` array in `write_midx_internal()` accumulates the checksums of MIDX files that should be retained when pruning stale entries from the MIDX chain. For similar reasons as in a previous commit, rewrite this using a strvec, requiring us to pass one fewer parameter. Unlike the aforementioned previous commit, use a `strvec` instead of a `string_list`, which provides a more ergonomic interface to adjust the values at a particular index. 
The ordering is important here, as this value is used to determine the contents of the resulting `multi-pack-index-chain` file when writing with "--incremental". Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 51 ++++++++++++++++++++++++++------------------------- midx.c | 28 +++++++++++++++++----------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/midx-write.c b/midx-write.c index d0351bde49311e..9f7d2bbf4cb814 100644 --- a/midx-write.c +++ b/midx-write.c @@ -29,8 +29,7 @@ extern void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash); extern void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - const char **keep_hashes, - uint32_t hashes_nr); + const struct strvec *keep_hashes); extern int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); @@ -1109,8 +1108,7 @@ static int link_midx_to_chain(struct multi_pack_index *m) } static void clear_midx_files(struct odb_source *source, - const char **hashes, uint32_t hashes_nr, - unsigned incremental) + const struct strvec *hashes, unsigned incremental) { /* * if incremental: @@ -1124,13 +1122,15 @@ static void clear_midx_files(struct odb_source *source, */ struct strbuf buf = STRBUF_INIT; const char *exts[] = { MIDX_EXT_BITMAP, MIDX_EXT_REV, MIDX_EXT_MIDX }; - uint32_t i, j; + uint32_t i; for (i = 0; i < ARRAY_SIZE(exts); i++) { - clear_incremental_midx_files_ext(source, exts[i], - hashes, hashes_nr); - for (j = 0; j < hashes_nr; j++) - clear_midx_files_ext(source, exts[i], hashes[j]); + clear_incremental_midx_files_ext(source, exts[i], hashes); + if (hashes) { + for (size_t j = 0; j < hashes->nr; j++) + clear_midx_files_ext(source, exts[i], + hashes->v[j]); + } } if (incremental) @@ -1268,7 +1268,7 @@ static int write_midx_internal(struct write_midx_opts *opts) int pack_name_concat_len = 0; int dropped_packs = 0; int result = -1; - const char **keep_hashes = NULL; + struct strvec 
keep_hashes = STRVEC_INIT; size_t keep_hashes_nr = 0; struct chunkfile *cf; @@ -1723,7 +1723,7 @@ static int write_midx_internal(struct write_midx_opts *opts) } else { keep_hashes_nr = ctx.num_multi_pack_indexes_before + 1; } - CALLOC_ARRAY(keep_hashes, keep_hashes_nr); + strvec_init_alloc(&keep_hashes, keep_hashes_nr); if (ctx.incremental) { FILE *chainf = fdopen_lock_file(&lk, "w"); @@ -1760,39 +1760,45 @@ static int write_midx_internal(struct write_midx_opts *opts) for (i = 0; i < num_layers_before_from; i++) { uint32_t j = num_layers_before_from - i - 1; - keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); + keep_hashes.v[j] = xstrdup(midx_get_checksum_hex(m)); + keep_hashes.nr++; m = m->base_midx; } - keep_hashes[i] = xstrdup(hash_to_hex_algop(midx_hash, + keep_hashes.v[i] = xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); + keep_hashes.nr++; i = 0; for (m = ctx.m; m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); m = m->base_midx) { - keep_hashes[keep_hashes_nr - i - 1] = + keep_hashes.v[keep_hashes_nr - i - 1] = xstrdup(midx_get_checksum_hex(m)); + keep_hashes.nr++; i++; } } else { - keep_hashes[ctx.num_multi_pack_indexes_before] = + keep_hashes.v[ctx.num_multi_pack_indexes_before] = xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); + keep_hashes.nr++; for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; - keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); + keep_hashes.v[j] = xstrdup(midx_get_checksum_hex(m)); + keep_hashes.nr++; m = m->base_midx; } } for (uint32_t i = 0; i < keep_hashes_nr; i++) - fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); + fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes.v[i]); } else { - keep_hashes[ctx.num_multi_pack_indexes_before] = + keep_hashes.v[ctx.num_multi_pack_indexes_before] = xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); + keep_hashes.nr++; } if (ctx.m || ctx.base_midx) @@ -1801,8 +1807,7 @@ static int 
write_midx_internal(struct write_midx_opts *opts) if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); - clear_midx_files(opts->source, keep_hashes, keep_hashes_nr, - ctx.incremental); + clear_midx_files(opts->source, &keep_hashes, ctx.incremental); result = 0; cleanup: @@ -1818,11 +1823,7 @@ static int write_midx_internal(struct write_midx_opts *opts) free(ctx.entries); free(ctx.pack_perm); free(ctx.pack_order); - if (keep_hashes) { - for (uint32_t i = 0; i < keep_hashes_nr; i++) - free((char *)keep_hashes[i]); - free(keep_hashes); - } + strvec_clear(&keep_hashes); strbuf_release(&midx_name); close_midx(midx_to_free); diff --git a/midx.c b/midx.c index d322743f2db655..e6b1fbe37d7718 100644 --- a/midx.c +++ b/midx.c @@ -12,6 +12,7 @@ #include "chunk-format.h" #include "pack-bitmap.h" #include "pack-revindex.h" +#include "strvec.h" #define MIDX_PACK_ERROR ((void *)(intptr_t)-1) @@ -19,8 +20,7 @@ int midx_checksum_valid(struct multi_pack_index *m); void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash); void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - char **keep_hashes, - uint32_t hashes_nr); + const struct strvec *keep_hashes); int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); @@ -799,20 +799,26 @@ void clear_midx_files_ext(struct odb_source *source, const char *ext, } void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - char **keep_hashes, - uint32_t hashes_nr) + const struct strvec *keep_hashes) { struct clear_midx_data data = { - .keep = STRING_LIST_INIT_NODUP, + .keep = STRING_LIST_INIT_DUP, .ext = ext, }; - uint32_t i; - for (i = 0; i < hashes_nr; i++) - string_list_append(&data.keep, - xstrfmt("multi-pack-index-%s.%s", - keep_hashes[i], ext)); - string_list_sort(&data.keep); + if (keep_hashes) { + struct strbuf buf = STRBUF_INIT; + for (size_t i = 0; i < keep_hashes->nr; i++) { + strbuf_reset(&buf); + 
+ strbuf_addf(&buf, "multi-pack-index-%s.%s", + keep_hashes->v[i], ext); + string_list_append(&data.keep, buf.buf); + } + + string_list_sort(&data.keep); + strbuf_release(&buf); + } for_each_file_in_pack_subdir(source->path, "multi-pack-index.d", clear_midx_file_ext, &data); From 3b32e22a1eed4351a7718b2d229014c7ba536c05 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:03 -0400 Subject: [PATCH 032/241] midx: introduce `--checksum-only` for incremental MIDX writes When writing an incremental MIDX layer, the MIDX machinery writes the new layer into the multi-pack-index.d directory and then updates the multi-pack-index-chain file to include the freshly written layer. Future callers however may not wish to immediately update the MIDX chain itself, preferring instead to write out new layer(s) themselves before atomically updating the chain. Concretely, the new incremental MIDX-based repacking strategy will want to do exactly this (that is, assemble the new MIDX chain itself before writing a new chain file and atomically linking it into place). Introduce a `--checksum-only` flag that: * writes the new MIDX layer into the multi-pack-index.d directory * prints its checksum * does not update the multi-pack-index-chain file. The MIDX chain file (and thus, the lock protecting it) remain untouched, allowing callers to assemble the chain themselves. This flag requires `--incremental`, since the notion of a separate layer only makes sense for incremental MIDXs. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 4 +-- builtin/multi-pack-index.c | 28 ++++++++++++++++-- midx-write.c | 38 ++++++++++++++++--------- midx.h | 1 + t/t5334-incremental-multi-pack-index.sh | 17 +++++++++++ t/t5335-compact-multi-pack-index.sh | 36 +++++++++++++++++++++++ 6 files changed, 107 insertions(+), 17 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 612568301412d6..657e0639f6a1a2 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -11,9 +11,9 @@ SYNOPSIS [verse] 'git multi-pack-index' [] write [--preferred-pack=] [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] - [--refs-snapshot=] + [--refs-snapshot=] [--[no-]checksum-only] 'git multi-pack-index' [] compact [--[no-]incremental] - [--[no-]bitmap] + [--[no-]bitmap] [--[no-]checksum-only] 'git multi-pack-index' [] verify 'git multi-pack-index' [] expire 'git multi-pack-index' [] repack [--batch-size=] diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 0f72d96c02da4c..17ab1525454a15 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -16,11 +16,11 @@ #define BUILTIN_MIDX_WRITE_USAGE \ N_("git multi-pack-index [] write [--preferred-pack=]\n" \ " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ - " [--refs-snapshot=]") + " [--refs-snapshot=] [--[no-]checksum-only]") #define BUILTIN_MIDX_COMPACT_USAGE \ N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ - " [--[no-]bitmap] ") + " [--[no-]bitmap] [--[no-]checksum-only] ") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -153,6 +153,9 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_BIT(0, "checksum-only", 
&opts.flags, + N_("write a MIDX layer without updating the MIDX chain"), + MIDX_WRITE_CHECKSUM_ONLY), OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, N_("write multi-pack index containing only given indexes")), OPT_FILENAME(0, "refs-snapshot", &opts.refs_snapshot, @@ -178,6 +181,15 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, if (argc) usage_with_options(builtin_multi_pack_index_write_usage, options); + + if (opts.flags & MIDX_WRITE_CHECKSUM_ONLY && + !(opts.flags & MIDX_WRITE_INCREMENTAL)) { + error(_("cannot use %s without %s"), + "--checksum-only", "--incremental"); + usage_with_options(builtin_multi_pack_index_write_usage, + options); + } + source = handle_object_dir_option(repo); FREE_AND_NULL(options); @@ -221,6 +233,9 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_BIT(0, "checksum-only", &opts.flags, + N_("write a MIDX layer without updating the MIDX chain"), + MIDX_WRITE_CHECKSUM_ONLY), OPT_END(), }; @@ -239,6 +254,15 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, if (argc != 2) usage_with_options(builtin_multi_pack_index_compact_usage, options); + + if (opts.flags & MIDX_WRITE_CHECKSUM_ONLY && + !(opts.flags & MIDX_WRITE_INCREMENTAL)) { + error(_("cannot use %s without %s"), + "--checksum-only", "--incremental"); + usage_with_options(builtin_multi_pack_index_compact_usage, + options); + } + source = handle_object_dir_option(the_repository); FREE_AND_NULL(options); diff --git a/midx-write.c b/midx-write.c index 9f7d2bbf4cb814..2c6905173ba353 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1600,11 +1600,14 @@ static int write_midx_internal(struct write_midx_opts *opts) } if (ctx.incremental) { - struct strbuf lock_name = STRBUF_INIT; + if (!(opts->flags & MIDX_WRITE_CHECKSUM_ONLY)) { + struct strbuf lock_name = STRBUF_INIT; - 
get_midx_chain_filename(opts->source, &lock_name); - hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR); - strbuf_release(&lock_name); + get_midx_chain_filename(opts->source, &lock_name); + hold_lock_file_for_update(&lk, lock_name.buf, + LOCK_DIE_ON_ERROR); + strbuf_release(&lock_name); + } incr = mks_tempfile_m(midx_name.buf, 0444); if (!incr) { @@ -1725,14 +1728,19 @@ static int write_midx_internal(struct write_midx_opts *opts) } strvec_init_alloc(&keep_hashes, keep_hashes_nr); + if (opts->flags & MIDX_WRITE_CHECKSUM_ONLY) + printf("%s\n", hash_to_hex_algop(midx_hash, r->hash_algo)); + if (ctx.incremental) { - FILE *chainf = fdopen_lock_file(&lk, "w"); struct strbuf final_midx_name = STRBUF_INIT; struct multi_pack_index *m = ctx.base_midx; - if (!chainf) { - error_errno(_("unable to open multi-pack-index chain file")); - goto cleanup; + if (!(opts->flags & MIDX_WRITE_CHECKSUM_ONLY)) { + FILE *chainf = fdopen_lock_file(&lk, "w"); + if (!chainf) { + error_errno(_("unable to open multi-pack-index chain file")); + goto cleanup; + } } if (link_midx_to_chain(ctx.base_midx) < 0) @@ -1793,8 +1801,10 @@ static int write_midx_internal(struct write_midx_opts *opts) } } - for (uint32_t i = 0; i < keep_hashes_nr; i++) - fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes.v[i]); + if (!(opts->flags & MIDX_WRITE_CHECKSUM_ONLY)) + for (uint32_t i = 0; i < keep_hashes_nr; i++) + fprintf(get_lock_file_fp(&lk), "%s\n", + keep_hashes.v[i]); } else { keep_hashes.v[ctx.num_multi_pack_indexes_before] = xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); @@ -1804,10 +1814,12 @@ static int write_midx_internal(struct write_midx_opts *opts) if (ctx.m || ctx.base_midx) odb_close(ctx.repo->objects); - if (commit_lock_file(&lk) < 0) - die_errno(_("could not write multi-pack-index")); + if (!(opts->flags & MIDX_WRITE_CHECKSUM_ONLY)) { + if (commit_lock_file(&lk) < 0) + die_errno(_("could not write multi-pack-index")); - clear_midx_files(opts->source, &keep_hashes, 
ctx.incremental); + clear_midx_files(opts->source, &keep_hashes, ctx.incremental); + } result = 0; cleanup: diff --git a/midx.h b/midx.h index 08f3728e5204b8..9f1acd7ace44b5 100644 --- a/midx.h +++ b/midx.h @@ -83,6 +83,7 @@ struct multi_pack_index { #define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4) #define MIDX_WRITE_INCREMENTAL (1 << 5) #define MIDX_WRITE_COMPACT (1 << 6) +#define MIDX_WRITE_CHECKSUM_ONLY (1 << 7) #define MIDX_EXT_REV "rev" #define MIDX_EXT_BITMAP "bitmap" diff --git a/t/t5334-incremental-multi-pack-index.sh b/t/t5334-incremental-multi-pack-index.sh index c9f5b4e87aa035..ac249365f2b30d 100755 --- a/t/t5334-incremental-multi-pack-index.sh +++ b/t/t5334-incremental-multi-pack-index.sh @@ -96,6 +96,23 @@ test_expect_success 'show object from second pack' ' git cat-file -p 2.2 ' +test_expect_success 'write MIDX layer with --checksum-only' ' + test_commit checksum-only && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --checksum-only)" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + +test_expect_success 'write non-incremental MIDX layer with --checksum-only' ' + test_must_fail git multi-pack-index write --bitmap --checksum-only 2>err && + test_grep "cannot use --checksum-only without --incremental" err +' + for reuse in false single multi do test_expect_success "full clone (pack.allowPackReuse=$reuse)" ' diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh index 40f3844282f04e..3ad910a9d1c19d 100755 --- a/t/t5335-compact-multi-pack-index.sh +++ b/t/t5335-compact-multi-pack-index.sh @@ -290,4 +290,40 @@ test_expect_success 'MIDX compaction with bitmaps (non-trivial)' ' ) ' +test_expect_success 'MIDX compaction with --checksum-only' ' + git init midx-compact-with--checksum-only && + ( + cd midx-compact-with--checksum-only && + + git config maintenance.auto false && + + 
write_packs A B C D && + + test_line_count = 4 $midx_chain && + cp "$midx_chain" "$midx_chain".bak && + + layer="$(git multi-pack-index compact --incremental \ + --checksum-only \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")")" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + + # After writing the new layer, insert it into the chain + # manually. This is done in order to make $layer visible + # to the read-midx test helper below, and matches what + # the MIDX command would do without --checksum-only. + { + nth_line 1 "$midx_chain.bak" && + echo $layer && + nth_line 4 "$midx_chain.bak" + } >$midx_chain && + + test-tool read-midx $objdir $layer >midx.data && + grep "^pack-B-.*\.idx" midx.data && + grep "^pack-C-.*\.idx" midx.data + + ) +' + test_done From 15b52e8be4c1b99b93a499b96f9b9650daece0e0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:06 -0400 Subject: [PATCH 033/241] midx: support custom `--base` for incremental MIDX writes Both `compact` and `write --incremental` fix the base of the resulting MIDX layer: `compact` always places the compacted result on top of "from's" immediate parent in the chain, and `write --incremental` always appends a new layer to the existing tip. In both cases the base is not configurable. Future callers need additional flexibility. For instance, the incremental MIDX-based repacking code may wish to write a layer based on some intermediate ancestor rather than the current tip, or produce a root layer when replacing the bottommost entries in the chain. Introduce a new `--base` option for both subcommands to specify the checksum of the MIDX layer to use as the base. The given checksum must refer to a valid layer in the MIDX chain that is an ancestor of the topmost layer being written or compacted. The special value "none" is accepted to produce a root layer with no parent. 
This will be needed when the incremental repacking machinery determines that the bottommost layers of the chain should be replaced. If no `--base` is given, behavior is unchanged: `compact` uses "from's" immediate parent in the chain, and `write` appends to the existing tip. For the `write` subcommand, `--base` requires `--checksum-only`. A plain `write --incremental` appends a new layer to the live chain tip with no mechanism to atomically replace it; overriding the base would produce a layer that does not extend the tip, breaking chain invariants. With `--checksum-only` the chain is left unmodified and the caller is responsible for assembling a valid chain. For `compact`, no such restriction applies. The compaction operation atomically replaces the compacted range in the chain file, so writing the result on top of any valid ancestor preserves chain invariants. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 17 +++++- builtin/multi-pack-index.c | 24 ++++++-- midx-write.c | 34 ++++++++++- midx.h | 5 +- t/t5334-incremental-multi-pack-index.sh | 30 ++++++++++ t/t5335-compact-multi-pack-index.sh | 77 +++++++++++++++++++++++++ 6 files changed, 178 insertions(+), 9 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 657e0639f6a1a2..635105ad8017ac 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -12,8 +12,10 @@ SYNOPSIS 'git multi-pack-index' [] write [--preferred-pack=] [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] [--refs-snapshot=] [--[no-]checksum-only] + [--base=] 'git multi-pack-index' [] compact [--[no-]incremental] - [--[no-]bitmap] [--[no-]checksum-only] + [--[no-]bitmap] [--base=] [--[no-]checksum-only] + 'git multi-pack-index' [] verify 'git multi-pack-index' [] expire 'git multi-pack-index' [] repack [--batch-size=] @@ -83,6 +85,13 @@ marker). 
and packs not present in an existing MIDX layer. Migrates non-incremental MIDXs to incremental ones when necessary. + + --base=:: + Specify the checksum of an existing MIDX layer to use + as the base when writing a new incremental layer. + The special value `none` indicates that the new layer + should have no base (i.e., it becomes a root layer). + Requires `--checksum-only`. -- compact:: @@ -97,6 +106,12 @@ compact:: --[no-]bitmap:: Control whether or not a multi-pack bitmap is written. + + --base=:: + Specify the checksum of an existing MIDX layer to use + as the base for the compacted result, instead of using + the immediate parent of ``. The special value + `none` indicates that the result should have no base. -- verify:: diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 17ab1525454a15..3a12baacb5691f 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -16,11 +16,13 @@ #define BUILTIN_MIDX_WRITE_USAGE \ N_("git multi-pack-index [] write [--preferred-pack=]\n" \ " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ - " [--refs-snapshot=] [--[no-]checksum-only]") + " [--refs-snapshot=] [--[no-]checksum-only]\n" \ + " [--base=]") #define BUILTIN_MIDX_COMPACT_USAGE \ N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ - " [--[no-]bitmap] [--[no-]checksum-only] ") + " [--[no-]bitmap] [--base=] [--[no-]checksum-only]\n" \ + " ") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -63,6 +65,7 @@ static char const * const builtin_multi_pack_index_usage[] = { static struct opts_multi_pack_index { char *object_dir; const char *preferred_pack; + const char *incremental_base; char *refs_snapshot; unsigned long batch_size; unsigned flags; @@ -151,6 +154,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, N_("pack for reuse when computing a multi-pack bitmap")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), MIDX_WRITE_BITMAP | 
MIDX_WRITE_REV_INDEX), + OPT_STRING(0, "base", &opts.incremental_base, N_("checksum"), + N_("base MIDX for incremental writes")), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), OPT_BIT(0, "checksum-only", &opts.flags, @@ -190,6 +195,13 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, options); } + if (opts.incremental_base && + !(opts.flags & MIDX_WRITE_CHECKSUM_ONLY)) { + error(_("cannot use --base without --checksum-only")); + usage_with_options(builtin_multi_pack_index_write_usage, + options); + } + source = handle_object_dir_option(repo); FREE_AND_NULL(options); @@ -201,7 +213,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, ret = write_midx_file_only(source, &packs, opts.preferred_pack, - opts.refs_snapshot, opts.flags); + opts.refs_snapshot, + opts.incremental_base, opts.flags); string_list_clear(&packs, 0); free(opts.refs_snapshot); @@ -229,6 +242,8 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, struct option *options; static struct option builtin_multi_pack_index_compact_options[] = { + OPT_STRING(0, "base", &opts.incremental_base, N_("checksum"), + N_("base MIDX for incremental writes")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, @@ -290,7 +305,8 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, die(_("MIDX %s must be an ancestor of %s"), argv[0], argv[1]); } - ret = write_midx_file_compact(source, from_midx, to_midx, opts.flags); + ret = write_midx_file_compact(source, from_midx, to_midx, + opts.incremental_base, opts.flags); return ret; } diff --git a/midx-write.c b/midx-write.c index 2c6905173ba353..9298a5140fd163 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1247,6 +1247,7 @@ struct write_midx_opts { const char *preferred_pack_name; const char *refs_snapshot; + const char *incremental_base; unsigned 
flags; }; @@ -1329,11 +1330,32 @@ static int write_midx_internal(struct write_midx_opts *opts) /* * If compacting MIDX layer(s) in the range [from, to], then the - * compacted MIDX will share the same base MIDX as 'from'. + * compacted MIDX will share the same base MIDX as 'from', + * unless a custom --base is specified (see below). */ if (ctx.compact) ctx.base_midx = ctx.compact_from->base_midx; + if (opts->incremental_base) { + if (!strcmp(opts->incremental_base, "none")) { + ctx.base_midx = NULL; + } else { + while (ctx.base_midx) { + const char *cmp = midx_get_checksum_hex(ctx.base_midx); + if (!strcmp(opts->incremental_base, cmp)) + break; + + ctx.base_midx = ctx.base_midx->base_midx; + } + + if (!ctx.base_midx) { + error(_("could not find base MIDX '%s'"), + opts->incremental_base); + goto cleanup; + } + } + } + ctx.nr = 0; ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16; ctx.info = NULL; @@ -1846,7 +1868,8 @@ static int write_midx_internal(struct write_midx_opts *opts) int write_midx_file(struct odb_source *source, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags) + const char *refs_snapshot, + unsigned flags) { struct write_midx_opts opts = { .source = source, @@ -1861,13 +1884,16 @@ int write_midx_file(struct odb_source *source, int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags) + const char *refs_snapshot, + const char *incremental_base, + unsigned flags) { struct write_midx_opts opts = { .source = source, .packs_to_include = packs_to_include, .preferred_pack_name = preferred_pack_name, .refs_snapshot = refs_snapshot, + .incremental_base = incremental_base, .flags = flags, }; @@ -1877,12 +1903,14 @@ int write_midx_file_only(struct odb_source *source, int write_midx_file_compact(struct odb_source *source, struct multi_pack_index *from, struct multi_pack_index *to, + const char 
*incremental_base, unsigned flags) { struct write_midx_opts opts = { .source = source, .compact_from = from, .compact_to = to, + .incremental_base = incremental_base, .flags = flags | MIDX_WRITE_COMPACT, }; diff --git a/midx.h b/midx.h index 9f1acd7ace44b5..e4a75ff2bef82a 100644 --- a/midx.h +++ b/midx.h @@ -132,10 +132,13 @@ int write_midx_file(struct odb_source *source, int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags); + const char *refs_snapshot, + const char *incremental_base, + unsigned flags); int write_midx_file_compact(struct odb_source *source, struct multi_pack_index *from, struct multi_pack_index *to, + const char *incremental_base, unsigned flags); void clear_midx_file(struct repository *r); int verify_midx_file(struct odb_source *source, unsigned flags); diff --git a/t/t5334-incremental-multi-pack-index.sh b/t/t5334-incremental-multi-pack-index.sh index ac249365f2b30d..7733cafb647f36 100755 --- a/t/t5334-incremental-multi-pack-index.sh +++ b/t/t5334-incremental-multi-pack-index.sh @@ -113,6 +113,36 @@ test_expect_success 'write non-incremental MIDX layer with --checksum-only' ' test_grep "cannot use --checksum-only without --incremental" err ' +test_expect_success 'write MIDX layer with --base without --checksum-only' ' + test_must_fail git multi-pack-index write --bitmap --incremental \ + --base=none 2>err && + test_grep "cannot use --base without --checksum-only" err +' + +test_expect_success 'write MIDX layer with --base=none and --checksum-only' ' + test_commit base-none && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --checksum-only --base=none)" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + +test_expect_success 'write MIDX layer with --base= and --checksum-only' ' + test_commit 
base-hash && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --checksum-only --base="$(nth_line 1 "$midx_chain")")" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + for reuse in false single multi do test_expect_success "full clone (pack.allowPackReuse=$reuse)" ' diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh index 3ad910a9d1c19d..d8e1b03669a7e7 100755 --- a/t/t5335-compact-multi-pack-index.sh +++ b/t/t5335-compact-multi-pack-index.sh @@ -304,6 +304,7 @@ test_expect_success 'MIDX compaction with --checksum-only' ' layer="$(git multi-pack-index compact --incremental \ --checksum-only \ + --base="$(nth_line 1 "$midx_chain")" \ "$(nth_line 2 "$midx_chain")" \ "$(nth_line 3 "$midx_chain")")" && @@ -326,4 +327,80 @@ test_expect_success 'MIDX compaction with --checksum-only' ' ) ' +test_expect_success 'MIDX compaction with --base' ' + git init midx-compact-with--base && + ( + cd midx-compact-with--base && + + git config maintenance.auto false && + + write_packs A B C D && + + test_line_count = 4 "$midx_chain" && + + cp "$midx_chain" "$midx_chain.bak" && + + git multi-pack-index compact --incremental \ + --base="$(nth_line 1 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 2 $midx_chain && + + nth_line 1 "$midx_chain.bak" >expect && + nth_line 1 "$midx_chain" >actual && + + test_cmp expect actual + ) +' + +test_expect_success 'MIDX compaction with --base=none' ' + git init midx-compact-base-none && + ( + cd midx-compact-base-none && + + git config maintenance.auto false && + + write_packs A B C D && + + test_line_count = 4 $midx_chain && + + cp "$midx_chain" "$midx_chain".bak && + + # Compact the two bottommost layers (A and B) into a new + # root layer with no parent. 
+ git multi-pack-index compact --incremental \ + --base=none \ + "$(nth_line 1 "$midx_chain")" \ + "$(nth_line 2 "$midx_chain")" && + + test_line_count = 3 $midx_chain && + + # The upper layers (C and D) should be preserved + # unchanged. + nth_line 3 "$midx_chain.bak" >expect && + nth_line 4 "$midx_chain.bak" >>expect && + nth_line 2 "$midx_chain" >actual && + nth_line 3 "$midx_chain" >>actual && + + test_cmp expect actual + ) +' + +test_expect_success 'MIDX compaction with bogus --base checksum' ' + git init midx-compact-bogus-base && + ( + cd midx-compact-bogus-base && + + git config maintenance.auto false && + + write_packs A B C && + + test_must_fail git multi-pack-index compact --incremental \ + --base=deadbeef \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")" 2>err && + test_grep "could not find base MIDX" err + ) +' + test_done From 4bd6e030d85d3a28445033dc4ebb2b972f2c3b25 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:08 -0400 Subject: [PATCH 034/241] repack: track the ODB source via existing_packs Store the ODB source in the `existing_packs` struct and use that in place of the raw `repo->objects->sources` access within `cmd_repack()`. The source used is still assigned from the first source in the list, so there are no functional changes in this commit. The changes instead serve two purposes (one immediate, one not): - The incremental MIDX-based repacking machinery will need to know what source is being used to read the existing MIDX/chain (should one exist). - In the future, if "git repack" is taught how to operate on other object sources, this field will serve as the authoritative value for that source. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 ++--- repack.c | 2 ++ repack.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f6bb04bef7264e..44a95b56f23f4c 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -402,7 +402,7 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. */ - if (!get_multi_pack_index(repo->objects->sources)) + if (!get_multi_pack_index(existing.source)) midx_must_contain_cruft = 1; } @@ -549,8 +549,7 @@ int cmd_repack(int argc, unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(repo->objects->sources, - NULL, NULL, flags); + write_midx_file(existing.source, NULL, NULL, flags); } cleanup: diff --git a/repack.c b/repack.c index 596841027af93f..2ee6b51420aa54 100644 --- a/repack.c +++ b/repack.c @@ -154,6 +154,8 @@ void existing_packs_collect(struct existing_packs *existing, string_list_append(&existing->non_kept_packs, buf.buf); } + existing->source = existing->repo->objects->sources; + string_list_sort(&existing->kept_packs); string_list_sort(&existing->non_kept_packs); string_list_sort(&existing->cruft_packs); diff --git a/repack.h b/repack.h index bc9f2e1a5de984..c0e9f0ca647c50 100644 --- a/repack.h +++ b/repack.h @@ -56,6 +56,7 @@ struct packed_git; struct existing_packs { struct repository *repo; + struct odb_source *source; struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; From 04cd9cde474fdbd4f57b58912e225dcdd00eaae1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:11 -0400 Subject: [PATCH 035/241] midx: expose `midx_layer_contains_pack()` Rename the function `midx_contains_pack_1()` to instead be called `midx_layer_contains_pack()` and make it accessible. 
Unlike `midx_contains_pack()` (which recurses through the entire chain), this function checks only a single MIDX layer. This will be used by a subsequent commit to determine whether a given pack belongs to the tip MIDX layer specifically, rather than to any layer in the chain. No functional changes are present in this commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 6 +++--- midx.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/midx.c b/midx.c index e6b1fbe37d7718..7d23338aa3a400 100644 --- a/midx.c +++ b/midx.c @@ -667,8 +667,8 @@ static int midx_pack_names_cmp(const void *a, const void *b, void *m_) m->pack_names[*(const size_t *)b]); } -static int midx_contains_pack_1(struct multi_pack_index *m, - const char *idx_or_pack_name) +int midx_layer_contains_pack(struct multi_pack_index *m, + const char *idx_or_pack_name) { uint32_t first = 0, last = m->num_packs; @@ -709,7 +709,7 @@ static int midx_contains_pack_1(struct multi_pack_index *m, int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) { for (; m; m = m->base_midx) - if (midx_contains_pack_1(m, idx_or_pack_name)) + if (midx_layer_contains_pack(m, idx_or_pack_name)) return 1; return 0; } diff --git a/midx.h b/midx.h index e4a75ff2bef82a..f211a38b9e7157 100644 --- a/midx.h +++ b/midx.h @@ -119,6 +119,8 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid, int fill_midx_entry(struct multi_pack_index *m, const struct object_id *oid, struct pack_entry *e); int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name); +int midx_layer_contains_pack(struct multi_pack_index *m, + const char *idx_or_pack_name); int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id); int prepare_multi_pack_index_one(struct odb_source *source); From f702566e54a1d5a3ad826a62205981fe6a73e524 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:13 -0400 Subject: [PATCH 036/241] repack-midx: 
factor out `repack_prepare_midx_command()` The `write_midx_included_packs()` function assembles and executes a `git multi-pack-index write` command, constructing the argument list inline. Future commits will introduce additional callers that need to construct similar `git multi-pack-index` commands (for both `write` and `compact` subcommands), so extract the common portions of the command setup into a reusable `repack_prepare_midx_command()` helper. The extracted helper sets `git_cmd`, pushes the `multi-pack-index` subcommand and verb, and handles `--progress`/`--no-progress` and `--bitmap` flags. The remaining arguments that are specific to the `write` subcommand (such as `--stdin-packs`) are left to the caller. No functional changes are included in this patch. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- repack-midx.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/repack-midx.c b/repack-midx.c index 0682b80c4278d4..83151d4734ae6d 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -275,6 +275,23 @@ static void remove_redundant_bitmaps(struct string_list *include, strbuf_release(&path); } +static void repack_prepare_midx_command(struct child_process *cmd, + struct repack_write_midx_opts *opts, + const char *verb) +{ + cmd->git_cmd = 1; + + strvec_pushl(&cmd->args, "multi-pack-index", verb, NULL); + + if (opts->show_progress) + strvec_push(&cmd->args, "--progress"); + else + strvec_push(&cmd->args, "--no-progress"); + + if (opts->write_bitmaps) + strvec_push(&cmd->args, "--bitmap"); +} + int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -289,18 +306,9 @@ int write_midx_included_packs(struct repack_write_midx_opts *opts) goto done; cmd.in = -1; - cmd.git_cmd = 1; - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (opts->show_progress) - strvec_push(&cmd.args, "--progress"); 
- else - strvec_push(&cmd.args, "--no-progress"); - - if (opts->write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); + repack_prepare_midx_command(&cmd, opts, "write"); + strvec_push(&cmd.args, "--stdin-packs"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", From 16a20d113c9fc8bc9b9657987ddc7e9d2798c601 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:16 -0400 Subject: [PATCH 037/241] repack-midx: extract `repack_fill_midx_stdin_packs()` The function `write_midx_included_packs()` manages the lifecycle of writing packs to stdin when running `git multi-pack-index write` as a child process. Extract a standalone `repack_fill_midx_stdin_packs()` helper, which handles `--stdin-packs` argument setup, starting the command, writing pack names to its standard input, and finishing the command. This simplifies `write_midx_included_packs()` and prepares for a subsequent commit where the same helper is called with `cmd->out = -1` to capture the MIDX's checksum from the command's standard output, which is needed when writing MIDX layers with `--checksum-only`. No functional changes are included in this patch. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- repack-midx.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/repack-midx.c b/repack-midx.c index 83151d4734ae6d..78f069c2151c96 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -292,23 +292,42 @@ static void repack_prepare_midx_command(struct child_process *cmd, strvec_push(&cmd->args, "--bitmap"); } +static int repack_fill_midx_stdin_packs(struct child_process *cmd, + struct string_list *include) +{ + struct string_list_item *item; + FILE *in; + int ret; + + cmd->in = -1; + + strvec_push(&cmd->args, "--stdin-packs"); + + ret = start_command(cmd); + if (ret) + return ret; + + in = xfdopen(cmd->in, "w"); + for_each_string_list_item(item, include) + fprintf(in, "%s\n", item->string); + fclose(in); + + return finish_command(cmd); +} + int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list include = STRING_LIST_INIT_DUP; struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); - FILE *in; int ret = 0; midx_included_packs(&include, opts); if (!include.nr) goto done; - cmd.in = -1; - repack_prepare_midx_command(&cmd, opts, "write"); - strvec_push(&cmd.args, "--stdin-packs"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", @@ -350,16 +369,7 @@ int write_midx_included_packs(struct repack_write_midx_opts *opts) strvec_pushf(&cmd.args, "--refs-snapshot=%s", opts->refs_snapshot); - ret = start_command(&cmd); - if (ret) - goto done; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, &include) - fprintf(in, "%s\n", item->string); - fclose(in); - - ret = finish_command(&cmd); + ret = repack_fill_midx_stdin_packs(&cmd, &include); done: if (!ret && opts->write_bitmaps) remove_redundant_bitmaps(&include, opts->packdir); From 4e84640660eadb75db74d4ae67c2e4a0014ad67e Mon Sep 17 00:00:00 2001 From: Taylor 
Blau Date: Sun, 29 Mar 2026 17:41:18 -0400 Subject: [PATCH 038/241] repack-geometry: prepare for incremental MIDX repacking Teach `pack_geometry_init()` to optionally restrict the set of repacking candidates to only packs in the tip MIDX layer when a `midx_layer_threshold` is configured. If the tip layer has fewer packs than the threshold, those packs are excluded entirely; otherwise only packs in that layer participate in the geometric repack. Also track whether any tip-layer packs were included in the rollup (`midx_tip_rewritten`), which a subsequent commit will use to decide how to update the MIDX chain after repacking. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- repack-geometry.c | 37 +++++++++++++++++++++++++++++++++++++ repack.h | 4 ++++ 2 files changed, 41 insertions(+) diff --git a/repack-geometry.c b/repack-geometry.c index 7cebd0cb45f0ea..d2065205f87361 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -4,6 +4,7 @@ #include "repack.h" #include "repository.h" #include "hex.h" +#include "midx.h" #include "packfile.h" static uint32_t pack_geometry_weight(struct packed_git *p) @@ -31,8 +32,30 @@ void pack_geometry_init(struct pack_geometry *geometry, { struct packed_git *p; struct strbuf buf = STRBUF_INIT; + struct multi_pack_index *m = get_multi_pack_index(existing->source); repo_for_each_pack(existing->repo, p) { + if (geometry->midx_layer_threshold_set && m && + p->multi_pack_index) { + /* + * When writing MIDX layers incrementally, + * ignore packs unless they are in the most + * recent MIDX layer *and* there are at least + * 'midx_layer_threshold' packs in that layer. + * + * Otherwise 'p' is either in an older layer, or + * the youngest layer does not have enough packs + * to consider its packs as candidates for + * repacking. In either of those cases we want + * to ignore the pack. 
+ */ + if (m->num_packs > geometry->midx_layer_threshold && + midx_layer_contains_pack(m, pack_basename(p))) + ; + else + continue; + } + if (args->local && !p->pack_local) /* * When asked to only repack local packfiles we skip @@ -173,6 +196,20 @@ void pack_geometry_split(struct pack_geometry *geometry) geometry->promisor_split = compute_pack_geometry_split(geometry->promisor_pack, geometry->promisor_pack_nr, geometry->split_factor); + for (uint32_t i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + /* + * During incremental MIDX/bitmap repacking, any packs + * included in the rollup are either (a) not MIDX'd, or + * (b) contained in the tip layer iff it has more than + * the threshold number of packs. + * + * In the latter case, we can safely conclude that the + * tip of the MIDX chain will be rewritten. + */ + if (p->multi_pack_index) + geometry->midx_tip_rewritten = true; + } } struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) diff --git a/repack.h b/repack.h index c0e9f0ca647c50..77d24ee45fb6ae 100644 --- a/repack.h +++ b/repack.h @@ -108,6 +108,10 @@ struct pack_geometry { uint32_t promisor_pack_nr, promisor_pack_alloc; uint32_t promisor_split; + uint32_t midx_layer_threshold; + bool midx_layer_threshold_set; + bool midx_tip_rewritten; + int split_factor; }; From 70226c8c3068607b19b3f5999ed7afe980399ff0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:21 -0400 Subject: [PATCH 039/241] builtin/repack.c: convert `--write-midx` to an `OPT_CALLBACK` Change the --write-midx (-m) flag from an OPT_BOOL to an OPT_CALLBACK that accepts an optional mode argument. Introduce an enum with REPACK_WRITE_MIDX_NONE and REPACK_WRITE_MIDX_DEFAULT to distinguish between the two states, and update all existing boolean checks accordingly. For now, passing no argument (or just `-m`) selects the default mode, preserving existing behavior. 
A subsequent commit will add a new mode for writing incremental MIDXs. Extract repack_write_midx() as a dispatcher that selects the appropriate MIDX-writing implementation based on the mode. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 50 ++++++++++++++++++++++++++++++++++++------------ repack-midx.c | 14 +++++++++++++- repack.h | 8 +++++++- 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 44a95b56f23f4c..3a5042491d6d64 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -97,6 +97,24 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } +static int option_parse_write_midx(const struct option *opt, const char *arg, + int unset) +{ + enum repack_write_midx_mode *cfg = opt->value; + + if (unset) { + *cfg = REPACK_WRITE_MIDX_NONE; + return 0; + } + + if (!arg || !*arg) + *cfg = REPACK_WRITE_MIDX_DEFAULT; + else + return error(_("unknown value for %s: %s"), opt->long_name, arg); + + return 0; +} + int cmd_repack(int argc, const char **argv, const char *prefix, @@ -119,7 +137,7 @@ int cmd_repack(int argc, struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; - int write_midx = 0; + enum repack_write_midx_mode write_midx = REPACK_WRITE_MIDX_NONE; const char *cruft_expiration = NULL; const char *expire_to = NULL; const char *filter_to = NULL; @@ -185,8 +203,14 @@ int cmd_repack(int argc, N_("do not repack this pack")), OPT_INTEGER('g', "geometric", &geometry.split_factor, N_("find a geometric progression with factor ")), - OPT_BOOL('m', "write-midx", &write_midx, - N_("write a multi-pack index of the resulting packs")), + OPT_CALLBACK_F(0, "write-midx", &write_midx, + N_("mode"), + N_("write a multi-pack index of the resulting packs"), + PARSE_OPT_OPTARG, option_parse_write_midx), + 
OPT_SET_INT_F('m', NULL, &write_midx, + N_("write a multi-pack index of the resulting packs"), + REPACK_WRITE_MIDX_DEFAULT, + PARSE_OPT_HIDDEN), OPT_STRING(0, "expire-to", &expire_to, N_("dir"), N_("pack prefix to store a pack containing pruned objects")), OPT_STRING(0, "filter-to", &filter_to, N_("dir"), @@ -221,14 +245,16 @@ int cmd_repack(int argc, pack_everything |= ALL_INTO_ONE; if (write_bitmaps < 0) { - if (!write_midx && + if (write_midx == REPACK_WRITE_MIDX_NONE && (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } if (po_args.pack_kept_objects < 0) - po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; + po_args.pack_kept_objects = write_bitmaps > 0 && + write_midx == REPACK_WRITE_MIDX_NONE; - if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) + if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && + write_midx == REPACK_WRITE_MIDX_NONE) die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && @@ -244,7 +270,7 @@ int cmd_repack(int argc, write_bitmaps = 0; } - if (write_midx && write_bitmaps) { + if (write_midx != REPACK_WRITE_MIDX_NONE && write_bitmaps) { struct strbuf path = STRBUF_INIT; strbuf_addf(&path, "%s/%s_XXXXXX", @@ -297,7 +323,7 @@ int cmd_repack(int argc, } if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); - if (!write_midx) { + if (write_midx == REPACK_WRITE_MIDX_NONE) { if (write_bitmaps > 0) strvec_push(&cmd.args, "--write-bitmap-index"); else if (write_bitmaps < 0) @@ -504,7 +530,7 @@ int cmd_repack(int argc, if (delete_redundant && pack_everything & ALL_INTO_ONE) existing_packs_mark_for_deletion(&existing, &names); - if (write_midx) { + if (write_midx != REPACK_WRITE_MIDX_NONE) { struct repack_write_midx_opts opts = { .existing = &existing, .geometry = &geometry, @@ -513,11 +539,11 @@ int cmd_repack(int argc, .packdir = packdir, .show_progress = show_progress, .write_bitmaps = write_bitmaps > 0, - 
.midx_must_contain_cruft = midx_must_contain_cruft + .midx_must_contain_cruft = midx_must_contain_cruft, + .mode = write_midx, }; - ret = write_midx_included_packs(&opts); - + ret = repack_write_midx(&opts); if (ret) goto cleanup; } diff --git a/repack-midx.c b/repack-midx.c index 78f069c2151c96..4a568a2a9b85d9 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -315,7 +315,7 @@ static int repack_fill_midx_stdin_packs(struct child_process *cmd, return finish_command(cmd); } -int write_midx_included_packs(struct repack_write_midx_opts *opts) +static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list include = STRING_LIST_INIT_DUP; @@ -378,3 +378,15 @@ int write_midx_included_packs(struct repack_write_midx_opts *opts) return ret; } + +int repack_write_midx(struct repack_write_midx_opts *opts) +{ + switch (opts->mode) { + case REPACK_WRITE_MIDX_NONE: + BUG("write_midx mode is NONE?"); + case REPACK_WRITE_MIDX_DEFAULT: + return write_midx_included_packs(opts); + default: + BUG("unhandled write_midx mode: %d", opts->mode); + } +} diff --git a/repack.h b/repack.h index 77d24ee45fb6ae..81907fcce7ff94 100644 --- a/repack.h +++ b/repack.h @@ -134,6 +134,11 @@ void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; +enum repack_write_midx_mode { + REPACK_WRITE_MIDX_NONE, + REPACK_WRITE_MIDX_DEFAULT, +}; + struct repack_write_midx_opts { struct existing_packs *existing; struct pack_geometry *geometry; @@ -143,10 +148,11 @@ struct repack_write_midx_opts { int show_progress; int write_bitmaps; int midx_must_contain_cruft; + enum repack_write_midx_mode mode; }; void midx_snapshot_refs(struct repository *repo, struct tempfile *f); -int write_midx_included_packs(struct repack_write_midx_opts *opts); +int repack_write_midx(struct repack_write_midx_opts *opts); int write_filtered_pack(const struct write_pack_opts *opts, struct existing_packs *existing, From 
68a1c8880f84a1574f72b971b9d1a185bb52bf3f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:23 -0400 Subject: [PATCH 040/241] packfile: ensure `close_pack_revindex()` frees in-memory revindex The following commit will introduce a case where we write a MIDX bitmap over packs that do not themselves have on-disk *.rev files. This case is supported within Git, and we will simply fall back to generating the revindex in memory. But we don't ever release that memory, causing a leak that is exposed by a test introduced in the following commit. (As far as I could find, we never free()'d memory allocated as a byproduct of creating an in-memory revindex, likely because that code predates the leak-checking niceties we have in the test suite now.) Rectify this by calling `FREE_AND_NULL()` on the `p->revindex` field when calling `close_pack_revindex()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- packfile.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packfile.c b/packfile.c index d4de9f3ffe831e..97f7662e079d78 100644 --- a/packfile.c +++ b/packfile.c @@ -420,6 +420,8 @@ void close_pack_index(struct packed_git *p) static void close_pack_revindex(struct packed_git *p) { + FREE_AND_NULL(p->revindex); + if (!p->revindex_map) return; From e96ff36dfbbf8d4c8d524c50f4292f18e9d7b3cf Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:26 -0400 Subject: [PATCH 041/241] repack: implement incremental MIDX repacking Implement the `write_midx_incremental()` function, which builds and maintains an incremental MIDX chain as part of the geometric repacking process. Unlike the default mode which writes a single flat MIDX, the incremental mode constructs a compaction plan that determines which MIDX layers to write, compact, or copy, and then executes each step using `git multi-pack-index` subcommands with the --checksum-only flag. The repacking strategy works as follows: * Acquire the lock guarding the multi-pack-index-chain. 
* A new MIDX layer is always written containing the newly created pack(s). If the tip MIDX layer was rewritten during geometric repacking, any surviving packs from that layer are also included. * Starting from the new layer, adjacent MIDX layers are merged together as long as the accumulated object count exceeds half the object count of the next deeper layer (controlled by 'repack.midxSplitFactor'). * Remaining layers in the chain are evaluated pairwise and either compacted or copied as-is, following the same merging condition. * Write the contents of the new multi-pack-index chain, atomically move it into place, and then release the lock. * Delete any now-unused MIDX layers. After writing the new layer, the strategy is evaluated among the existing MIDX layers in order from oldest to newest. Each step that writes a new MIDX layer uses "--checksum-only" to avoid updating the multi-pack-index-chain file. After all steps are complete, the new chain file is written and then atomically moved into place. At present, this functionality is exposed behind a new enum value, `REPACK_WRITE_MIDX_INCREMENTAL`, but has no external callers. A subsequent commit will expose this mode via `git repack --write-midx=incremental`. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- repack-midx.c | 578 +++++++++++++++++++++++++++++++++++++++++++++++++- repack.h | 3 + 2 files changed, 579 insertions(+), 2 deletions(-) diff --git a/repack-midx.c b/repack-midx.c index 4a568a2a9b85d9..a159b879775e1d 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -2,12 +2,16 @@ #include "repack.h" #include "hash.h" #include "hex.h" +#include "lockfile.h" +#include "midx.h" #include "odb.h" #include "oidset.h" #include "pack-bitmap.h" +#include "path.h" #include "refs.h" #include "run-command.h" #include "tempfile.h" +#include "trace2.h" struct midx_snapshot_ref_data { struct repository *repo; @@ -293,13 +297,16 @@ static void repack_prepare_midx_command(struct child_process *cmd, } static int repack_fill_midx_stdin_packs(struct child_process *cmd, - struct string_list *include) + struct string_list *include, + struct string_list *out) { struct string_list_item *item; FILE *in; int ret; cmd->in = -1; + if (out) + cmd->out = -1; strvec_push(&cmd->args, "--stdin-packs"); @@ -312,6 +319,17 @@ static int repack_fill_midx_stdin_packs(struct child_process *cmd, fprintf(in, "%s\n", item->string); fclose(in); + if (out) { + struct strbuf buf = STRBUF_INIT; + FILE *outf = xfdopen(cmd->out, "r"); + + while (strbuf_getline(&buf, outf) != EOF) + string_list_append(out, buf.buf); + strbuf_release(&buf); + + fclose(outf); + } + return finish_command(cmd); } @@ -369,7 +387,7 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) strvec_pushf(&cmd.args, "--refs-snapshot=%s", opts->refs_snapshot); - ret = repack_fill_midx_stdin_packs(&cmd, &include); + ret = repack_fill_midx_stdin_packs(&cmd, &include, NULL); done: if (!ret && opts->write_bitmaps) remove_redundant_bitmaps(&include, opts->packdir); @@ -379,6 +397,560 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) return ret; } +struct midx_compaction_step { + union { + struct multi_pack_index *copy; + struct string_list 
write; + struct { + struct multi_pack_index *from; + struct multi_pack_index *to; + } compact; + } u; + + uint32_t objects_nr; + char *csum; + + enum { + MIDX_COMPACTION_STEP_UNKNOWN, + MIDX_COMPACTION_STEP_COPY, + MIDX_COMPACTION_STEP_WRITE, + MIDX_COMPACTION_STEP_COMPACT, + } type; +}; + +static const char *midx_compaction_step_base(const struct midx_compaction_step *step) +{ + switch (step->type) { + case MIDX_COMPACTION_STEP_UNKNOWN: + BUG("cannot use UNKNOWN step as a base"); + case MIDX_COMPACTION_STEP_COPY: + return midx_get_checksum_hex(step->u.copy); + case MIDX_COMPACTION_STEP_WRITE: + BUG("cannot use WRITE step as a base"); + case MIDX_COMPACTION_STEP_COMPACT: + return midx_get_checksum_hex(step->u.compact.to); + default: + BUG("unhandled midx compaction step type %d", step->type); + } +} + +static int midx_compaction_step_exec_copy(struct midx_compaction_step *step) +{ + step->csum = xstrdup(midx_get_checksum_hex(step->u.copy)); + return 0; +} + +static int midx_compaction_step_exec_write(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts, + const char *base) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct string_list hash = STRING_LIST_INIT_DUP; + struct string_list_item *item; + const char *preferred_pack = NULL; + int ret = 0; + + if (!step->u.write.nr) { + ret = error(_("no packs to write MIDX during compaction")); + goto out; + } + + for_each_string_list_item(item, &step->u.write) { + if (item->util) + preferred_pack = item->string; + } + + repack_prepare_midx_command(&cmd, opts, "write"); + strvec_pushl(&cmd.args, "--incremental", "--checksum-only", NULL); + strvec_pushf(&cmd.args, "--base=%s", base ? 
base : "none"); + + if (preferred_pack) { + struct strbuf buf = STRBUF_INIT; + + strbuf_addstr(&buf, preferred_pack); + strbuf_strip_suffix(&buf, ".idx"); + strbuf_addstr(&buf, ".pack"); + + strvec_pushf(&cmd.args, "--preferred-pack=%s", buf.buf); + + strbuf_release(&buf); + } + + ret = repack_fill_midx_stdin_packs(&cmd, &step->u.write, &hash); + if (hash.nr != 1) { + ret = error(_("expected exactly one line during MIDX write, " + "got: %"PRIuMAX), + (uintmax_t)hash.nr); + goto out; + } + + step->csum = xstrdup(hash.items[0].string); + +out: + string_list_clear(&hash, 0); + + return ret; +} + +static int midx_compaction_step_exec_compact(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct strbuf buf = STRBUF_INIT; + FILE *out = NULL; + int ret; + + repack_prepare_midx_command(&cmd, opts, "compact"); + strvec_pushl(&cmd.args, "--incremental", "--checksum-only", + midx_get_checksum_hex(step->u.compact.from), + midx_get_checksum_hex(step->u.compact.to), NULL); + + cmd.out = -1; + + ret = start_command(&cmd); + if (ret) + goto out; + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&buf, out) != EOF) { + if (step->csum) { + ret = error(_("unexpected MIDX output: '%s'"), buf.buf); + goto out; + } + step->csum = strbuf_detach(&buf, NULL); + } + + ret = finish_command(&cmd); + +out: + if (out) + fclose(out); + strbuf_release(&buf); + + return ret; +} + +static int midx_compaction_step_exec(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts, + const char *base) +{ + switch (step->type) { + case MIDX_COMPACTION_STEP_UNKNOWN: + BUG("cannot execute UNKNOWN midx compaction step"); + case MIDX_COMPACTION_STEP_COPY: + return midx_compaction_step_exec_copy(step); + case MIDX_COMPACTION_STEP_WRITE: + return midx_compaction_step_exec_write(step, opts, base); + case MIDX_COMPACTION_STEP_COMPACT: + return midx_compaction_step_exec_compact(step, opts); + default: + 
BUG("unhandled midx compaction step type %d", step->type); + } +} + +static void midx_compaction_step_release(struct midx_compaction_step *step) +{ + if (step->type == MIDX_COMPACTION_STEP_WRITE) + string_list_clear(&step->u.write, 0); + free(step->csum); +} + +static int repack_make_midx_compaction_plan(struct repack_write_midx_opts *opts, + struct midx_compaction_step **steps_p, + size_t *steps_nr_p) +{ + struct multi_pack_index *m; + struct midx_compaction_step *steps = NULL; + struct midx_compaction_step step = { 0 }; + struct strbuf buf = STRBUF_INIT; + size_t steps_nr = 0, steps_alloc = 0; + uint32_t i; + int ret = 0; + + trace2_region_enter("repack", "make_midx_compaction_plan", + opts->existing->repo); + + odb_reprepare(opts->existing->repo->objects); + m = get_multi_pack_index(opts->existing->source); + + for (i = 0; m && i < m->num_packs + m->num_packs_in_base; i++) { + if (prepare_midx_pack(m, i)) { + ret = error(_("could not load pack %"PRIu32" from MIDX"), + i); + goto out; + } + } + + trace2_region_enter("repack", "steps:write", opts->existing->repo); + + /* + * The first MIDX in the resulting chain is always going to be + * new. + * + * At a minimum, it will include all of the newly written packs. + * If there is an existing MIDX whose tip layer contains packs + * that were repacked, it will also include any of its packs + * which were *not* rolled up as part of the geometric repack + * (if any), and the previous tip will be replaced. + * + * It may grow to include the packs from zero or more MIDXs from + * the old chain, beginning either at the old tip (if the MIDX + * was *not* rewritten) or the old tip's base MIDX layer + * (otherwise). 
+ */ + step.type = MIDX_COMPACTION_STEP_WRITE; + string_list_init_dup(&step.u.write); + + for (i = 0; i < opts->names->nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", opts->names->items[i].string); + string_list_append(&step.u.write, buf.buf); + + trace2_data_string("repack", opts->existing->repo, + "include:fresh", + step.u.write.items[step.u.write.nr - 1].string); + } + for (i = 0; i < opts->geometry->split; i++) { + struct packed_git *p = opts->geometry->pack[i]; + if (unsigned_add_overflows(step.objects_nr, p->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + + step.objects_nr += p->num_objects; + } + trace2_data_intmax("repack", opts->existing->repo, + "include:fresh:objects_nr", + (uintmax_t)step.objects_nr); + + /* + * Now handle any existing packs which were *not* rewritten. + * + * The list of packs in opts->geometry only contains MIDX'd + * packs from the newest layer when that layer has more than + * 'repack.midxNewLayerThreshold' number of packs. + * + * If the MIDX tip was rewritten (that is, one or more of those + * packs appear below the split line), then add all packs above + * the split line to the new layer, as the old one is no longer + * usable. + * + * If the MIDX tip was not rewritten (that is, all MIDX'd packs + * from the youngest layer appear below the split line, or were + * not included in the geometric repack at all because there + * were too few of them), ignore them since we'll retain the + * existing layer as-is. 
+ */ + for (i = opts->geometry->split; i < opts->geometry->pack_nr; i++) { + struct packed_git *p = opts->geometry->pack[i]; + struct string_list_item *item; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (p->multi_pack_index && + !opts->geometry->midx_tip_rewritten) { + trace2_data_string("repack", opts->existing->repo, + "exclude:unmodified", buf.buf); + continue; + } + + trace2_data_string("repack", opts->existing->repo, + "include:unmodified", buf.buf); + trace2_data_string("repack", opts->existing->repo, + "include:unmodified:midx", + p->multi_pack_index ? "true" : "false"); + + item = string_list_append(&step.u.write, buf.buf); + if (p->multi_pack_index || i == opts->geometry->pack_nr - 1) + item->util = (void *)1; /* mark as preferred */ + + if (unsigned_add_overflows(step.objects_nr, p->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + + step.objects_nr += p->num_objects; + } + trace2_data_intmax("repack", opts->existing->repo, + "include:unmodified:objects_nr", + (uintmax_t)step.objects_nr); + + /* + * If the MIDX tip was rewritten, then we no longer consider it + * a candidate for compaction, since it will not exist in the + * MIDX chain being built. + */ + if (opts->geometry->midx_tip_rewritten) + m = m->base_midx; + + trace2_data_string("repack", opts->existing->repo, "midx:rewrote-tip", + opts->geometry->midx_tip_rewritten ? "true" : "false"); + + trace2_region_enter("repack", "compact", opts->existing->repo); + + /* + * Compact additional MIDX layers into this proposed one until + * the merging condition is violated. 
+ */ + while (m) { + uint32_t preferred_pack_idx; + + trace2_data_string("repack", opts->existing->repo, + "candidate", midx_get_checksum_hex(m)); + + if (step.objects_nr < m->num_objects / opts->midx_split_factor) { + /* + * Stop compacting MIDX layer as soon as the + * merged size is less than half the size of the + * next layer in the chain. + */ + trace2_data_string("repack", opts->existing->repo, + "compact", "violated"); + trace2_data_intmax("repack", opts->existing->repo, + "objects_nr", + (uintmax_t)step.objects_nr); + trace2_data_intmax("repack", opts->existing->repo, + "next_objects_nr", + (uintmax_t)m->num_objects); + trace2_data_intmax("repack", opts->existing->repo, + "split_factor", + (uintmax_t)opts->midx_split_factor); + + break; + } + + if (midx_preferred_pack(m, &preferred_pack_idx) < 0) { + ret = error(_("could not find preferred pack for MIDX " + "%s"), midx_get_checksum_hex(m)); + goto out; + } + + for (i = 0; i < m->num_packs; i++) { + struct string_list_item *item; + uint32_t pack_int_id = i + m->num_packs_in_base; + struct packed_git *p = nth_midxed_pack(m, pack_int_id); + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + trace2_data_string("repack", opts->existing->repo, + "midx:pack", buf.buf); + + item = string_list_append(&step.u.write, buf.buf); + if (pack_int_id == preferred_pack_idx) + item->util = (void *)1; /* mark as preferred */ + } + + if (unsigned_add_overflows(step.objects_nr, m->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + step.objects_nr += m->num_objects; + + m = m->base_midx; + } + + if (step.u.write.nr > 0) { + /* + * As long as there is at least one new pack to write + * (and thus the MIDX is non-empty), add it to the plan. 
+ */ + ALLOC_GROW(steps, steps_nr + 1, steps_alloc); + steps[steps_nr++] = step; + } + + trace2_data_intmax("repack", opts->existing->repo, + "step:objects_nr", (uintmax_t)step.objects_nr); + trace2_data_intmax("repack", opts->existing->repo, + "step:packs_nr", (uintmax_t)step.u.write.nr); + + trace2_region_leave("repack", "compact", opts->existing->repo); + trace2_region_leave("repack", "steps:write", opts->existing->repo); + + trace2_region_enter("repack", "steps:rest", opts->existing->repo); + + /* + * Then start over, repeat, and either compact or keep as-is + * each MIDX layer until we have exhausted the chain. + * + * Finally, evaluate the remainder of the chain (if any) and + * either compact a sequence of adjacent layers, or keep + * individual layers as-is according to the same merging + * condition as above. + */ + while (m) { + struct multi_pack_index *next = m; + + ALLOC_GROW(steps, steps_nr + 1, steps_alloc); + + memset(&step, 0, sizeof(step)); + step.type = MIDX_COMPACTION_STEP_UNKNOWN; + + trace2_region_enter("repack", "step", opts->existing->repo); + + trace2_data_string("repack", opts->existing->repo, + "from", midx_get_checksum_hex(m)); + + while (next) { + uint32_t proposed_objects_nr; + if (unsigned_add_overflows(step.objects_nr, next->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + trace2_region_leave("repack", "step", opts->existing->repo); + goto out; + } + + proposed_objects_nr = step.objects_nr + next->num_objects; + + trace2_data_string("repack", opts->existing->repo, + "proposed", + midx_get_checksum_hex(next)); + trace2_data_intmax("repack", opts->existing->repo, + "proposed:objects_nr", + (uintmax_t)next->num_objects); + + if (!next->base_midx) { + /* + * If we are at the end of the MIDX + * chain, there is nothing to compact, + * so mark it and stop. 
+ */ + step.objects_nr = proposed_objects_nr; + break; + } + + if (proposed_objects_nr < next->base_midx->num_objects / opts->midx_split_factor) { + /* + * If there is a MIDX following this + * one, but our accumulated size is less + * than half of its size, compacting + * them would violate the merging + * condition, so stop here. + */ + + trace2_data_string("repack", opts->existing->repo, + "compact:violated:at", + midx_get_checksum_hex(next->base_midx)); + trace2_data_intmax("repack", opts->existing->repo, + "compact:violated:at:objects_nr", + (uintmax_t)next->base_midx->num_objects); + break; + } + + /* + * Otherwise, it is OK to compact the next layer + * into this one. Do so, and then continue + * through the remainder of the chain. + */ + step.objects_nr = proposed_objects_nr; + trace2_data_intmax("repack", opts->existing->repo, + "step:objects_nr", + (uintmax_t)step.objects_nr); + next = next->base_midx; + } + + if (m == next) { + step.type = MIDX_COMPACTION_STEP_COPY; + step.u.copy = m; + + trace2_data_string("repack", opts->existing->repo, + "type", "copy"); + } else { + step.type = MIDX_COMPACTION_STEP_COMPACT; + step.u.compact.from = next; + step.u.compact.to = m; + + trace2_data_string("repack", opts->existing->repo, + "to", midx_get_checksum_hex(m)); + trace2_data_string("repack", opts->existing->repo, + "type", "compact"); + } + + m = next->base_midx; + steps[steps_nr++] = step; + trace2_region_leave("repack", "step", opts->existing->repo); + } + + trace2_region_leave("repack", "steps:rest", opts->existing->repo); + +out: + *steps_p = steps; + *steps_nr_p = steps_nr; + + strbuf_release(&buf); + + trace2_region_leave("repack", "make_midx_compaction_plan", + opts->existing->repo); + + return ret; +} + +static int write_midx_incremental(struct repack_write_midx_opts *opts) +{ + struct midx_compaction_step *steps = NULL; + struct strbuf lock_name = STRBUF_INIT; + struct lock_file lf; + size_t steps_nr = 0; + size_t i; + int ret = 0; + + 
get_midx_chain_filename(opts->existing->source, &lock_name); + if (safe_create_leading_directories(opts->existing->repo, + lock_name.buf)) + die_errno(_("unable to create leading directories of %s"), + lock_name.buf); + hold_lock_file_for_update(&lf, lock_name.buf, LOCK_DIE_ON_ERROR); + + if (!fdopen_lock_file(&lf, "w")) { + ret = error_errno(_("unable to open multi-pack-index chain file")); + goto done; + } + + if (repack_make_midx_compaction_plan(opts, &steps, &steps_nr) < 0) { + ret = error(_("unable to generate compaction plan")); + goto done; + } + + for (i = 0; i < steps_nr; i++) { + struct midx_compaction_step *step = &steps[i]; + char *base = NULL; + + if (i + 1 < steps_nr) + base = xstrdup(midx_compaction_step_base(&steps[i + 1])); + + if (midx_compaction_step_exec(step, opts, base) < 0) { + ret = error(_("unable to execute compaction step %"PRIuMAX), + (uintmax_t)i); + free(base); + goto done; + } + + free(base); + } + + i = steps_nr; + while (i--) { + struct midx_compaction_step *step = &steps[i]; + if (!step->csum) + BUG("missing result for compaction step %"PRIuMAX, + (uintmax_t)i); + fprintf(get_lock_file_fp(&lf), "%s\n", step->csum); + } + + commit_lock_file(&lf); + +done: + strbuf_release(&lock_name); + for (i = 0; i < steps_nr; i++) + midx_compaction_step_release(&steps[i]); + free(steps); + return ret; +} + int repack_write_midx(struct repack_write_midx_opts *opts) { switch (opts->mode) { @@ -386,6 +958,8 @@ int repack_write_midx(struct repack_write_midx_opts *opts) BUG("write_midx mode is NONE?"); case REPACK_WRITE_MIDX_DEFAULT: return write_midx_included_packs(opts); + case REPACK_WRITE_MIDX_INCREMENTAL: + return write_midx_incremental(opts); default: BUG("unhandled write_midx mode: %d", opts->mode); } diff --git a/repack.h b/repack.h index 81907fcce7ff94..831ccfb1c6ce77 100644 --- a/repack.h +++ b/repack.h @@ -137,6 +137,7 @@ struct tempfile; enum repack_write_midx_mode { REPACK_WRITE_MIDX_NONE, REPACK_WRITE_MIDX_DEFAULT, + 
REPACK_WRITE_MIDX_INCREMENTAL, }; struct repack_write_midx_opts { @@ -148,6 +149,8 @@ struct repack_write_midx_opts { int show_progress; int write_bitmaps; int midx_must_contain_cruft; + int midx_split_factor; + int midx_new_layer_threshold; enum repack_write_midx_mode mode; }; From a00b0db47f11fe1366bf2b81ffcc7f3566dd224a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:29 -0400 Subject: [PATCH 042/241] repack: introduce `--write-midx=incremental` Expose the incremental MIDX repacking mode (implemented in an earlier commit) via a new --write-midx=incremental option for `git repack`. Add "incremental" as a recognized argument to the --write-midx OPT_CALLBACK, mapping it to REPACK_WRITE_MIDX_INCREMENTAL. When this mode is active and --geometric is in use, set the midx_layer_threshold on the pack geometry so that only packs in sufficiently large tip layers are considered for repacking. Two new configuration options control the compaction behavior: - repack.midxSplitFactor (default: 2): the factor used in the geometric merging condition for MIDX layers. - repack.midxNewLayerThreshold (default: 8): the minimum number of packs in the tip MIDX layer before its packs are considered as candidates for geometric repacking. Add tests exercising the new mode across a variety of scenarios including basic geometric violations, multi-round chain integrity, branching and merging histories, cross-layer object uniqueness, and threshold-based compaction. 
Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> --- Documentation/config/repack.adoc | 18 ++ Documentation/git-repack.adoc | 39 ++- builtin/repack.c | 38 ++- midx.c | 31 ++ midx.h | 3 + repack-geometry.c | 13 +- repack-midx.c | 5 + repack.c | 21 +- repack.h | 9 +- t/meson.build | 1 + t/t7705-repack-incremental-midx.sh | 436 +++++++++++++++++++++++++++++ 11 files changed, 593 insertions(+), 21 deletions(-) create mode 100755 t/t7705-repack-incremental-midx.sh diff --git a/Documentation/config/repack.adoc b/Documentation/config/repack.adoc index e9e78dcb198292..054de9f87953ad 100644 --- a/Documentation/config/repack.adoc +++ b/Documentation/config/repack.adoc @@ -46,3 +46,21 @@ repack.midxMustContainCruft:: `--write-midx`. When false, cruft packs are only included in the MIDX when necessary (e.g., because they might be required to form a reachability closure with MIDX bitmaps). Defaults to true. + +repack.midxSplitFactor:: + The factor used in the geometric merging condition when + compacting incremental MIDX layers during `git repack` when + invoked with the `--write-midx=incremental` option. ++ +Adjacent layers are merged when the accumulated object count of the +newer layer exceeds `1/<n>` of the object count of the next deeper +layer. Defaults to 2. + +repack.midxNewLayerThreshold:: + The minimum number of packs in the tip MIDX layer before those + packs are considered as candidates for geometric repacking + during `git repack --write-midx=incremental`. ++ +When the tip layer has fewer packs than this threshold, those packs are +excluded from the geometric repack entirely, and are thus left +unmodified. Defaults to 8. 
diff --git a/Documentation/git-repack.adoc b/Documentation/git-repack.adoc index 673ce91083720d..27a99cc46f4ada 100644 --- a/Documentation/git-repack.adoc +++ b/Documentation/git-repack.adoc @@ -11,7 +11,7 @@ SYNOPSIS [verse] 'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m] [--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>] - [--write-midx] [--name-hash-version=<n>] [--path-walk] + [--write-midx[=<mode>]] [--name-hash-version=<n>] [--path-walk] DESCRIPTION ----------- @@ -250,9 +250,42 @@ pack as the preferred pack for object selection by the MIDX (see linkgit:git-multi-pack-index[1]). -m:: ---write-midx:: +--write-midx[=<mode>]:: Write a multi-pack index (see linkgit:git-multi-pack-index[1]) - containing the non-redundant packs. + containing the non-redundant packs. The following modes are + available: ++ +-- + `default`;; + Write a single MIDX covering all packs. This is the + default when `--write-midx` is given without an + explicit mode. + + `incremental`;; + Write an incremental MIDX chain instead of a single + flat MIDX. This mode requires `--geometric`. ++ +The incremental mode maintains a chain of MIDX layers that is compacted +over time using a geometric merging strategy. Each repack creates a new +tip layer containing the newly written pack(s). Adjacent layers are then +merged whenever the newer layer's object count exceeds +`1/repack.midxSplitFactor` of the next deeper layer's count. Layers +that do not meet this condition are retained as-is. ++ +The result is that newer (tip) layers tend to contain many small packs +with relatively few objects, while older (deeper) layers contain fewer, +larger packs covering more objects. Because compaction is driven by the +tip of the chain, newer layers are also rewritten more frequently than +older ones, which are only touched when enough objects have accumulated +to justify merging into them. This keeps the total number of layers +logarithmic relative to the total number of objects. 
++ +Only packs in the tip MIDX layer are considered as candidates for the +geometric repack; packs in deeper layers are left untouched. If the tip +layer contains fewer packs than `repack.midxNewLayerThreshold`, those +packs are excluded from the geometry entirely, and a new layer is +created for any new pack(s) without disturbing the existing chain. +-- --name-hash-version=<n>:: Provide this argument to the underlying `git pack-objects` process. diff --git a/builtin/repack.c b/builtin/repack.c index 3a5042491d6d64..9e070f35868ffc 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -33,7 +33,7 @@ static int midx_must_contain_cruft = 1; static const char *const git_repack_usage[] = { N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n" "[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]\n" - "[--write-midx] [--name-hash-version=<n>] [--path-walk]"), + "[--write-midx[=<mode>]] [--name-hash-version=<n>] [--path-walk]"), NULL }; @@ -42,9 +42,14 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." 
); +#define DEFAULT_MIDX_SPLIT_FACTOR 2 +#define DEFAULT_MIDX_NEW_LAYER_THRESHOLD 8 + struct repack_config_ctx { struct pack_objects_args *po_args; struct pack_objects_args *cruft_po_args; + int midx_split_factor; + int midx_new_layer_threshold; }; static int repack_config(const char *var, const char *value, @@ -94,6 +99,16 @@ static int repack_config(const char *var, const char *value, midx_must_contain_cruft = git_config_bool(var, value); return 0; } + if (!strcmp(var, "repack.midxsplitfactor")) { + repack_ctx->midx_split_factor = git_config_int(var, value, + ctx->kvi); + return 0; + } + if (!strcmp(var, "repack.midxnewlayerthreshold")) { + repack_ctx->midx_new_layer_threshold = git_config_int(var, value, + ctx->kvi); + return 0; + } return git_default_config(var, value, ctx, cb); } @@ -109,6 +124,8 @@ static int option_parse_write_midx(const struct option *opt, const char *arg, if (!arg || !*arg) *cfg = REPACK_WRITE_MIDX_DEFAULT; + else if (!strcmp(arg, "incremental")) + *cfg = REPACK_WRITE_MIDX_INCREMENTAL; else return error(_("unknown value for %s: %s"), opt->long_name, arg); @@ -223,6 +240,8 @@ int cmd_repack(int argc, memset(&config_ctx, 0, sizeof(config_ctx)); config_ctx.po_args = &po_args; config_ctx.cruft_po_args = &cruft_po_args; + config_ctx.midx_split_factor = DEFAULT_MIDX_SPLIT_FACTOR; + config_ctx.midx_new_layer_threshold = DEFAULT_MIDX_NEW_LAYER_THRESHOLD; repo_config(repo, repack_config, &config_ctx); @@ -244,6 +263,9 @@ int cmd_repack(int argc, if (pack_everything & PACK_CRUFT) pack_everything |= ALL_INTO_ONE; + if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL && !geometry.split_factor) + die(_("--write-midx=incremental requires --geometric")); + if (write_bitmaps < 0) { if (write_midx == REPACK_WRITE_MIDX_NONE && (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) @@ -293,6 +315,10 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); 
+ if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL) { + geometry.midx_layer_threshold = config_ctx.midx_new_layer_threshold; + geometry.midx_layer_threshold_set = true; + } pack_geometry_init(&geometry, &existing, &po_args); pack_geometry_split(&geometry); } @@ -540,6 +566,8 @@ int cmd_repack(int argc, .show_progress = show_progress, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft, + .midx_split_factor = config_ctx.midx_split_factor, + .midx_new_layer_threshold = config_ctx.midx_new_layer_threshold, .mode = write_midx, }; @@ -552,11 +580,15 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - existing_packs_remove_redundant(&existing, packdir); + bool wrote_incremental_midx = write_midx == REPACK_WRITE_MIDX_INCREMENTAL; + + existing_packs_remove_redundant(&existing, packdir, + wrote_incremental_midx); if (geometry.split_factor) pack_geometry_remove_redundant(&geometry, &names, - &existing, packdir); + &existing, packdir, + wrote_incremental_midx); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); diff --git a/midx.c b/midx.c index 7d23338aa3a400..fd9e72a3941749 100644 --- a/midx.c +++ b/midx.c @@ -852,6 +852,37 @@ void clear_midx_file(struct repository *r) strbuf_release(&midx); } +void clear_incremental_midx_files(struct repository *r, + const struct strvec *keep_hashes) +{ + struct strbuf chain = STRBUF_INIT; + + get_midx_chain_filename(r->objects->sources, &chain); + + if (r->objects) { + struct odb_source *source = r->objects->sources; + for (source = r->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (files->packed->midx) + close_midx(files->packed->midx); + files->packed->midx = NULL; + } + } + + if (!keep_hashes && remove_path(chain.buf)) + die(_("failed to clear multi-pack-index chain at %s"), + chain.buf); + + clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_BITMAP, + keep_hashes); + 
clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_REV, + keep_hashes); + clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_MIDX, + keep_hashes); + + strbuf_release(&chain); +} + static int verify_midx_error; __attribute__((format (printf, 1, 2))) diff --git a/midx.h b/midx.h index f211a38b9e7157..b45da0a3144770 100644 --- a/midx.h +++ b/midx.h @@ -9,6 +9,7 @@ struct repository; struct bitmapped_pack; struct git_hash_algo; struct odb_source; +struct strvec; #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION_V1 1 @@ -143,6 +144,8 @@ int write_midx_file_compact(struct odb_source *source, const char *incremental_base, unsigned flags); void clear_midx_file(struct repository *r); +void clear_incremental_midx_files(struct repository *r, + const struct strvec *keep_hashes); int verify_midx_file(struct odb_source *source, unsigned flags); int expire_midx_packs(struct odb_source *source, unsigned flags); int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags); diff --git a/repack-geometry.c b/repack-geometry.c index d2065205f87361..5b554da89f1d09 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -251,7 +251,8 @@ static void remove_redundant_packs(struct packed_git **pack, uint32_t pack_nr, struct string_list *names, struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -271,7 +272,8 @@ static void remove_redundant_packs(struct packed_git **pack, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + repack_remove_redundant_pack(existing->repo, packdir, buf.buf, + wrote_incremental_midx); } strbuf_release(&buf); @@ -280,12 +282,13 @@ static void remove_redundant_packs(struct packed_git **pack, void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct 
string_list *names, struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { remove_redundant_packs(geometry->pack, geometry->split, - names, existing, packdir); + names, existing, packdir, wrote_incremental_midx); remove_redundant_packs(geometry->promisor_pack, geometry->promisor_split, - names, existing, packdir); + names, existing, packdir, wrote_incremental_midx); } void pack_geometry_release(struct pack_geometry *geometry) diff --git a/repack-midx.c b/repack-midx.c index a159b879775e1d..5bf5df03d5d03c 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -894,6 +894,7 @@ static int write_midx_incremental(struct repack_write_midx_opts *opts) struct midx_compaction_step *steps = NULL; struct strbuf lock_name = STRBUF_INIT; struct lock_file lf; + struct strvec keep_hashes = STRVEC_INIT; size_t steps_nr = 0; size_t i; int ret = 0; @@ -939,11 +940,15 @@ static int write_midx_incremental(struct repack_write_midx_opts *opts) BUG("missing result for compaction step %"PRIuMAX, (uintmax_t)i); fprintf(get_lock_file_fp(&lf), "%s\n", step->csum); + strvec_push(&keep_hashes, step->csum); } commit_lock_file(&lf); + clear_incremental_midx_files(opts->existing->repo, &keep_hashes); + done: + strvec_clear(&keep_hashes); strbuf_release(&lock_name); for (i = 0; i < steps_nr; i++) midx_compaction_step_release(&steps[i]); diff --git a/repack.c b/repack.c index 2ee6b51420aa54..be2cc0e9d44aeb 100644 --- a/repack.c +++ b/repack.c @@ -55,14 +55,18 @@ void pack_objects_args_release(struct pack_objects_args *args) } void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, - const char *base_name) + const char *base_name, + bool wrote_incremental_midx) { struct strbuf buf = STRBUF_INIT; struct odb_source *source = repo->objects->sources; struct multi_pack_index *m = get_multi_pack_index(source); strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) + if (m && 
source->local && midx_contains_pack(m, buf.buf)) { clear_midx_file(repo); + if (!wrote_incremental_midx) + clear_incremental_midx_files(repo, NULL); + } strbuf_insertf(&buf, 0, "%s/", dir_name); unlink_pack_path(buf.buf, 1); strbuf_release(&buf); @@ -252,23 +256,26 @@ void existing_packs_mark_for_deletion(struct existing_packs *existing, static void remove_redundant_packs_1(struct repository *repo, struct string_list *packs, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { struct string_list_item *item; for_each_string_list_item(item, packs) { if (!existing_pack_is_marked_for_deletion(item)) continue; - repack_remove_redundant_pack(repo, packdir, item->string); + repack_remove_redundant_pack(repo, packdir, item->string, + wrote_incremental_midx); } } void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, - packdir); + packdir, wrote_incremental_midx); remove_redundant_packs_1(existing->repo, &existing->cruft_packs, - packdir); + packdir, wrote_incremental_midx); } void existing_packs_release(struct existing_packs *existing) diff --git a/repack.h b/repack.h index 831ccfb1c6ce77..d2876f569a6738 100644 --- a/repack.h +++ b/repack.h @@ -34,7 +34,8 @@ void prepare_pack_objects(struct child_process *cmd, void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, - const char *base_name); + const char *base_name, + bool wrote_incremental_midx); struct write_pack_opts { struct pack_objects_args *po_args; @@ -84,7 +85,8 @@ void existing_packs_retain_cruft(struct existing_packs *existing, void existing_packs_mark_for_deletion(struct existing_packs *existing, struct string_list *names); void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir); + const char *packdir, + 
bool wrote_incremental_midx); void existing_packs_release(struct existing_packs *existing); struct generated_pack; @@ -129,7 +131,8 @@ struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing, - const char *packdir); + const char *packdir, + bool wrote_incremental_midx); void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; diff --git a/t/meson.build b/t/meson.build index 919a0ae4cbb38e..3fc38d123b1696 100644 --- a/t/meson.build +++ b/t/meson.build @@ -950,6 +950,7 @@ integration_tests = [ 't7702-repack-cyclic-alternate.sh', 't7703-repack-geometric.sh', 't7704-repack-cruft.sh', + 't7705-repack-incremental-midx.sh', 't7800-difftool.sh', 't7810-grep.sh', 't7811-grep-open.sh', diff --git a/t/t7705-repack-incremental-midx.sh b/t/t7705-repack-incremental-midx.sh new file mode 100755 index 00000000000000..f81c2c67060653 --- /dev/null +++ b/t/t7705-repack-incremental-midx.sh @@ -0,0 +1,436 @@ +#!/bin/sh + +test_description='git repack --write-midx=incremental' + +. ./test-lib.sh + +GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0 + +objdir=.git/objects +packdir=$objdir/pack +midxdir=$packdir/multi-pack-index.d +midx_chain=$midxdir/multi-pack-index-chain + +# incrementally_repack N +# +# Make "N" new commits, each stored in their own pack, and then repacked +# with the --write-midx=incremental strategy. +incrementally_repack () { + for i in $(test_seq 1 "$1") + do + test_commit "$i" && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + git multi-pack-index verify || return 1 + done +} + +# Create packs with geometrically increasing sizes so that they +# satisfy the geometric progression and survive a --geometric=2 +# repack without being rolled up. 
Creates 3 packs containing 1, +# 2, and 6 commits (3, 6, and 18 objects) respectively. +create_geometric_packs () { + test_commit "small" && + git repack -d && + + test_commit_bulk --message="medium" 2 && + test_commit_bulk --message="large" 6 && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index +} + +# create_layer +# +# Creates a new MIDX layer with the contents of "test_commit_bulk $@". +create_layer () { + test_commit_bulk "$@" && + + git multi-pack-index write --incremental --bitmap +} + +# create_layers +# +# Reads lines of " " from stdin and creates a new MIDX +# layer for each line. See create_layer above for more. +create_layers () { + while read msg nr + do + create_layer --message="$msg" "$nr" || return 1 + done +} + +test_expect_success '--write-midx=incremental requires --geometric' ' + test_must_fail git repack --write-midx=incremental 2>err && + + test_grep -- "--write-midx=incremental requires --geometric" err +' + +test_expect_success 'below layer threshold, tip packs excluded' ' + git init below-layer-threshold-tip-packs-excluded && + ( + cd below-layer-threshold-tip-packs-excluded && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 4 && + git config repack.midxsplitfactor 2 && + + # Create 3 packs forming a geometric progression by + # object count such that they are unmodified by the + # initial repack. The MIDX chain thusly contains a + # single layer with three packs. + create_geometric_packs && + ls $packdir/pack-*.idx | sort >packs.before && + test_line_count = 1 $midx_chain && + cp $midx_chain $midx_chain.before && + + # Repack a new commit. Since the layer threshold is + # unmet, a new MIDX layer is added on top of the + # existing one. 
+ test_commit extra && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + git multi-pack-index verify && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + test_line_count = 1 packs.new && + + test_line_count = 2 "$midx_chain" && + head -n 1 "$midx_chain.before" >expect && + head -n 1 "$midx_chain" >actual && + test_cmp expect actual + ) +' + +test_expect_success 'above layer threshold, tip packs repacked' ' + git init above-layer-threshold-tip-packs-repacked && + ( + cd above-layer-threshold-tip-packs-repacked && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + # Same setup, but with the layer threshold set to 2. + # Since the tip MIDX layer meets that threshold, its + # packs are considered repack candidates. + create_geometric_packs && + cp $midx_chain $midx_chain.before && + + # Perturb the existing progression such that it is + # rolled up into a single new pack, invalidating the + # existing MIDX layer and replacing it with a new one. + test_commit extra && + git repack -d && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! 
test_cmp $midx_chain.before $midx_chain && + test_line_count = 1 $midx_chain && + + git multi-pack-index verify + ) +' + +test_expect_success 'above layer threshold, tip layer preserved' ' + git init above-layer-threshold-tip-layer-preserved && + ( + cd above-layer-threshold-tip-layer-preserved && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + test_commit_bulk --message="medium" 2 && + test_commit_bulk --message="large" 6 && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_line_count = 1 "$midx_chain" && + ls $packdir/pack-*.idx | sort >packs.before && + cp $midx_chain $midx_chain.before && + + # Create objects to form a pack satisfying the geometric + # progression (thus preserving the tip layer), but not + # so large that it meets the layer merging condition. + test_commit_bulk --message="small" 1 && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + + test_line_count = 1 packs.new && + test_line_count = 3 packs.after && + test_line_count = 2 "$midx_chain" && + head -n 1 "$midx_chain.before" >expect && + head -n 1 "$midx_chain" >actual && + test_cmp expect actual && + + git multi-pack-index verify + ) +' + +test_expect_success 'above layer threshold, tip packs preserved' ' + git init above-layer-threshold-tip-packs-preserved && + ( + cd above-layer-threshold-tip-packs-preserved && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + create_geometric_packs && + ls $packdir/pack-*.idx | sort >packs.before && + cp $midx_chain $midx_chain.before && + + # Same setup as above, but this time the new objects do + # not satisfy the new layer merging condition, resulting + # in a new tip layer. 
+ test_commit_bulk --message="huge" 18 && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + + ! test_cmp $midx_chain.before $midx_chain && + test_line_count = 1 $midx_chain && + test_line_count = 1 packs.new && + + git multi-pack-index verify + ) +' + +test_expect_success 'new tip absorbs multiple layers' ' + git init new-tip-absorbs-multiple-layers && + ( + cd new-tip-absorbs-multiple-layers && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Build a 4-layer chain where each layer is too small to + # absorb the one below it. The sizes must satisfy L(n) < + # L(n-1)/2 for each adjacent pair: + # + # L0 (oldest): 75 obj (25 commits) + # L1: 21 obj (7 commits, 21 < 75/2) + # L2: 9 obj (3 commits, 9 < 21/2) + # L3 (tip): 3 obj (1 commit, 3 < 9/2) + create_layers <<-\EOF && + L0 25 + L1 7 + L2 3 + L3 1 + EOF + + test_line_count = 4 "$midx_chain" && + cp $midx_chain $midx_chain.before && + + # Now add a new commit. The merging condition is + # satisfied between L3-L1, but violated at L0, which is + # too large relative to the accumulated size. + # + # As a result, the chain shrinks from 4 to 2 layers. + test_commit new && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! 
test_cmp $midx_chain.before $midx_chain && + test_line_count = 2 "$midx_chain" && + git multi-pack-index verify + ) +' + +test_expect_success 'compaction of older layers' ' + git init compaction-of-older-layers && + ( + cd compaction-of-older-layers && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Build a chain with two small layers at the bottom + # and a larger barrier layer on top, producing a + # chain that violates the compaction invariant, since + # the two small layers would normally have been merged. + create_layers <<-\EOF && + one 2 + two 4 + barrier 54 + EOF + + cp $midx_chain $midx_chain.before && + + # Running an incremental repack compacts the two + # small layers at the bottom of the chain as a + # separate step in the compaction plan. + test_commit another && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_line_count = 2 "$midx_chain" && + git multi-pack-index verify + ) +' + +test_expect_success 'geometric rollup with surviving tip packs' ' + git init geometric-rollup-with-surviving-tip-packs && + ( + cd geometric-rollup-with-surviving-tip-packs && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Create a pack large enough to anchor the geometric + # progression when small packs are added alongside it. + create_layer --message="big" 5 && + + test_line_count = 1 "$midx_chain" && + cp $midx_chain $midx_chain.before && + + # Repack a small number of objects such that the + # progression is unbothered. Note that the existing pack + # is considered a repack candidate as the new layer + # threshold is set to 1. + test_commit small-1 && + git repack -d && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! 
test_cmp $midx_chain.before $midx_chain && + cp $midx_chain $midx_chain.before + ) +' + +test_expect_success 'kept packs are excluded from repack' ' + git init kept-packs-excluded-from-repack && + ( + cd kept-packs-excluded-from-repack && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Create two equal-sized packs, marking one as kept. + for i in A B + do + test_commit "$i" && git repack -d || return 1 + done && + + keep=$(ls $packdir/pack-*.idx | head -n 1) && + touch "${keep%.idx}.keep" && + + # The kept pack is excluded as a repacking candidate + # entirely, so no rollup occurs as there is only one + # non-kept pack. A new MIDX layer is written containing + # that pack. + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test-tool read-midx $objdir >actual && + grep "^pack-.*\.idx$" actual >actual.packs && + test_line_count = 1 actual.packs && + test_grep ! "$keep" actual.packs && + + git multi-pack-index verify && + + # All objects (from both kept and non-kept packs) + # must still be accessible. + git fsck + ) +' + +test_expect_success 'incremental MIDX with --max-pack-size' ' + git init incremental-midx-with--max-pack-size && + ( + cd incremental-midx-with--max-pack-size && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layer --message="base" 1 && + + # Now add enough data that a small --max-pack-size will + # cause pack-objects to split its output. Create objects + # large enough to fill multiple packs. 
+ test-tool genrandom foo 1M >big1 && + test-tool genrandom bar 1M >big2 && + git add big1 big2 && + test_tick && + git commit -a -m "big blobs" && + git repack -d && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index --max-pack-size=1M && + + test_line_count = 1 "$midx_chain" && + test-tool read-midx $objdir >actual && + grep "^pack-.*\.idx$" actual >actual.packs && + test_line_count -gt 1 actual.packs && + + git multi-pack-index verify + ) +' + +test_expect_success 'noop repack preserves valid MIDX chain' ' + git init noop-repack-preserves-valid-midx-chain && + ( + cd noop-repack-preserves-valid-midx-chain && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layer --message="base" 1 && + + git multi-pack-index verify && + cp $midx_chain $midx_chain.before && + + # Running again with no new objects should not break + # the MIDX chain. It produces "Nothing new to pack." 
+ git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_cmp $midx_chain.before $midx_chain && + + git multi-pack-index verify && + git fsck + ) +' + +test_expect_success 'repack -ad removes stale incremental chain' ' + git init repack--ad-removes-stale-incremental-chain && + ( + cd repack--ad-removes-stale-incremental-chain && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layers <<-\EOF && + one 1 + two 1 + EOF + + test_path_is_file $midx_chain && + test_line_count = 2 $midx_chain && + + git repack -ad && + + test_path_is_missing $packdir/multi-pack-index && + test_dir_is_empty $midxdir + ) +' + +test_done From 1b17f64d40bc32592c0894f134acaf61d0c5b912 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Sun, 29 Mar 2026 17:41:31 -0400 Subject: [PATCH 043/241] repack: allow `--write-midx=incremental` without `--geometric` Previously, `--write-midx=incremental` required `--geometric` and would die() without it. Relax this restriction so that incremental MIDX repacking can be used independently. Without `--geometric`, the behavior is append-only: a single new MIDX layer is created containing whatever packs were written by the repack and appended to the existing chain (or a new chain is started). Existing layers are preserved as-is with no compaction or merging. Implement this via a new repack_make_midx_append_plan() that builds a plan consisting of a WRITE step for the freshly written packs followed by COPY steps for every existing MIDX layer. The existing compaction plan (repack_make_midx_compaction_plan) is used only when `--geometric` is active. Update the documentation to describe the behavior with and without `--geometric`, and replace the test that enforced the old restriction with one exercising append-only incremental MIDX repacking. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-repack.adoc | 19 +++++---- builtin/repack.c | 3 -- repack-midx.c | 64 ++++++++++++++++++++++++++++-- t/t7705-repack-incremental-midx.sh | 35 +++++++++++++--- 4 files changed, 103 insertions(+), 18 deletions(-) diff --git a/Documentation/git-repack.adoc b/Documentation/git-repack.adoc index 27a99cc46f4ada..72c42015e23f94 100644 --- a/Documentation/git-repack.adoc +++ b/Documentation/git-repack.adoc @@ -263,14 +263,19 @@ linkgit:git-multi-pack-index[1]). `incremental`;; Write an incremental MIDX chain instead of a single - flat MIDX. This mode requires `--geometric`. + flat MIDX. + -The incremental mode maintains a chain of MIDX layers that is compacted -over time using a geometric merging strategy. Each repack creates a new -tip layer containing the newly written pack(s). Adjacent layers are then -merged whenever the newer layer's object count exceeds -`1/repack.midxSplitFactor` of the next deeper layer's count. Layers -that do not meet this condition are retained as-is. +Without `--geometric`, a new MIDX layer is appended to the existing +chain (or a new chain is started) containing whatever packs were written +by the repack. Existing layers are preserved as-is. ++ +When combined with `--geometric`, the incremental mode maintains a chain +of MIDX layers that is compacted over time using a geometric merging +strategy. Each repack creates a new tip layer containing the newly +written pack(s). Adjacent layers are then merged whenever the newer +layer's object count exceeds `1/repack.midxSplitFactor` of the next +deeper layer's count. Layers that do not meet this condition are +retained as-is. 
+ The result is that newer (tip) layers tend to contain many small packs with relatively few objects, while older (deeper) layers contain fewer, diff --git a/builtin/repack.c b/builtin/repack.c index 9e070f35868ffc..8e187322fe483f 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -263,9 +263,6 @@ int cmd_repack(int argc, if (pack_everything & PACK_CRUFT) pack_everything |= ALL_INTO_ONE; - if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL && !geometry.split_factor) - die(_("--write-midx=incremental requires --geometric")); - if (write_bitmaps < 0) { if (write_midx == REPACK_WRITE_MIDX_NONE && (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) diff --git a/repack-midx.c b/repack-midx.c index 5bf5df03d5d03c..055aa4c1356873 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -555,6 +555,60 @@ static void midx_compaction_step_release(struct midx_compaction_step *step) free(step->csum); } +/* + * Build an append-only MIDX plan: a single WRITE step for the freshly + * written packs, plus COPY steps for every existing layer. No + * compaction or merging is performed. 
+ */ +static void repack_make_midx_append_plan(struct repack_write_midx_opts *opts, + struct midx_compaction_step **steps_p, + size_t *steps_nr_p) +{ + struct multi_pack_index *m; + struct midx_compaction_step *steps = NULL; + struct midx_compaction_step *step; + size_t steps_nr = 0, steps_alloc = 0; + + odb_reprepare(opts->existing->repo->objects); + m = get_multi_pack_index(opts->existing->source); + + if (opts->names->nr) { + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + ALLOC_GROW(steps, st_add(steps_nr, 1), steps_alloc); + + step = &steps[steps_nr++]; + memset(step, 0, sizeof(*step)); + + step->type = MIDX_COMPACTION_STEP_WRITE; + string_list_init_dup(&step->u.write); + + for (i = 0; i < opts->names->nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", + opts->names->items[i].string); + string_list_append(&step->u.write, buf.buf); + } + + strbuf_release(&buf); + } + + for (; m; m = m->base_midx) { + ALLOC_GROW(steps, st_add(steps_nr, 1), steps_alloc); + + step = &steps[steps_nr++]; + memset(step, 0, sizeof(*step)); + + step->type = MIDX_COMPACTION_STEP_COPY; + step->u.copy = m; + step->objects_nr = m->num_objects; + } + + *steps_p = steps; + *steps_nr_p = steps_nr; +} + static int repack_make_midx_compaction_plan(struct repack_write_midx_opts *opts, struct midx_compaction_step **steps_p, size_t *steps_nr_p) @@ -911,9 +965,13 @@ static int write_midx_incremental(struct repack_write_midx_opts *opts) goto done; } - if (repack_make_midx_compaction_plan(opts, &steps, &steps_nr) < 0) { - ret = error(_("unable to generate compaction plan")); - goto done; + if (opts->geometry->split_factor) { + if (repack_make_midx_compaction_plan(opts, &steps, &steps_nr) < 0) { + ret = error(_("unable to generate compaction plan")); + goto done; + } + } else { + repack_make_midx_append_plan(opts, &steps, &steps_nr); } for (i = 0; i < steps_nr; i++) { diff --git a/t/t7705-repack-incremental-midx.sh b/t/t7705-repack-incremental-midx.sh index 
f81c2c67060653..562554e69b4f5d 100755 --- a/t/t7705-repack-incremental-midx.sh +++ b/t/t7705-repack-incremental-midx.sh @@ -63,10 +63,36 @@ create_layers () { done } -test_expect_success '--write-midx=incremental requires --geometric' ' - test_must_fail git repack --write-midx=incremental 2>err && +test_expect_success '--write-midx=incremental without --geometric' ' + git init incremental-without-geometric && + ( + cd incremental-without-geometric && + + git config maintenance.auto false && + + test_commit first && + git repack -d && + + test_commit second && + git repack --write-midx=incremental && + + git multi-pack-index verify && + test_line_count = 1 $midx_chain && + cp $midx_chain $midx_chain.before && - test_grep -- "--write-midx=incremental requires --geometric" err + # A second repack appends a new layer without + # disturbing the existing one. + test_commit third && + git repack --write-midx=incremental && + + git multi-pack-index verify && + test_line_count = 2 $midx_chain && + head -n 1 $midx_chain.before >expect && + head -n 1 $midx_chain >actual && + test_cmp expect actual && + + git fsck + ) ' test_expect_success 'below layer threshold, tip packs excluded' ' @@ -334,8 +360,7 @@ test_expect_success 'kept packs are excluded from repack' ' # entirely, so no rollup occurs as there is only one # non-kept pack. A new MIDX layer is written containing # that pack. - git repack --geometric=2 -d --write-midx=incremental \ - --write-bitmap-index && + git repack --geometric=2 -d --write-midx=incremental && test-tool read-midx $objdir >actual && grep "^pack-.*\.idx$" actual >actual.packs && From b2faaaec1193c64f7366e26a569ca6e231cbd478 Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Sat, 28 Mar 2026 01:11:11 +0100 Subject: [PATCH 044/241] graph: limit the graph width to a hard-coded max Repositories that have many active branches at the same time produce wide graphs. 
A lane consists of two columns, the edge and the padding (or another edge), each branch takes a lane in the graph and there is no way to limit how many can be shown. Limit the graph engine to draw at most 15 lanes. Lanes over the limit are not rendered. On the commit line, if the commit lives on a visible lane, show the normal commit mark and stop rendering. If the commit lives on the first hidden lane, show the "*" commit mark so it is known that this commit lives in the first hidden lane. Commits on deeper lanes aren't rendered, but the commit subject will always remain. For merges, the post-merge lane is only needed when the commit or the first parent lives on a visible lane (to draw the connection between them), when both are on hidden lanes, post-merge carries no useful information, skip it and go to collapsing or padding state. Also fix a pre-existing indentation issue. The hard-coded limit will be replaced by a user-facing option on a subsequent commit. Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- graph.c | 161 +++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 136 insertions(+), 25 deletions(-) diff --git a/graph.c b/graph.c index 26f6fbf000aef5..70458cf323e168 100644 --- a/graph.c +++ b/graph.c @@ -82,6 +82,8 @@ static void graph_show_line_prefix(const struct diff_options *diffopt) static const char **column_colors; static unsigned short column_colors_max; +static unsigned int max_lanes = 15; + static void parse_graph_colors_config(struct strvec *colors, const char *string) { const char *end, *start; @@ -317,6 +319,11 @@ struct git_graph { struct strbuf prefix_buf; }; +static inline int graph_needs_truncation(int lane) +{ + return lane >= max_lanes; +} + static const char *diff_output_prefix_callback(struct diff_options *opt, void *data) { struct git_graph *graph = data; @@ -607,7 +614,7 @@ static void graph_update_columns(struct git_graph *graph) { struct commit_list *parent; int max_new_columns; - int i, seen_this, 
is_commit_in_columns; + int i, seen_this, is_commit_in_columns, max; /* * Swap graph->columns with graph->new_columns @@ -696,6 +703,14 @@ static void graph_update_columns(struct git_graph *graph) } } + /* + * Cap to the hard-coded limit. + * Allow commits from merges to align to the merged lane. + */ + max = max_lanes * 2 + 2; + if (graph->width > max) + graph->width = max; + /* * Shrink mapping_size to be the minimum necessary */ @@ -846,6 +861,8 @@ static void graph_output_padding_line(struct git_graph *graph, * Output a padding row, that leaves all branch lines unchanged */ for (i = 0; i < graph->num_new_columns; i++) { + if (graph_needs_truncation(i)) + break; graph_line_write_column(line, &graph->new_columns[i], '|'); graph_line_addch(line, ' '); } @@ -903,6 +920,8 @@ static void graph_output_pre_commit_line(struct git_graph *graph, seen_this = 1; graph_line_write_column(line, col, '|'); graph_line_addchars(line, ' ', graph->expansion_row); + } else if (seen_this && graph_needs_truncation(i)) { + break; } else if (seen_this && (graph->expansion_row == 0)) { /* * This is the first line of the pre-commit output. @@ -994,6 +1013,14 @@ static void graph_draw_octopus_merge(struct git_graph *graph, struct graph_line col = &graph->new_columns[j]; graph_line_write_column(line, col, '-'); + + /* + * Commit is at commit_index, each iteration move one lane to + * the right from the commit. + */ + if (graph_needs_truncation(graph->commit_index + 1 + i)) + break; + graph_line_write_column(line, col, (i == dashed_parents - 1) ? '.' 
: '-'); } @@ -1028,8 +1055,16 @@ static void graph_output_commit_line(struct git_graph *graph, struct graph_line seen_this = 1; graph_output_commit_char(graph, line); + if (graph_needs_truncation(i)) { + graph_line_addch(line, ' '); + break; + } + if (graph->num_parents > 2) graph_draw_octopus_merge(graph, line); + } else if (graph_needs_truncation(i)) { + seen_this = 1; + break; } else if (seen_this && (graph->edges_added > 1)) { graph_line_write_column(line, col, '\\'); } else if (seen_this && (graph->edges_added == 1)) { @@ -1065,13 +1100,46 @@ static void graph_output_commit_line(struct git_graph *graph, struct graph_line /* * Update graph->state + * + * If the commit is a merge and the first parent is in a visible lane, + * then the GRAPH_POST_MERGE is needed to draw the merge lane. + * + * If the commit is over the truncation limit, but the first parent is on + * a visible lane, then we still need the merge lane but truncated. + * + * If both commit and first parent are over the truncation limit, then + * there's no need to draw the merge lane because it would work as a + * padding lane. 
*/ - if (graph->num_parents > 1) - graph_update_state(graph, GRAPH_POST_MERGE); - else if (graph_is_mapping_correct(graph)) + if (graph->num_parents > 1) { + if (!graph_needs_truncation(graph->commit_index)) { + graph_update_state(graph, GRAPH_POST_MERGE); + } else { + struct commit_list *p = first_interesting_parent(graph); + int lane; + + /* + * graph->num_parents are found using first_interesting_parent + * and next_interesting_parent so it can't be a scenario + * where num_parents > 1 and there are no interesting parents + */ + if (!p) + BUG("num_parents > 1 but no interesting parent"); + + lane = graph_find_new_column_by_commit(graph, p->item); + + if (!graph_needs_truncation(lane)) + graph_update_state(graph, GRAPH_POST_MERGE); + else if (graph_is_mapping_correct(graph)) + graph_update_state(graph, GRAPH_PADDING); + else + graph_update_state(graph, GRAPH_COLLAPSING); + } + } else if (graph_is_mapping_correct(graph)) { graph_update_state(graph, GRAPH_PADDING); - else + } else { graph_update_state(graph, GRAPH_COLLAPSING); + } } static const char merge_chars[] = {'/', '|', '\\'}; @@ -1109,6 +1177,7 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct graph_l int par_column; int idx = graph->merge_layout; char c; + int truncated = 0; seen_this = 1; for (j = 0; j < graph->num_parents; j++) { @@ -1117,23 +1186,53 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct graph_l c = merge_chars[idx]; graph_line_write_column(line, &graph->new_columns[par_column], c); + + /* + * j counts parents, it needs to be halved to be + * comparable with i. 
Don't truncate if there are + * no more lanes to print (end of the lane) + */ + if (graph_needs_truncation(j / 2 + i) && + j / 2 + i <= graph->num_columns) { + if ((j + i * 2) % 2 != 0) + graph_line_addch(line, ' '); + truncated = 1; + break; + } + if (idx == 2) { - if (graph->edges_added > 0 || j < graph->num_parents - 1) + /* + * Check if the next lane needs truncation + * to avoid having the padding doubled + */ + if (graph_needs_truncation((j + 1) / 2 + i) && + j < graph->num_parents - 1) { + truncated = 1; + break; + } else if (graph->edges_added > 0 || j < graph->num_parents - 1) graph_line_addch(line, ' '); } else { idx++; } parents = next_interesting_parent(graph, parents); } + if (truncated) + break; if (graph->edges_added == 0) graph_line_addch(line, ' '); - + } else if (graph_needs_truncation(i)) { + break; } else if (seen_this) { if (graph->edges_added > 0) graph_line_write_column(line, col, '\\'); else graph_line_write_column(line, col, '|'); - graph_line_addch(line, ' '); + /* + * If it's between two lanes and next would be truncated, + * don't add space padding. 
+ */ + if (!graph_needs_truncation(i + 1)) + graph_line_addch(line, ' '); } else { graph_line_write_column(line, col, '|'); if (graph->merge_layout != 0 || i != graph->commit_index - 1) { @@ -1164,6 +1263,7 @@ static void graph_output_collapsing_line(struct git_graph *graph, struct graph_l short used_horizontal = 0; int horizontal_edge = -1; int horizontal_edge_target = -1; + int truncated = 0; /* * Swap the mapping and old_mapping arrays @@ -1279,26 +1379,34 @@ static void graph_output_collapsing_line(struct git_graph *graph, struct graph_l */ for (i = 0; i < graph->mapping_size; i++) { int target = graph->mapping[i]; - if (target < 0) - graph_line_addch(line, ' '); - else if (target * 2 == i) - graph_line_write_column(line, &graph->new_columns[target], '|'); - else if (target == horizontal_edge_target && - i != horizontal_edge - 1) { - /* - * Set the mappings for all but the - * first segment to -1 so that they - * won't continue into the next line. - */ - if (i != (target * 2)+3) - graph->mapping[i] = -1; - used_horizontal = 1; - graph_line_write_column(line, &graph->new_columns[target], '_'); + + if (!truncated && graph_needs_truncation(i / 2)) { + truncated = 1; + } + + if (target < 0) { + if (!truncated) + graph_line_addch(line, ' '); + } else if (target * 2 == i) { + if (!truncated) + graph_line_write_column(line, &graph->new_columns[target], '|'); + } else if (target == horizontal_edge_target && + i != horizontal_edge - 1) { + /* + * Set the mappings for all but the + * first segment to -1 so that they + * won't continue into the next line. 
+ */ + if (i != (target * 2)+3) + graph->mapping[i] = -1; + used_horizontal = 1; + if (!truncated) + graph_line_write_column(line, &graph->new_columns[target], '_'); } else { if (used_horizontal && i < horizontal_edge) graph->mapping[i] = -1; - graph_line_write_column(line, &graph->new_columns[target], '/'); - + if (!truncated) + graph_line_write_column(line, &graph->new_columns[target], '/'); } } @@ -1372,6 +1480,9 @@ static void graph_padding_line(struct git_graph *graph, struct strbuf *sb) for (i = 0; i < graph->num_columns; i++) { struct column *col = &graph->columns[i]; + if (graph_needs_truncation(i)) + break; + graph_line_write_column(&line, col, '|'); if (col->commit == graph->commit && graph->num_parents > 2) { From f756a3c78d4d88af1701996394650e6df6f66170 Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Sat, 28 Mar 2026 01:11:12 +0100 Subject: [PATCH 045/241] graph: add --graph-lane-limit option Replace the hard-coded lane limit with a user-facing option '--graph-lane-limit='. It caps the number of visible lanes to n. This option requires '--graph', without it, limiting the graph has no meaning, in this case error out. Zero and negative values are valid inputs but silently ignored treating them as "no limit", the same as not using the option. This follows what '--max-parents' does with negative values. The default is 0, same as not being used. Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.adoc | 5 + graph.c | 53 +++++----- revision.c | 6 ++ revision.h | 1 + t/t4215-log-skewed-merges.sh | 144 ++++++++++++++++++++++++++++ 5 files changed, 186 insertions(+), 23 deletions(-) diff --git a/Documentation/rev-list-options.adoc b/Documentation/rev-list-options.adoc index 2d195a147456ea..d530e744f6c19d 100644 --- a/Documentation/rev-list-options.adoc +++ b/Documentation/rev-list-options.adoc @@ -1259,6 +1259,11 @@ This implies the `--topo-order` option by default, but the in between them in that case. 
If __ is specified, it is the string that will be shown instead of the default one. +`--graph-lane-limit=`:: + When `--graph` is used, limit the number of graph lanes to be shown. + Lanes over the limit are not shown. By default it is set to 0 + (no limit), zero and negative values are ignored and treated as no limit. + ifdef::git-rev-list[] `--count`:: Print a number stating how many commits would have been diff --git a/graph.c b/graph.c index 70458cf323e168..ee1f9e2d2d943a 100644 --- a/graph.c +++ b/graph.c @@ -82,8 +82,6 @@ static void graph_show_line_prefix(const struct diff_options *diffopt) static const char **column_colors; static unsigned short column_colors_max; -static unsigned int max_lanes = 15; - static void parse_graph_colors_config(struct strvec *colors, const char *string) { const char *end, *start; @@ -319,9 +317,13 @@ struct git_graph { struct strbuf prefix_buf; }; -static inline int graph_needs_truncation(int lane) +static inline int graph_needs_truncation(struct git_graph *graph, int lane) { - return lane >= max_lanes; + int max = graph->revs->graph_max_lanes; + /* + * Ignore values <= 0, meaning no limit. + */ + return max > 0 && lane >= max; } static const char *diff_output_prefix_callback(struct diff_options *opt, void *data) @@ -614,7 +616,7 @@ static void graph_update_columns(struct git_graph *graph) { struct commit_list *parent; int max_new_columns; - int i, seen_this, is_commit_in_columns, max; + int i, seen_this, is_commit_in_columns; /* * Swap graph->columns with graph->new_columns @@ -704,12 +706,17 @@ static void graph_update_columns(struct git_graph *graph) } /* - * Cap to the hard-coded limit. - * Allow commits from merges to align to the merged lane. + * If graph_max_lanes is set, cap the width */ - max = max_lanes * 2 + 2; - if (graph->width > max) - graph->width = max; + if (graph->revs->graph_max_lanes > 0) { + /* + * Width is column index while a lane is half that. + * Allow commits from merges to align to the merged lane. 
+ */ + int max_width = graph->revs->graph_max_lanes * 2 + 2; + if (graph->width > max_width) + graph->width = max_width; + } /* * Shrink mapping_size to be the minimum necessary @@ -861,7 +868,7 @@ static void graph_output_padding_line(struct git_graph *graph, * Output a padding row, that leaves all branch lines unchanged */ for (i = 0; i < graph->num_new_columns; i++) { - if (graph_needs_truncation(i)) + if (graph_needs_truncation(graph, i)) break; graph_line_write_column(line, &graph->new_columns[i], '|'); graph_line_addch(line, ' '); @@ -920,7 +927,7 @@ static void graph_output_pre_commit_line(struct git_graph *graph, seen_this = 1; graph_line_write_column(line, col, '|'); graph_line_addchars(line, ' ', graph->expansion_row); - } else if (seen_this && graph_needs_truncation(i)) { + } else if (seen_this && graph_needs_truncation(graph, i)) { break; } else if (seen_this && (graph->expansion_row == 0)) { /* @@ -1018,7 +1025,7 @@ static void graph_draw_octopus_merge(struct git_graph *graph, struct graph_line * Commit is at commit_index, each iteration move one lane to * the right from the commit. */ - if (graph_needs_truncation(graph->commit_index + 1 + i)) + if (graph_needs_truncation(graph, graph->commit_index + 1 + i)) break; graph_line_write_column(line, col, (i == dashed_parents - 1) ? '.' : '-'); @@ -1055,14 +1062,14 @@ static void graph_output_commit_line(struct git_graph *graph, struct graph_line seen_this = 1; graph_output_commit_char(graph, line); - if (graph_needs_truncation(i)) { + if (graph_needs_truncation(graph, i)) { graph_line_addch(line, ' '); break; } if (graph->num_parents > 2) graph_draw_octopus_merge(graph, line); - } else if (graph_needs_truncation(i)) { + } else if (graph_needs_truncation(graph, i)) { seen_this = 1; break; } else if (seen_this && (graph->edges_added > 1)) { @@ -1112,7 +1119,7 @@ static void graph_output_commit_line(struct git_graph *graph, struct graph_line * padding lane. 
*/ if (graph->num_parents > 1) { - if (!graph_needs_truncation(graph->commit_index)) { + if (!graph_needs_truncation(graph, graph->commit_index)) { graph_update_state(graph, GRAPH_POST_MERGE); } else { struct commit_list *p = first_interesting_parent(graph); @@ -1128,7 +1135,7 @@ static void graph_output_commit_line(struct git_graph *graph, struct graph_line lane = graph_find_new_column_by_commit(graph, p->item); - if (!graph_needs_truncation(lane)) + if (!graph_needs_truncation(graph, lane)) graph_update_state(graph, GRAPH_POST_MERGE); else if (graph_is_mapping_correct(graph)) graph_update_state(graph, GRAPH_PADDING); @@ -1192,7 +1199,7 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct graph_l * comparable with i. Don't truncate if there are * no more lanes to print (end of the lane) */ - if (graph_needs_truncation(j / 2 + i) && + if (graph_needs_truncation(graph, j / 2 + i) && j / 2 + i <= graph->num_columns) { if ((j + i * 2) % 2 != 0) graph_line_addch(line, ' '); @@ -1205,7 +1212,7 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct graph_l * Check if the next lane needs truncation * to avoid having the padding doubled */ - if (graph_needs_truncation((j + 1) / 2 + i) && + if (graph_needs_truncation(graph, (j + 1) / 2 + i) && j < graph->num_parents - 1) { truncated = 1; break; @@ -1220,7 +1227,7 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct graph_l break; if (graph->edges_added == 0) graph_line_addch(line, ' '); - } else if (graph_needs_truncation(i)) { + } else if (graph_needs_truncation(graph, i)) { break; } else if (seen_this) { if (graph->edges_added > 0) @@ -1231,7 +1238,7 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct graph_l * If it's between two lanes and next would be truncated, * don't add space padding. 
*/ - if (!graph_needs_truncation(i + 1)) + if (!graph_needs_truncation(graph, i + 1)) graph_line_addch(line, ' '); } else { graph_line_write_column(line, col, '|'); @@ -1380,7 +1387,7 @@ static void graph_output_collapsing_line(struct git_graph *graph, struct graph_l for (i = 0; i < graph->mapping_size; i++) { int target = graph->mapping[i]; - if (!truncated && graph_needs_truncation(i / 2)) { + if (!truncated && graph_needs_truncation(graph, i / 2)) { truncated = 1; } @@ -1480,7 +1487,7 @@ static void graph_padding_line(struct git_graph *graph, struct strbuf *sb) for (i = 0; i < graph->num_columns; i++) { struct column *col = &graph->columns[i]; - if (graph_needs_truncation(i)) + if (graph_needs_truncation(graph, i)) break; graph_line_write_column(&line, col, '|'); diff --git a/revision.c b/revision.c index 31808e3df055c7..81b67682a87a92 100644 --- a/revision.c +++ b/revision.c @@ -2605,6 +2605,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg } else if (!strcmp(arg, "--no-graph")) { graph_clear(revs->graph); revs->graph = NULL; + } else if (skip_prefix(arg, "--graph-lane-limit=", &optarg)) { + revs->graph_max_lanes = parse_count(optarg); } else if (!strcmp(arg, "--encode-email-headers")) { revs->encode_email_headers = 1; } else if (!strcmp(arg, "--no-encode-email-headers")) { @@ -3172,6 +3174,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s if (revs->no_walk && revs->graph) die(_("options '%s' and '%s' cannot be used together"), "--no-walk", "--graph"); + + if (revs->graph_max_lanes > 0 && !revs->graph) + die(_("the option '%s' requires '%s'"), "--graph-lane-limit", "--graph"); + if (!revs->reflog_info && revs->grep_filter.use_reflog_filter) die(_("the option '%s' requires '%s'"), "--grep-reflog", "--walk-reflogs"); diff --git a/revision.h b/revision.h index 69242ecb189a52..874ccce62571e4 100644 --- a/revision.h +++ b/revision.h @@ -304,6 +304,7 @@ struct rev_info { /* Display history graph 
*/ struct git_graph *graph; + int graph_max_lanes; /* special limits */ int skip_count; diff --git a/t/t4215-log-skewed-merges.sh b/t/t4215-log-skewed-merges.sh index 28d0779a8c599e..d7524e93669874 100755 --- a/t/t4215-log-skewed-merges.sh +++ b/t/t4215-log-skewed-merges.sh @@ -370,4 +370,148 @@ test_expect_success 'log --graph with multiple tips' ' EOF ' +test_expect_success 'log --graph --graph-lane-limit=2 limited to two lanes' ' + check_graph --graph-lane-limit=2 M_7 <<-\EOF + *-. 7_M4 + |\ \ + | | * 7_G + | | * 7_F + | * 7_E + | * 7_D + * | 7_C + | |/ + |/| + * | 7_B + |/ + * 7_A + EOF +' + +test_expect_success 'log --graph --graph-lane-limit=1 truncate mid octopus merge' ' + check_graph --graph-lane-limit=1 M_7 <<-\EOF + *- 7_M4 + |\ + | 7_G + | 7_F + | * 7_E + | * 7_D + * 7_C + | + |/ + * 7_B + |/ + * 7_A + EOF +' + +test_expect_success 'log --graph --graph-lane-limit=3 limited to three lanes' ' + check_graph --graph-lane-limit=3 M_1 M_3 M_5 M_7 <<-\EOF + * 7_M1 + |\ + | | * 7_M2 + | | |\ + | | | * 7_H + | | | 7_M3 + | | | 7_J + | | | 7_I + | | | 7_M4 + | |_|_ + |/| | + | | |_ + | |/| + | | | + | | |/ + | | * 7_G + | | | + | | |/ + | | * 7_F + | * | 7_E + | | |/ + | |/| + | * | 7_D + | | |/ + | |/| + * | | 7_C + | |/ + |/| + * | 7_B + |/ + * 7_A + EOF +' + +test_expect_success 'log --graph --graph-lane-limit=6 check if it only shows first of 3 parent merge' ' + check_graph --graph-lane-limit=6 M_1 M_3 M_5 M_7 <<-\EOF + * 7_M1 + |\ + | | * 7_M2 + | | |\ + | | | * 7_H + | | | | * 7_M3 + | | | | |\ + | | | | | * 7_J + | | | | * | 7_I + | | | | | | * 7_M4 + | |_|_|_|_|/ + |/| | | | |/ + | | |_|_|/| + | |/| | | |/ + | | | |_|/| + | | |/| | | + | | * | | | 7_G + | | | |_|/ + | | |/| | + | | * | | 7_F + | * | | | 7_E + | | |/ / + | |/| | + | * | | 7_D + | | |/ + | |/| + * | | 7_C + | |/ + |/| + * | 7_B + |/ + * 7_A + EOF +' + +test_expect_success 'log --graph --graph-lane-limit=7 check if it shows all 3 parent merge' ' + check_graph --graph-lane-limit=7 M_1 M_3 M_5 
M_7 <<-\EOF + * 7_M1 + |\ + | | * 7_M2 + | | |\ + | | | * 7_H + | | | | * 7_M3 + | | | | |\ + | | | | | * 7_J + | | | | * | 7_I + | | | | | | * 7_M4 + | |_|_|_|_|/|\ + |/| | | | |/ / + | | |_|_|/| / + | |/| | | |/ + | | | |_|/| + | | |/| | | + | | * | | | 7_G + | | | |_|/ + | | |/| | + | | * | | 7_F + | * | | | 7_E + | | |/ / + | |/| | + | * | | 7_D + | | |/ + | |/| + * | | 7_C + | |/ + |/| + * | 7_B + |/ + * 7_A + EOF +' + test_done From 9bab3ce5553b2333b8f8ee1aff27a9fe6a938f65 Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Sat, 28 Mar 2026 01:11:13 +0100 Subject: [PATCH 046/241] graph: add truncation mark to capped lanes When lanes are hidden by --graph-lane-limit, show a "~" truncation mark, so users know that there are lanes being truncated. The "~" is chosen because it is not used elsewhere in the graph and it is discrete. Signed-off-by: Pablo Sabater Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.adoc | 5 ++- graph.c | 22 +++++++--- t/t4215-log-skewed-merges.sh | 64 ++++++++++++++--------------- 3 files changed, 52 insertions(+), 39 deletions(-) diff --git a/Documentation/rev-list-options.adoc b/Documentation/rev-list-options.adoc index d530e744f6c19d..94a7b1c065dba8 100644 --- a/Documentation/rev-list-options.adoc +++ b/Documentation/rev-list-options.adoc @@ -1261,8 +1261,9 @@ This implies the `--topo-order` option by default, but the `--graph-lane-limit=`:: When `--graph` is used, limit the number of graph lanes to be shown. - Lanes over the limit are not shown. By default it is set to 0 - (no limit), zero and negative values are ignored and treated as no limit. + Lanes over the limit are replaced with a truncation mark '~'. + By default it is set to 0 (no limit), zero and negative values + are ignored and treated as no limit. 
ifdef::git-rev-list[] `--count`:: diff --git a/graph.c b/graph.c index ee1f9e2d2d943a..842282685f6cef 100644 --- a/graph.c +++ b/graph.c @@ -706,11 +706,11 @@ static void graph_update_columns(struct git_graph *graph) } /* - * If graph_max_lanes is set, cap the width + * If graph_max_lanes is set, cap the width */ if (graph->revs->graph_max_lanes > 0) { /* - * Width is column index while a lane is half that. + * width of "| " per lanes plus truncation mark "~ ". * Allow commits from merges to align to the merged lane. */ int max_width = graph->revs->graph_max_lanes * 2 + 2; @@ -868,8 +868,10 @@ static void graph_output_padding_line(struct git_graph *graph, * Output a padding row, that leaves all branch lines unchanged */ for (i = 0; i < graph->num_new_columns; i++) { - if (graph_needs_truncation(graph, i)) + if (graph_needs_truncation(graph, i)) { + graph_line_addstr(line, "~ "); break; + } graph_line_write_column(line, &graph->new_columns[i], '|'); graph_line_addch(line, ' '); } @@ -928,6 +930,7 @@ static void graph_output_pre_commit_line(struct git_graph *graph, graph_line_write_column(line, col, '|'); graph_line_addchars(line, ' ', graph->expansion_row); } else if (seen_this && graph_needs_truncation(graph, i)) { + graph_line_addstr(line, "~ "); break; } else if (seen_this && (graph->expansion_row == 0)) { /* @@ -1025,8 +1028,10 @@ static void graph_draw_octopus_merge(struct git_graph *graph, struct graph_line * Commit is at commit_index, each iteration move one lane to * the right from the commit. */ - if (graph_needs_truncation(graph, graph->commit_index + 1 + i)) + if (graph_needs_truncation(graph, graph->commit_index + 1 + i)) { + graph_line_addstr(line, "~ "); break; + } graph_line_write_column(line, col, (i == dashed_parents - 1) ? '.' 
: '-'); } @@ -1070,6 +1075,7 @@ static void graph_output_commit_line(struct git_graph *graph, struct graph_line if (graph->num_parents > 2) graph_draw_octopus_merge(graph, line); } else if (graph_needs_truncation(graph, i)) { + graph_line_addstr(line, "~ "); seen_this = 1; break; } else if (seen_this && (graph->edges_added > 1)) { @@ -1203,6 +1209,7 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct graph_l j / 2 + i <= graph->num_columns) { if ((j + i * 2) % 2 != 0) graph_line_addch(line, ' '); + graph_line_addstr(line, "~ "); truncated = 1; break; } @@ -1214,6 +1221,7 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct graph_l */ if (graph_needs_truncation(graph, (j + 1) / 2 + i) && j < graph->num_parents - 1) { + graph_line_addstr(line, "~ "); truncated = 1; break; } else if (graph->edges_added > 0 || j < graph->num_parents - 1) @@ -1228,6 +1236,7 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct graph_l if (graph->edges_added == 0) graph_line_addch(line, ' '); } else if (graph_needs_truncation(graph, i)) { + graph_line_addstr(line, "~ "); break; } else if (seen_this) { if (graph->edges_added > 0) @@ -1388,6 +1397,7 @@ static void graph_output_collapsing_line(struct git_graph *graph, struct graph_l int target = graph->mapping[i]; if (!truncated && graph_needs_truncation(graph, i / 2)) { + graph_line_addstr(line, "~ "); truncated = 1; } @@ -1487,8 +1497,10 @@ static void graph_padding_line(struct git_graph *graph, struct strbuf *sb) for (i = 0; i < graph->num_columns; i++) { struct column *col = &graph->columns[i]; - if (graph_needs_truncation(graph, i)) + if (graph_needs_truncation(graph, i)) { + graph_line_addstr(&line, "~ "); break; + } graph_line_write_column(&line, col, '|'); diff --git a/t/t4215-log-skewed-merges.sh b/t/t4215-log-skewed-merges.sh index d7524e93669874..1612f05f1b39ce 100755 --- a/t/t4215-log-skewed-merges.sh +++ b/t/t4215-log-skewed-merges.sh @@ -376,9 +376,9 @@ 
test_expect_success 'log --graph --graph-lane-limit=2 limited to two lanes' ' |\ \ | | * 7_G | | * 7_F - | * 7_E - | * 7_D - * | 7_C + | * ~ 7_E + | * ~ 7_D + * | ~ 7_C | |/ |/| * | 7_B @@ -389,16 +389,16 @@ test_expect_success 'log --graph --graph-lane-limit=2 limited to two lanes' ' test_expect_success 'log --graph --graph-lane-limit=1 truncate mid octopus merge' ' check_graph --graph-lane-limit=1 M_7 <<-\EOF - *- 7_M4 - |\ - | 7_G - | 7_F + *-~ 7_M4 + |\~ + | ~ 7_G + | ~ 7_F | * 7_E | * 7_D - * 7_C - | - |/ - * 7_B + * ~ 7_C + | ~ + |/~ + * ~ 7_B |/ * 7_A EOF @@ -411,24 +411,24 @@ test_expect_success 'log --graph --graph-lane-limit=3 limited to three lanes' ' | | * 7_M2 | | |\ | | | * 7_H - | | | 7_M3 - | | | 7_J - | | | 7_I - | | | 7_M4 - | |_|_ - |/| | - | | |_ - | |/| - | | | - | | |/ - | | * 7_G - | | | - | | |/ - | | * 7_F - | * | 7_E - | | |/ - | |/| - | * | 7_D + | | | ~ 7_M3 + | | | ~ 7_J + | | | ~ 7_I + | | | ~ 7_M4 + | |_|_~ + |/| | ~ + | | |_~ + | |/| ~ + | | | ~ + | | |/~ + | | * ~ 7_G + | | | ~ + | | |/~ + | | * ~ 7_F + | * | ~ 7_E + | | |/~ + | |/| ~ + | * | ~ 7_D | | |/ | |/| * | | 7_C @@ -452,9 +452,9 @@ test_expect_success 'log --graph --graph-lane-limit=6 check if it only shows fir | | | | | * 7_J | | | | * | 7_I | | | | | | * 7_M4 - | |_|_|_|_|/ - |/| | | | |/ - | | |_|_|/| + | |_|_|_|_|/~ + |/| | | | |/~ + | | |_|_|/| ~ | |/| | | |/ | | | |_|/| | | |/| | | From b10b40719a7650faa077fc0772bf563840d225ce Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:23 +0200 Subject: [PATCH 047/241] setup: replace use of `the_repository` in static functions Replace the use of `the_repository` in "setup.c" for all static functions. For now, we simply add `the_repository` to invocations of these functions. This will be addressed in subsequent commits, where we'll move up `the_repository` one more layer to callers of "setup.c". 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- setup.c | 188 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 100 insertions(+), 88 deletions(-) diff --git a/setup.c b/setup.c index 7ec4427368a2a7..ba2898473a58a3 100644 --- a/setup.c +++ b/setup.c @@ -50,13 +50,13 @@ const char *tmp_original_cwd; * /dir/repolink/file (repolink points to /dir/repo) -> file * /dir/repo (exactly equal to work tree) -> (empty string) */ -static int abspath_part_inside_repo(char *path) +static int abspath_part_inside_repo(struct repository *repo, char *path) { size_t len; size_t wtlen; char *path0; int off; - const char *work_tree = precompose_string_if_needed(repo_get_work_tree(the_repository)); + const char *work_tree = precompose_string_if_needed(repo_get_work_tree(repo)); struct strbuf realpath = STRBUF_INIT; if (!work_tree) @@ -132,7 +132,7 @@ char *prefix_path_gently(const char *prefix, int len, free(sanitized); return NULL; } - if (abspath_part_inside_repo(sanitized)) { + if (abspath_part_inside_repo(the_repository, sanitized)) { free(sanitized); return NULL; } @@ -509,7 +509,7 @@ void setup_work_tree(void) initialized = 1; } -static void setup_original_cwd(void) +static void setup_original_cwd(struct repository *repo) { struct strbuf tmp = STRBUF_INIT; const char *worktree = NULL; @@ -535,9 +535,9 @@ static void setup_original_cwd(void) /* Normalize the directory */ if (!strbuf_realpath(&tmp, tmp_original_cwd, 0)) { - trace2_data_string("setup", the_repository, + trace2_data_string("setup", repo, "realpath-path", tmp_original_cwd); - trace2_data_string("setup", the_repository, + trace2_data_string("setup", repo, "realpath-failure", strerror(errno)); free((char*)tmp_original_cwd); tmp_original_cwd = NULL; @@ -552,7 +552,7 @@ static void setup_original_cwd(void) * Get our worktree; we only protect the current working directory * if it's in the worktree. 
*/ - worktree = repo_get_work_tree(the_repository); + worktree = repo_get_work_tree(repo); if (!worktree) goto no_prevention_needed; @@ -747,7 +747,10 @@ static int check_repo_format(const char *var, const char *value, return read_worktree_config(var, value, ctx, vdata); } -static int check_repository_format_gently(const char *gitdir, struct repository_format *candidate, int *nongit_ok) +static int check_repository_format_gently(struct repository *repo, + const char *gitdir, + struct repository_format *candidate, + int *nongit_ok) { struct strbuf sb = STRBUF_INIT; struct strbuf err = STRBUF_INIT; @@ -776,7 +779,7 @@ static int check_repository_format_gently(const char *gitdir, struct repository_ die("%s", err.buf); } - the_repository->repository_format_precious_objects = candidate->precious_objects; + repo->repository_format_precious_objects = candidate->precious_objects; string_list_clear(&candidate->unknown_extensions, 0); string_list_clear(&candidate->v1_only_extensions, 0); @@ -1034,7 +1037,8 @@ const char *read_gitfile_gently(const char *path, int *return_error_code) return error_code ? 
NULL : path; } -static void setup_git_env_internal(const char *git_dir, +static void setup_git_env_internal(struct repository *repo, + const char *git_dir, bool skip_initializing_odb) { char *git_replace_ref_base; @@ -1052,7 +1056,7 @@ static void setup_git_env_internal(const char *git_dir, args.disable_ref_updates = true; args.skip_initializing_odb = skip_initializing_odb; - repo_set_gitdir(the_repository, git_dir, &args); + repo_set_gitdir(repo, git_dir, &args); strvec_clear(&to_free); if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) @@ -1064,7 +1068,7 @@ static void setup_git_env_internal(const char *git_dir, shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT); if (shallow_file) - set_alternate_shallow_file(the_repository, shallow_file, 0); + set_alternate_shallow_file(repo, shallow_file, 0); if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0)) fetch_if_missing = 0; @@ -1072,30 +1076,31 @@ static void setup_git_env_internal(const char *git_dir, void setup_git_env(const char *git_dir) { - setup_git_env_internal(git_dir, false); + setup_git_env_internal(the_repository, git_dir, false); } -static void set_git_dir_1(const char *path, bool skip_initializing_odb) +static void set_git_dir_1(struct repository *repo, const char *path, bool skip_initializing_odb) { xsetenv(GIT_DIR_ENVIRONMENT, path, 1); - setup_git_env_internal(path, skip_initializing_odb); + setup_git_env_internal(repo, path, skip_initializing_odb); } static void update_relative_gitdir(const char *name UNUSED, const char *old_cwd, const char *new_cwd, - void *data UNUSED) + void *data) { + struct repository *repo = data; char *path = reparent_relative_path(old_cwd, new_cwd, - repo_get_git_dir(the_repository)); + repo_get_git_dir(repo)); trace_printf_key(&trace_setup_key, "setup: move $GIT_DIR to '%s'", path); - set_git_dir_1(path, true); + set_git_dir_1(repo, path, true); free(path); } -static void set_git_dir(const char *path, int make_realpath) +static void set_git_dir(struct repository *repo, const char *path, 
int make_realpath) { struct strbuf realpath = STRBUF_INIT; @@ -1104,14 +1109,15 @@ static void set_git_dir(const char *path, int make_realpath) path = realpath.buf; } - set_git_dir_1(path, false); + set_git_dir_1(repo, path, false); if (!is_absolute_path(path)) - chdir_notify_register(NULL, update_relative_gitdir, NULL); + chdir_notify_register(NULL, update_relative_gitdir, repo); strbuf_release(&realpath); } -static const char *setup_explicit_git_dir(const char *gitdirenv, +static const char *setup_explicit_git_dir(struct repository *repo, + const char *gitdirenv, struct strbuf *cwd, struct repository_format *repo_fmt, int *nongit_ok) @@ -1139,7 +1145,7 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, die(_("not a git repository: '%s'"), gitdirenv); } - if (check_repository_format_gently(gitdirenv, repo_fmt, nongit_ok)) { + if (check_repository_format_gently(repo, gitdirenv, repo_fmt, nongit_ok)) { free(gitfile); return NULL; } @@ -1155,7 +1161,7 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, } /* #18, #26 */ - set_git_dir(gitdirenv, 0); + set_git_dir(repo, gitdirenv, 0); free(gitfile); return NULL; } @@ -1177,7 +1183,7 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, } else if (!git_env_bool(GIT_IMPLICIT_WORK_TREE_ENVIRONMENT, 1)) { /* #16d */ - set_git_dir(gitdirenv, 0); + set_git_dir(repo, gitdirenv, 0); free(gitfile); return NULL; } @@ -1185,18 +1191,18 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, set_git_work_tree("."); /* set_git_work_tree() must have been called by now */ - worktree = repo_get_work_tree(the_repository); + worktree = repo_get_work_tree(repo); /* both repo_get_work_tree() and cwd are already normalized */ if (!strcmp(cwd->buf, worktree)) { /* cwd == worktree */ - set_git_dir(gitdirenv, 0); + set_git_dir(repo, gitdirenv, 0); free(gitfile); return NULL; } offset = dir_inside_of(cwd->buf, worktree); if (offset >= 0) { /* cwd inside worktree? 
*/ - set_git_dir(gitdirenv, 1); + set_git_dir(repo, gitdirenv, 1); if (chdir(worktree)) die_errno(_("cannot chdir to '%s'"), worktree); strbuf_addch(cwd, '/'); @@ -1205,17 +1211,18 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, } /* cwd outside worktree */ - set_git_dir(gitdirenv, 0); + set_git_dir(repo, gitdirenv, 0); free(gitfile); return NULL; } -static const char *setup_discovered_git_dir(const char *gitdir, +static const char *setup_discovered_git_dir(struct repository *repo, + const char *gitdir, struct strbuf *cwd, int offset, struct repository_format *repo_fmt, int *nongit_ok) { - if (check_repository_format_gently(gitdir, repo_fmt, nongit_ok)) + if (check_repository_format_gently(repo, gitdir, repo_fmt, nongit_ok)) return NULL; /* --work-tree is set without --git-dir; use discovered one */ @@ -1227,14 +1234,14 @@ static const char *setup_discovered_git_dir(const char *gitdir, gitdir = to_free = real_pathdup(gitdir, 1); if (chdir(cwd->buf)) die_errno(_("cannot come back to cwd")); - ret = setup_explicit_git_dir(gitdir, cwd, repo_fmt, nongit_ok); + ret = setup_explicit_git_dir(repo, gitdir, cwd, repo_fmt, nongit_ok); free(to_free); return ret; } /* #16.2, #17.2, #20.2, #21.2, #24, #25, #28, #29 (see t1510) */ if (is_bare_repository_cfg > 0) { - set_git_dir(gitdir, (offset != cwd->len)); + set_git_dir(repo, gitdir, (offset != cwd->len)); if (chdir(cwd->buf)) die_errno(_("cannot come back to cwd")); return NULL; @@ -1243,7 +1250,7 @@ static const char *setup_discovered_git_dir(const char *gitdir, /* #0, #1, #5, #8, #9, #12, #13 */ set_git_work_tree("."); if (strcmp(gitdir, DEFAULT_GIT_DIR_ENVIRONMENT)) - set_git_dir(gitdir, 0); + set_git_dir(repo, gitdir, 0); inside_git_dir = 0; inside_work_tree = 1; if (offset >= cwd->len) @@ -1258,13 +1265,14 @@ static const char *setup_discovered_git_dir(const char *gitdir, } /* #16.1, #17.1, #20.1, #21.1, #22.1 (see t1510) */ -static const char *setup_bare_git_dir(struct strbuf *cwd, int offset, 
+static const char *setup_bare_git_dir(struct repository *repo, + struct strbuf *cwd, int offset, struct repository_format *repo_fmt, int *nongit_ok) { int root_len; - if (check_repository_format_gently(".", repo_fmt, nongit_ok)) + if (check_repository_format_gently(repo, ".", repo_fmt, nongit_ok)) return NULL; setenv(GIT_IMPLICIT_WORK_TREE_ENVIRONMENT, "0", 1); @@ -1276,7 +1284,7 @@ static const char *setup_bare_git_dir(struct strbuf *cwd, int offset, gitdir = offset == cwd->len ? "." : xmemdupz(cwd->buf, offset); if (chdir(cwd->buf)) die_errno(_("cannot come back to cwd")); - return setup_explicit_git_dir(gitdir, cwd, repo_fmt, nongit_ok); + return setup_explicit_git_dir(repo, gitdir, cwd, repo_fmt, nongit_ok); } inside_git_dir = 1; @@ -1286,10 +1294,10 @@ static const char *setup_bare_git_dir(struct strbuf *cwd, int offset, die_errno(_("cannot come back to cwd")); root_len = offset_1st_component(cwd->buf); strbuf_setlen(cwd, offset > root_len ? offset : root_len); - set_git_dir(cwd->buf, 0); + set_git_dir(repo, cwd->buf, 0); } else - set_git_dir(".", 0); + set_git_dir(repo, ".", 0); return NULL; } @@ -1827,7 +1835,7 @@ const char *enter_repo(const char *path, unsigned flags) } if (is_git_directory(".")) { - set_git_dir(".", 0); + set_git_dir(the_repository, ".", 0); check_repository_format(NULL); return path; } @@ -1891,18 +1899,18 @@ const char *setup_git_directory_gently(int *nongit_ok) switch (setup_git_directory_gently_1(&dir, &gitdir, &report, 1)) { case GIT_DIR_EXPLICIT: - prefix = setup_explicit_git_dir(gitdir.buf, &cwd, &repo_fmt, nongit_ok); + prefix = setup_explicit_git_dir(the_repository, gitdir.buf, &cwd, &repo_fmt, nongit_ok); break; case GIT_DIR_DISCOVERED: if (dir.len < cwd.len && chdir(dir.buf)) die(_("cannot change to '%s'"), dir.buf); - prefix = setup_discovered_git_dir(gitdir.buf, &cwd, dir.len, + prefix = setup_discovered_git_dir(the_repository, gitdir.buf, &cwd, dir.len, &repo_fmt, nongit_ok); break; case GIT_DIR_BARE: if (dir.len < cwd.len 
&& chdir(dir.buf)) die(_("cannot change to '%s'"), dir.buf); - prefix = setup_bare_git_dir(&cwd, dir.len, &repo_fmt, nongit_ok); + prefix = setup_bare_git_dir(the_repository, &cwd, dir.len, &repo_fmt, nongit_ok); break; case GIT_DIR_HIT_CEILING: if (!nongit_ok) @@ -2044,7 +2052,7 @@ const char *setup_git_directory_gently(int *nongit_ok) free(payload); } - setup_original_cwd(); + setup_original_cwd(the_repository); strbuf_release(&dir); strbuf_release(&gitdir); @@ -2110,7 +2118,7 @@ void check_repository_format(struct repository_format *fmt) struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; if (!fmt) fmt = &repo_fmt; - check_repository_format_gently(repo_get_git_dir(the_repository), fmt, NULL); + check_repository_format_gently(the_repository, repo_get_git_dir(the_repository), fmt, NULL); startup_info->have_repository = 1; repo_set_hash_algo(the_repository, fmt->hash_algo); repo_set_compat_hash_algo(the_repository, fmt->compat_hash_algo); @@ -2239,7 +2247,9 @@ const char *get_template_dir(const char *option_template) #define GIT_DEFAULT_HASH_ENVIRONMENT "GIT_DEFAULT_HASH" -static void copy_templates_1(struct strbuf *path, struct strbuf *template_path, +static void copy_templates_1(struct repository *repo, + struct strbuf *path, + struct strbuf *template_path, DIR *dir) { size_t path_baselen = path->len; @@ -2253,7 +2263,7 @@ static void copy_templates_1(struct strbuf *path, struct strbuf *template_path, * with the way the namespace under .git/ is organized, should * be really carefully chosen. 
*/ - safe_create_dir(the_repository, path->buf, 1); + safe_create_dir(repo, path->buf, 1); while ((de = readdir(dir)) != NULL) { struct stat st_git, st_template; int exists = 0; @@ -2281,7 +2291,7 @@ static void copy_templates_1(struct strbuf *path, struct strbuf *template_path, die_errno(_("cannot opendir '%s'"), template_path->buf); strbuf_addch(path, '/'); strbuf_addch(template_path, '/'); - copy_templates_1(path, template_path, subdir); + copy_templates_1(repo, path, template_path, subdir); closedir(subdir); } else if (exists) @@ -2306,7 +2316,7 @@ static void copy_templates_1(struct strbuf *path, struct strbuf *template_path, } } -static void copy_templates(const char *option_template) +static void copy_templates(struct repository *repo, const char *option_template) { const char *template_dir = get_template_dir(option_template); struct strbuf path = STRBUF_INIT; @@ -2347,9 +2357,9 @@ static void copy_templates(const char *option_template) goto close_free_return; } - strbuf_addstr(&path, repo_get_common_dir(the_repository)); + strbuf_addstr(&path, repo_get_common_dir(repo)); strbuf_complete(&path, '/'); - copy_templates_1(&path, &template_path, dir); + copy_templates_1(repo, &path, &template_path, dir); close_free_return: closedir(dir); free_return: @@ -2443,13 +2453,13 @@ void initialize_repository_version(int hash_algo, strbuf_release(&repo_version); } -static int is_reinit(void) +static int is_reinit(struct repository *repo) { struct strbuf buf = STRBUF_INIT; char junk[2]; int ret; - repo_git_path_replace(the_repository, &buf, "HEAD"); + repo_git_path_replace(repo, &buf, "HEAD"); ret = !access(buf.buf, R_OK) || readlink(buf.buf, junk, sizeof(junk) - 1) != -1; strbuf_release(&buf); return ret; @@ -2459,7 +2469,7 @@ void create_reference_database(const char *initial_branch, int quiet) { struct strbuf err = STRBUF_INIT; char *to_free = NULL; - int reinit = is_reinit(); + int reinit = is_reinit(the_repository); if 
(ref_store_create_on_disk(get_main_ref_store(the_repository), 0, &err)) die("failed to set up refs db: %s", err.buf); @@ -2493,7 +2503,8 @@ void create_reference_database(const char *initial_branch, int quiet) free(to_free); } -static int create_default_files(const char *template_path, +static int create_default_files(struct repository *repo, + const char *template_path, const char *original_git_dir, const struct repository_format *fmt, int init_shared_repository) @@ -2502,7 +2513,7 @@ static int create_default_files(const char *template_path, struct strbuf path = STRBUF_INIT; int reinit; int filemode; - const char *work_tree = repo_get_work_tree(the_repository); + const char *work_tree = repo_get_work_tree(repo); /* * First copy the templates -- we might have the default @@ -2513,19 +2524,19 @@ static int create_default_files(const char *template_path, * values (since we've just potentially changed what's available on * disk). */ - copy_templates(template_path); - repo_config_clear(the_repository); - repo_settings_reset_shared_repository(the_repository); - repo_config(the_repository, git_default_config, NULL); + copy_templates(repo, template_path); + repo_config_clear(repo); + repo_settings_reset_shared_repository(repo); + repo_config(repo, git_default_config, NULL); - reinit = is_reinit(); + reinit = is_reinit(repo); /* * We must make sure command-line options continue to override any * values we might have just re-read from the config. */ if (init_shared_repository != -1) - repo_settings_set_shared_repository(the_repository, + repo_settings_set_shared_repository(repo, init_shared_repository); is_bare_repository_cfg = !work_tree; @@ -2534,14 +2545,14 @@ static int create_default_files(const char *template_path, * We would have created the above under user's umask -- under * shared-repository settings, we would need to fix them up. 
*/ - if (repo_settings_get_shared_repository(the_repository)) { - adjust_shared_perm(the_repository, repo_get_git_dir(the_repository)); + if (repo_settings_get_shared_repository(repo)) { + adjust_shared_perm(repo, repo_get_git_dir(repo)); } initialize_repository_version(fmt->hash_algo, fmt->ref_storage_format, reinit); /* Check filemode trustability */ - repo_git_path_replace(the_repository, &path, "config"); + repo_git_path_replace(repo, &path, "config"); filemode = TEST_FILEMODE; if (TEST_FILEMODE && !lstat(path.buf, &st1)) { struct stat st2; @@ -2552,22 +2563,22 @@ static int create_default_files(const char *template_path, if (filemode && !reinit && (st1.st_mode & S_IXUSR)) filemode = 0; } - repo_config_set(the_repository, "core.filemode", filemode ? "true" : "false"); + repo_config_set(repo, "core.filemode", filemode ? "true" : "false"); if (is_bare_repository()) - repo_config_set(the_repository, "core.bare", "true"); + repo_config_set(repo, "core.bare", "true"); else { - repo_config_set(the_repository, "core.bare", "false"); + repo_config_set(repo, "core.bare", "false"); /* allow template config file to override the default */ - if (repo_settings_get_log_all_ref_updates(the_repository) == LOG_REFS_UNSET) - repo_config_set(the_repository, "core.logallrefupdates", "true"); + if (repo_settings_get_log_all_ref_updates(repo) == LOG_REFS_UNSET) + repo_config_set(repo, "core.logallrefupdates", "true"); if (needs_work_tree_config(original_git_dir, work_tree)) - repo_config_set(the_repository, "core.worktree", work_tree); + repo_config_set(repo, "core.worktree", work_tree); } if (!reinit) { /* Check if symlink is supported in the work tree */ - repo_git_path_replace(the_repository, &path, "tXXXXXX"); + repo_git_path_replace(repo, &path, "tXXXXXX"); if (!close(xmkstemp(path.buf)) && !unlink(path.buf) && !symlink("testing", path.buf) && @@ -2575,12 +2586,12 @@ static int create_default_files(const char *template_path, S_ISLNK(st1.st_mode)) unlink(path.buf); /* good */ 
else - repo_config_set(the_repository, "core.symlinks", "false"); + repo_config_set(repo, "core.symlinks", "false"); /* Check if the filesystem is case-insensitive */ - repo_git_path_replace(the_repository, &path, "CoNfIg"); + repo_git_path_replace(repo, &path, "CoNfIg"); if (!access(path.buf, F_OK)) - repo_config_set(the_repository, "core.ignorecase", "true"); + repo_config_set(repo, "core.ignorecase", "true"); probe_utf8_pathname_composition(); } @@ -2588,23 +2599,23 @@ static int create_default_files(const char *template_path, return reinit; } -static void create_object_directory(void) +static void create_object_directory(struct repository *repo) { struct strbuf path = STRBUF_INIT; size_t baselen; - strbuf_addstr(&path, repo_get_object_directory(the_repository)); + strbuf_addstr(&path, repo_get_object_directory(repo)); baselen = path.len; - safe_create_dir(the_repository, path.buf, 1); + safe_create_dir(repo, path.buf, 1); strbuf_setlen(&path, baselen); strbuf_addstr(&path, "/pack"); - safe_create_dir(the_repository, path.buf, 1); + safe_create_dir(repo, path.buf, 1); strbuf_setlen(&path, baselen); strbuf_addstr(&path, "/info"); - safe_create_dir(the_repository, path.buf, 1); + safe_create_dir(repo, path.buf, 1); strbuf_release(&path); } @@ -2682,7 +2693,8 @@ static int read_default_format_config(const char *key, const char *value, return ret; } -static void repository_format_configure(struct repository_format *repo_fmt, +static void repository_format_configure(struct repository *repo, + struct repository_format *repo_fmt, int hash, enum ref_storage_format ref_format) { struct default_format_config cfg = { @@ -2719,7 +2731,7 @@ static void repository_format_configure(struct repository_format *repo_fmt, } else if (cfg.hash != GIT_HASH_UNKNOWN) { repo_fmt->hash_algo = cfg.hash; } - repo_set_hash_algo(the_repository, repo_fmt->hash_algo); + repo_set_hash_algo(repo, repo_fmt->hash_algo); env = getenv("GIT_DEFAULT_REF_FORMAT"); if (repo_fmt->version >= 0 && @@ 
-2758,7 +2770,7 @@ static void repository_format_configure(struct repository_format *repo_fmt, free(backend); } - repo_set_ref_storage_format(the_repository, repo_fmt->ref_storage_format, + repo_set_ref_storage_format(repo, repo_fmt->ref_storage_format, repo_fmt->ref_storage_payload); } @@ -2782,12 +2794,12 @@ int init_db(const char *git_dir, const char *real_git_dir, if (!exist_ok && !stat(real_git_dir, &st)) die(_("%s already exists"), real_git_dir); - set_git_dir(real_git_dir, 1); + set_git_dir(the_repository, real_git_dir, 1); git_dir = repo_get_git_dir(the_repository); separate_git_dir(git_dir, original_git_dir); } else { - set_git_dir(git_dir, 1); + set_git_dir(the_repository, git_dir, 1); git_dir = repo_get_git_dir(the_repository); } startup_info->have_repository = 1; @@ -2800,7 +2812,7 @@ int init_db(const char *git_dir, const char *real_git_dir, */ check_repository_format(&repo_fmt); - repository_format_configure(&repo_fmt, hash, ref_storage_format); + repository_format_configure(the_repository, &repo_fmt, hash, ref_storage_format); /* * Ensure `core.hidedotfiles` is processed. 
This must happen after we @@ -2811,12 +2823,12 @@ int init_db(const char *git_dir, const char *real_git_dir, safe_create_dir(the_repository, git_dir, 0); - reinit = create_default_files(template_dir, original_git_dir, + reinit = create_default_files(the_repository, template_dir, original_git_dir, &repo_fmt, init_shared_repository); if (!(flags & INIT_DB_SKIP_REFDB)) create_reference_database(initial_branch, flags & INIT_DB_QUIET); - create_object_directory(); + create_object_directory(the_repository); if (repo_settings_get_shared_repository(the_repository)) { char buf[10]; From dbf37e57a4ad4ad96824a4af93ddaa3f5e743f7e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:24 +0200 Subject: [PATCH 048/241] setup: stop using `the_repository` in `is_inside_work_tree()` The function `is_inside_work_tree()` verifies whether or not the current working directory is located inside the worktree of `the_repository`. This is done by taking the worktree path and verifying that it's a prefix of the current working directory. This information is cached so that we don't have to re-do this check multiple times. Furthermore, we proactively set the value in multiple locations so that we don't even have to perform the check when we have discovered the repository. While we could simply move the caching variable into the repository, the current layout doesn't really feel sensible in the first place: - It can easily lead to false positives or negatives if at any point in time we may switch the current working directory. - We don't call the function in a hot loop, and neither is it overly expensive to compute. Drop the caching infrastructure and instead compute the property ad-hoc via an injected repository. Note that there is one small gotcha: we sometimes may end up with relative directory paths, and if so `is_inside_dir()` might fail. This wasn't an issue before because of how we proactively set the cached value during repository discovery. 
Now that we stop doing that it becomes a problem though, but it is worked around by resolving the repository directory via `realpath()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/ls-files.c | 2 +- builtin/rev-parse.c | 4 ++-- object-name.c | 2 +- setup.c | 18 +++++++----------- setup.h | 2 +- submodule.c | 2 +- 6 files changed, 13 insertions(+), 17 deletions(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index b148607f7a1468..09d95111b35b9f 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -703,7 +703,7 @@ int cmd_ls_files(int argc, if (dir.exclude_per_dir) exc_given = 1; - if (require_work_tree && !is_inside_work_tree()) + if (require_work_tree && !is_inside_work_tree(repo)) setup_work_tree(); if (recurse_submodules && diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 01a62800e87938..9629e1ccf79c7f 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -1006,7 +1006,7 @@ int cmd_rev_parse(int argc, } if (!strcmp(arg, "--show-cdup")) { const char *pfx = prefix; - if (!is_inside_work_tree()) { + if (!is_inside_work_tree(the_repository)) { const char *work_tree = repo_get_work_tree(the_repository); if (work_tree) @@ -1068,7 +1068,7 @@ int cmd_rev_parse(int argc, continue; } if (!strcmp(arg, "--is-inside-work-tree")) { - printf("%s\n", is_inside_work_tree() ? "true" + printf("%s\n", is_inside_work_tree(the_repository) ? 
"true" : "false"); continue; } diff --git a/object-name.c b/object-name.c index e5adec4c9d5084..7b7e546b1f2c43 100644 --- a/object-name.c +++ b/object-name.c @@ -1990,7 +1990,7 @@ static char *resolve_relative_path(struct repository *r, const char *rel) if (!starts_with(rel, "./") && !starts_with(rel, "../")) return NULL; - if (r != the_repository || !is_inside_work_tree()) + if (r != the_repository || !is_inside_work_tree(the_repository)) die(_("relative path syntax can't be used outside working tree")); /* die() inside prefix_path() if resolved path is outside worktree */ diff --git a/setup.c b/setup.c index ba2898473a58a3..4df65ba2e845e3 100644 --- a/setup.c +++ b/setup.c @@ -27,7 +27,6 @@ #include "worktree.h" static int inside_git_dir = -1; -static int inside_work_tree = -1; static int work_tree_config_is_bogus; enum allowed_bare_repo { ALLOWED_BARE_REPO_EXPLICIT = 0, @@ -299,7 +298,7 @@ void verify_filename(const char *prefix, */ void verify_non_filename(const char *prefix, const char *arg) { - if (!is_inside_work_tree() || is_inside_git_dir()) + if (!is_inside_work_tree(the_repository) || is_inside_git_dir()) return; if (*arg == '-') return; /* flag */ @@ -477,11 +476,13 @@ int is_inside_git_dir(void) return inside_git_dir; } -int is_inside_work_tree(void) +int is_inside_work_tree(struct repository *repo) { - if (inside_work_tree < 0) - inside_work_tree = is_inside_dir(repo_get_work_tree(the_repository)); - return inside_work_tree; + static struct strbuf buf = STRBUF_INIT; + const char *worktree = repo_get_work_tree(repo); + if (!worktree) + return 0; + return is_inside_dir(strbuf_realpath(&buf, worktree, 1)); } void setup_work_tree(void) @@ -798,13 +799,10 @@ static int check_repository_format_gently(struct repository *repo, if (!has_common) { if (candidate->is_bare != -1) { is_bare_repository_cfg = candidate->is_bare; - if (is_bare_repository_cfg == 1) - inside_work_tree = -1; } if (candidate->work_tree) { free(git_work_tree_cfg); git_work_tree_cfg = 
xstrdup(candidate->work_tree); - inside_work_tree = -1; } } @@ -1252,7 +1250,6 @@ static const char *setup_discovered_git_dir(struct repository *repo, if (strcmp(gitdir, DEFAULT_GIT_DIR_ENVIRONMENT)) set_git_dir(repo, gitdir, 0); inside_git_dir = 0; - inside_work_tree = 1; if (offset >= cwd->len) return NULL; @@ -1288,7 +1285,6 @@ static const char *setup_bare_git_dir(struct repository *repo, } inside_git_dir = 1; - inside_work_tree = 0; if (offset != cwd->len) { if (chdir(cwd->buf)) die_errno(_("cannot come back to cwd")); diff --git a/setup.h b/setup.h index 80bc6e5f078af8..7c0aa75319dafd 100644 --- a/setup.h +++ b/setup.h @@ -5,7 +5,7 @@ #include "string-list.h" int is_inside_git_dir(void); -int is_inside_work_tree(void); +int is_inside_work_tree(struct repository *repo); int get_common_dir_noenv(struct strbuf *sb, const char *gitdir); int get_common_dir(struct strbuf *sb, const char *gitdir); diff --git a/submodule.c b/submodule.c index e20537ba8dc66b..46116cee887554 100644 --- a/submodule.c +++ b/submodule.c @@ -2622,7 +2622,7 @@ int get_superproject_working_tree(struct strbuf *buf) int code; ssize_t len; - if (!is_inside_work_tree()) + if (!is_inside_work_tree(the_repository)) /* * FIXME: * We might have a superproject, but it is harder From 8f1dbd22a6d9000d077d35fa5db34a8ccbe39866 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:25 +0200 Subject: [PATCH 049/241] setup: stop using `the_repository` in `is_inside_git_dir()` Similar as with the preceding commit, `is_inside_git_dir()` determines whether the current working directory is located inside the gitdir of `the_repository`. Perform the same refactoring by dropping the caching mechanism and injecting the repository that shall be checked. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/rev-parse.c | 2 +- setup.c | 12 ++++-------- setup.h | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 9629e1ccf79c7f..6dd49ae3f44edf 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -1063,7 +1063,7 @@ int cmd_rev_parse(int argc, continue; } if (!strcmp(arg, "--is-inside-git-dir")) { - printf("%s\n", is_inside_git_dir() ? "true" + printf("%s\n", is_inside_git_dir(the_repository) ? "true" : "false"); continue; } diff --git a/setup.c b/setup.c index 4df65ba2e845e3..864ce41116ae14 100644 --- a/setup.c +++ b/setup.c @@ -26,7 +26,6 @@ #include "trace2.h" #include "worktree.h" -static int inside_git_dir = -1; static int work_tree_config_is_bogus; enum allowed_bare_repo { ALLOWED_BARE_REPO_EXPLICIT = 0, @@ -298,7 +297,7 @@ void verify_filename(const char *prefix, */ void verify_non_filename(const char *prefix, const char *arg) { - if (!is_inside_work_tree(the_repository) || is_inside_git_dir()) + if (!is_inside_work_tree(the_repository) || is_inside_git_dir(the_repository)) return; if (*arg == '-') return; /* flag */ @@ -469,11 +468,10 @@ int is_nonbare_repository_dir(struct strbuf *path) return ret; } -int is_inside_git_dir(void) +int is_inside_git_dir(struct repository *repo) { - if (inside_git_dir < 0) - inside_git_dir = is_inside_dir(repo_get_git_dir(the_repository)); - return inside_git_dir; + static struct strbuf buf = STRBUF_INIT; + return is_inside_dir(strbuf_realpath(&buf, repo_get_git_dir(repo), 1)); } int is_inside_work_tree(struct repository *repo) @@ -1249,7 +1247,6 @@ static const char *setup_discovered_git_dir(struct repository *repo, set_git_work_tree("."); if (strcmp(gitdir, DEFAULT_GIT_DIR_ENVIRONMENT)) set_git_dir(repo, gitdir, 0); - inside_git_dir = 0; if (offset >= cwd->len) return NULL; @@ -1284,7 +1281,6 @@ static const char *setup_bare_git_dir(struct repository *repo, return 
setup_explicit_git_dir(repo, gitdir, cwd, repo_fmt, nongit_ok); } - inside_git_dir = 1; if (offset != cwd->len) { if (chdir(cwd->buf)) die_errno(_("cannot come back to cwd")); diff --git a/setup.h b/setup.h index 7c0aa75319dafd..71d3f918837873 100644 --- a/setup.h +++ b/setup.h @@ -4,7 +4,7 @@ #include "refs.h" #include "string-list.h" -int is_inside_git_dir(void); +int is_inside_git_dir(struct repository *repo); int is_inside_work_tree(struct repository *repo); int get_common_dir_noenv(struct strbuf *sb, const char *gitdir); int get_common_dir(struct strbuf *sb, const char *gitdir); From c51019bf0e33b539c6079f6025b18af751593868 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:26 +0200 Subject: [PATCH 050/241] setup: stop using `the_repository` in `prefix_path()` Stop using `the_repository` in `prefix_path()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/blame.c | 2 +- builtin/check-attr.c | 2 +- builtin/checkout-index.c | 4 ++-- builtin/mv.c | 5 +++-- builtin/sparse-checkout.c | 3 ++- builtin/update-index.c | 6 +++--- line-log.c | 2 +- object-name.c | 2 +- pathspec.c | 2 +- setup.c | 15 ++++++++------- setup.h | 4 ++-- t/helper/test-path-utils.c | 2 +- 12 files changed, 26 insertions(+), 23 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index f3a11eff44ffc7..ffbd3ce5c5a2e3 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -708,7 +708,7 @@ static unsigned parse_score(const char *arg) static char *add_prefix(const char *prefix, const char *path) { - return prefix_path(prefix, prefix ? strlen(prefix) : 0, path); + return prefix_path(the_repository, prefix, prefix ? 
strlen(prefix) : 0, path); } static int git_blame_config(const char *var, const char *value, diff --git a/builtin/check-attr.c b/builtin/check-attr.c index 51ed48ce4370c3..04b86e42ae66ca 100644 --- a/builtin/check-attr.c +++ b/builtin/check-attr.c @@ -67,7 +67,7 @@ static void check_attr(const char *prefix, struct attr_check *check, { char *full_path = - prefix_path(prefix, prefix ? strlen(prefix) : 0, file); + prefix_path(the_repository, prefix, prefix ? strlen(prefix) : 0, file); if (collect_all) { git_all_attrs(the_repository->index, full_path, check); diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c index 188128aebd9bc0..311b94ff3174a6 100644 --- a/builtin/checkout-index.c +++ b/builtin/checkout-index.c @@ -303,7 +303,7 @@ int cmd_checkout_index(int argc, die("git checkout-index: don't mix '--all' and explicit filenames"); if (read_from_stdin) die("git checkout-index: don't mix '--stdin' and explicit filenames"); - p = prefix_path(prefix, prefix_length, arg); + p = prefix_path(repo, prefix, prefix_length, arg); err |= checkout_file(repo->index, p, prefix); free(p); } @@ -325,7 +325,7 @@ int cmd_checkout_index(int argc, die("line is badly quoted"); strbuf_swap(&buf, &unquoted); } - p = prefix_path(prefix, prefix_length, buf.buf); + p = prefix_path(repo, prefix, prefix_length, buf.buf); err |= checkout_file(repo->index, p, prefix); free(p); } diff --git a/builtin/mv.c b/builtin/mv.c index 2215d34e31f29a..948b3306390337 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -71,7 +71,7 @@ static void internal_prefix_pathspec(struct strvec *out, trimmed = xmemdupz(pathspec[i], to_copy); maybe_basename = (flags & DUP_BASENAME) ? 
basename(trimmed) : trimmed; - prefixed_path = prefix_path(prefix, prefixlen, maybe_basename); + prefixed_path = prefix_path(the_repository, prefix, prefixlen, maybe_basename); strvec_push(out, prefixed_path); free(prefixed_path); @@ -394,7 +394,8 @@ int cmd_mv(int argc, for (j = 0; j < last - first; j++) { const struct cache_entry *ce = the_repository->index->cache[first + j]; const char *path = ce->name; - char *prefixed_path = prefix_path(dst_with_slash, dst_with_slash_len, path + length + 1); + char *prefixed_path = prefix_path(the_repository, dst_with_slash, + dst_with_slash_len, path + length + 1); strvec_push(&sources, path); strvec_push(&destinations, prefixed_path); diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index f4aa405da93760..2af50fb2f9cb22 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -735,7 +735,8 @@ static void sanitize_paths(struct repository *repo, int prefix_len = strlen(prefix); for (i = 0; i < args->nr; i++) { - char *prefixed_path = prefix_path(prefix, prefix_len, args->v[i]); + char *prefixed_path = prefix_path(the_repository, prefix, + prefix_len, args->v[i]); strvec_replace(args, i, prefixed_path); free(prefixed_path); } diff --git a/builtin/update-index.c b/builtin/update-index.c index 8a5907767bf297..7434112b8e69b0 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -655,7 +655,7 @@ static int do_unresolve(int ac, const char **av, for (i = 1; i < ac; i++) { const char *arg = av[i]; - char *p = prefix_path(prefix, prefix_length, arg); + char *p = prefix_path(the_repository, prefix, prefix_length, arg); err |= unresolve_one(p); free(p); } @@ -1158,7 +1158,7 @@ int cmd_update_index(int argc, } setup_work_tree(); - p = prefix_path(prefix, prefix_length, path); + p = prefix_path(the_repository, prefix, prefix_length, path); update_one(p); if (set_executable_bit) chmod_path(set_executable_bit, p); @@ -1208,7 +1208,7 @@ int cmd_update_index(int argc, die("line is badly quoted"); 
strbuf_swap(&buf, &unquoted); } - p = prefix_path(prefix, prefix_length, buf.buf); + p = prefix_path(the_repository, prefix, prefix_length, buf.buf); update_one(p); if (set_executable_bit) chmod_path(set_executable_bit, p); diff --git a/line-log.c b/line-log.c index eeaf68454e2246..a6fc46d331a9c7 100644 --- a/line-log.c +++ b/line-log.c @@ -589,7 +589,7 @@ parse_lines(struct repository *r, struct commit *commit, range_part = xstrndup(item->string, name_part - item->string); name_part++; - full_name = prefix_path(prefix, prefix ? strlen(prefix) : 0, + full_name = prefix_path(r, prefix, prefix ? strlen(prefix) : 0, name_part); spec = alloc_filespec(full_name); diff --git a/object-name.c b/object-name.c index 7b7e546b1f2c43..ca5f060a4efae2 100644 --- a/object-name.c +++ b/object-name.c @@ -1994,7 +1994,7 @@ static char *resolve_relative_path(struct repository *r, const char *rel) die(_("relative path syntax can't be used outside working tree")); /* die() inside prefix_path() if resolved path is outside worktree */ - return prefix_path(startup_info->prefix, + return prefix_path(the_repository, startup_info->prefix, startup_info->prefix ? 
strlen(startup_info->prefix) : 0, rel); } diff --git a/pathspec.c b/pathspec.c index 5993c4afa0eb37..f78b22709ccb67 100644 --- a/pathspec.c +++ b/pathspec.c @@ -486,7 +486,7 @@ static void init_pathspec_item(struct pathspec_item *item, unsigned flags, match = xstrdup(copyfrom); prefixlen = 0; } else { - match = prefix_path_gently(prefix, prefixlen, + match = prefix_path_gently(the_repository, prefix, prefixlen, &prefixlen, copyfrom); if (!match) { const char *hint_path; diff --git a/setup.c b/setup.c index 864ce41116ae14..331ea86a40ebdf 100644 --- a/setup.c +++ b/setup.c @@ -117,7 +117,8 @@ static int abspath_part_inside_repo(struct repository *repo, char *path) * ../../sub1/sub2/foo -> sub1/sub2/foo (but no remaining prefix) * `pwd`/../bar -> sub1/bar (no remaining prefix) */ -char *prefix_path_gently(const char *prefix, int len, +char *prefix_path_gently(struct repository *repo, + const char *prefix, int len, int *remaining_prefix, const char *path) { const char *orig = path; @@ -130,7 +131,7 @@ char *prefix_path_gently(const char *prefix, int len, free(sanitized); return NULL; } - if (abspath_part_inside_repo(the_repository, sanitized)) { + if (abspath_part_inside_repo(repo, sanitized)) { free(sanitized); return NULL; } @@ -146,13 +147,13 @@ char *prefix_path_gently(const char *prefix, int len, return sanitized; } -char *prefix_path(const char *prefix, int len, const char *path) +char *prefix_path(struct repository *repo, const char *prefix, int len, const char *path) { - char *r = prefix_path_gently(prefix, len, NULL, path); + char *r = prefix_path_gently(repo, prefix, len, NULL, path); if (!r) { - const char *hint_path = repo_get_work_tree(the_repository); + const char *hint_path = repo_get_work_tree(repo); if (!hint_path) - hint_path = repo_get_git_dir(the_repository); + hint_path = repo_get_git_dir(repo); die(_("'%s' is outside repository at '%s'"), path, absolute_path(hint_path)); } @@ -162,7 +163,7 @@ char *prefix_path(const char *prefix, int len, const 
char *path) int path_inside_repo(const char *prefix, const char *path) { int len = prefix ? strlen(prefix) : 0; - char *r = prefix_path_gently(prefix, len, NULL, path); + char *r = prefix_path_gently(the_repository, prefix, len, NULL, path); if (r) { free(r); return 1; diff --git a/setup.h b/setup.h index 71d3f918837873..24034572b150c7 100644 --- a/setup.h +++ b/setup.h @@ -138,8 +138,8 @@ const char *enter_repo(const char *path, unsigned flags); const char *setup_git_directory_gently(int *); const char *setup_git_directory(void); -char *prefix_path(const char *prefix, int len, const char *path); -char *prefix_path_gently(const char *prefix, int len, int *remaining, const char *path); +char *prefix_path(struct repository *repo, const char *prefix, int len, const char *path); +char *prefix_path_gently(struct repository *repo, const char *prefix, int len, int *remaining, const char *path); int check_filename(const char *prefix, const char *name); void verify_filename(const char *prefix, diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 874542ec3462a5..163fdeefb0f7d9 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -379,7 +379,7 @@ int cmd__path_utils(int argc, const char **argv) int nongit_ok; setup_git_directory_gently(&nongit_ok); while (argc > 3) { - char *pfx = prefix_path(prefix, prefix_len, argv[3]); + char *pfx = prefix_path(the_repository, prefix, prefix_len, argv[3]); puts(pfx); free(pfx); From d0d785e51e5253ac82407159c7d78c63e9a683d4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:27 +0200 Subject: [PATCH 051/241] setup: stop using `the_repository` in `path_inside_repo()` Stop using `the_repository` in `path_inside_repo()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/diff.c | 4 ++-- setup.c | 4 ++-- setup.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/builtin/diff.c b/builtin/diff.c index 0b23c41456837f..7ddebce2ac7cec 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -471,8 +471,8 @@ int cmd_diff(int argc, * as a colourful "diff" replacement. */ if (nongit || ((argc == i + 2) && - (!path_inside_repo(prefix, argv[i]) || - !path_inside_repo(prefix, argv[i + 1])))) + (!path_inside_repo(the_repository, prefix, argv[i]) || + !path_inside_repo(the_repository, prefix, argv[i + 1])))) no_index = DIFF_NO_INDEX_IMPLICIT; } diff --git a/setup.c b/setup.c index 331ea86a40ebdf..6cfb05c5278aa8 100644 --- a/setup.c +++ b/setup.c @@ -160,10 +160,10 @@ char *prefix_path(struct repository *repo, const char *prefix, int len, const ch return r; } -int path_inside_repo(const char *prefix, const char *path) +int path_inside_repo(struct repository *repo, const char *prefix, const char *path) { int len = prefix ? 
strlen(prefix) : 0; - char *r = prefix_path_gently(the_repository, prefix, len, NULL, path); + char *r = prefix_path_gently(repo, prefix, len, NULL, path); if (r) { free(r); return 1; diff --git a/setup.h b/setup.h index 24034572b150c7..c3247d7fc8d4b7 100644 --- a/setup.h +++ b/setup.h @@ -146,7 +146,7 @@ void verify_filename(const char *prefix, const char *name, int diagnose_misspelt_rev); void verify_non_filename(const char *prefix, const char *name); -int path_inside_repo(const char *prefix, const char *path); +int path_inside_repo(struct repository *repo, const char *prefix, const char *path); void sanitize_stdfds(void); int daemonize(void); From 3706c713f27ddb1e1d7b4ca98000bad2c65c8bb3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:28 +0200 Subject: [PATCH 052/241] setup: stop using `the_repository` in `verify_filename()` Stop using `the_repository` in `verify_filename()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/grep.c | 2 +- builtin/reset.c | 2 +- builtin/rev-parse.c | 4 ++-- revision.c | 2 +- setup.c | 5 +++-- setup.h | 3 ++- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index e33285e5e69289..b0e350cf8938fb 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1163,7 +1163,7 @@ int cmd_grep(int argc, if (!seen_dashdash) { int j; for (j = i; j < argc; j++) - verify_filename(prefix, argv[j], j == i && allow_revs); + verify_filename(the_repository, prefix, argv[j], j == i && allow_revs); } parse_pathspec(&pathspec, 0, diff --git a/builtin/reset.c b/builtin/reset.c index 3590be57a5f03c..1ac374d31b9a5e 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -285,7 +285,7 @@ static void parse_args(struct pathspec *pathspec, rev = *argv++; } else { /* Otherwise we treat this as a filename */ - verify_filename(prefix, argv[0], 1); + verify_filename(the_repository, prefix, argv[0], 1); } } diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 6dd49ae3f44edf..d7c6400ddc885b 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -749,7 +749,7 @@ int cmd_rev_parse(int argc, if (as_is) { if (show_file(arg, output_prefix) && as_is < 2) - verify_filename(prefix, arg, 0); + verify_filename(the_repository, prefix, arg, 0); continue; } @@ -1173,7 +1173,7 @@ int cmd_rev_parse(int argc, as_is = 1; if (!show_file(arg, output_prefix)) continue; - verify_filename(prefix, arg, 1); + verify_filename(the_repository, prefix, arg, 1); } strbuf_release(&buf); if (verify) { diff --git a/revision.c b/revision.c index 31808e3df055c7..57cf00ae6bb9d0 100644 --- a/revision.c +++ b/revision.c @@ -3066,7 +3066,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s * but the latter we have checked in the main loop. 
*/ for (j = i; j < argc; j++) - verify_filename(revs->prefix, argv[j], j == i); + verify_filename(the_repository, revs->prefix, argv[j], j == i); strvec_pushv(&prune_data, argv + i); break; diff --git a/setup.c b/setup.c index 6cfb05c5278aa8..736f950bd08686 100644 --- a/setup.c +++ b/setup.c @@ -280,7 +280,8 @@ static int looks_like_pathspec(const char *arg) * diagnose_misspelt_rev == 0 for the next ones (because we already * saw a filename, there's not ambiguity anymore). */ -void verify_filename(const char *prefix, +void verify_filename(struct repository *repo, + const char *prefix, const char *arg, int diagnose_misspelt_rev) { @@ -288,7 +289,7 @@ void verify_filename(const char *prefix, die(_("option '%s' must come before non-option arguments"), arg); if (looks_like_pathspec(arg) || check_filename(prefix, arg)) return; - die_verify_filename(the_repository, prefix, arg, diagnose_misspelt_rev); + die_verify_filename(repo, prefix, arg, diagnose_misspelt_rev); } /* diff --git a/setup.h b/setup.h index c3247d7fc8d4b7..24a6f666294b71 100644 --- a/setup.h +++ b/setup.h @@ -142,7 +142,8 @@ char *prefix_path(struct repository *repo, const char *prefix, int len, const ch char *prefix_path_gently(struct repository *repo, const char *prefix, int len, int *remaining, const char *path); int check_filename(const char *prefix, const char *name); -void verify_filename(const char *prefix, +void verify_filename(struct repository *repo, + const char *prefix, const char *name, int diagnose_misspelt_rev); void verify_non_filename(const char *prefix, const char *name); From a5f0aeb6c10cf6e89e2b67d3d3e150cf45f5e862 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:29 +0200 Subject: [PATCH 053/241] setup: stop using `the_repository` in `verify_non_filename()` Stop using `the_repository` in `verify_non_filename()` and instead accept the repository as a parameter. 
The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/checkout.c | 2 +- builtin/grep.c | 2 +- builtin/reset.c | 2 +- revision.c | 4 ++-- setup.c | 4 ++-- setup.h | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index e031e6188613a6..d7069765e7c9e6 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -1485,7 +1485,7 @@ static int parse_branchname_arg(int argc, const char **argv, * it would be extremely annoying. */ if (argc) - verify_non_filename(opts->prefix, arg); + verify_non_filename(the_repository, opts->prefix, arg); } else if (opts->accept_pathspec) { argcount++; argv++; diff --git a/builtin/grep.c b/builtin/grep.c index b0e350cf8938fb..4ec0c016b1f11c 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1151,7 +1151,7 @@ int cmd_grep(int argc, object = parse_object_or_die(the_repository, &oid, arg); if (!seen_dashdash) - verify_non_filename(prefix, arg); + verify_non_filename(the_repository, prefix, arg); add_object_array_with_path(object, arg, &list, oc.mode, oc.path); object_context_release(&oc); } diff --git a/builtin/reset.c b/builtin/reset.c index 1ac374d31b9a5e..11f57605b510ae 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -281,7 +281,7 @@ static void parse_args(struct pathspec *pathspec, * Ok, argv[0] looks like a commit/tree; it should not * be a filename. 
*/ - verify_non_filename(prefix, argv[0]); + verify_non_filename(the_repository, prefix, argv[0]); rev = *argv++; } else { /* Otherwise we treat this as a filename */ diff --git a/revision.c b/revision.c index 57cf00ae6bb9d0..9071a38b85b5c7 100644 --- a/revision.c +++ b/revision.c @@ -2082,7 +2082,7 @@ static int handle_dotdot_1(const char *arg, char *dotdot, if (!cant_be_filename) { *dotdot = '.'; - verify_non_filename(revs->prefix, arg); + verify_non_filename(the_repository, revs->prefix, arg); *dotdot = '\0'; } @@ -2227,7 +2227,7 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl goto out; } if (!cant_be_filename) - verify_non_filename(revs->prefix, arg); + verify_non_filename(the_repository, revs->prefix, arg); object = get_reference(revs, arg, &oid, flags ^ local_flags); if (!object) { ret = (revs->ignore_missing || revs->do_not_die_on_missing_objects) ? 0 : -1; diff --git a/setup.c b/setup.c index 736f950bd08686..c6b5b85f3a9dc0 100644 --- a/setup.c +++ b/setup.c @@ -297,9 +297,9 @@ void verify_filename(struct repository *repo, * and we parsed the arg as a refname. It should not be interpretable * as a filename. 
*/ -void verify_non_filename(const char *prefix, const char *arg) +void verify_non_filename(struct repository *repo, const char *prefix, const char *arg) { - if (!is_inside_work_tree(the_repository) || is_inside_git_dir(the_repository)) + if (!is_inside_work_tree(repo) || is_inside_git_dir(repo)) return; if (*arg == '-') return; /* flag */ diff --git a/setup.h b/setup.h index 24a6f666294b71..364c2c728a69d6 100644 --- a/setup.h +++ b/setup.h @@ -146,7 +146,7 @@ void verify_filename(struct repository *repo, const char *prefix, const char *name, int diagnose_misspelt_rev); -void verify_non_filename(const char *prefix, const char *name); +void verify_non_filename(struct repository *repo, const char *prefix, const char *name); int path_inside_repo(struct repository *repo, const char *prefix, const char *path); void sanitize_stdfds(void); From e282d01b4a0d47b73f9835772feec05822b08b97 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:30 +0200 Subject: [PATCH 054/241] setup: stop using `the_repository` in `enter_repo()` Stop using `the_repository` in `enter_repo()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 2 +- builtin/upload-archive.c | 2 +- builtin/upload-pack.c | 2 +- daemon.c | 4 ++-- http-backend.c | 2 +- setup.c | 4 ++-- setup.h | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index e34edff406959a..c5f73fff351c99 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2646,7 +2646,7 @@ int cmd_receive_pack(int argc, setup_path(); - if (!enter_repo(service_dir, 0)) + if (!enter_repo(the_repository, service_dir, 0)) die("'%s' does not appear to be a git repository", service_dir); repo_config(the_repository, receive_pack_config, NULL); diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c index 25312bb2a52887..718e74b3acf85d 100644 --- a/builtin/upload-archive.c +++ b/builtin/upload-archive.c @@ -31,7 +31,7 @@ int cmd_upload_archive_writer(int argc, if (argc != 2) usage(upload_archive_usage); - if (!enter_repo(argv[1], 0)) + if (!enter_repo(the_repository, argv[1], 0)) die("'%s' does not appear to be a git repository", argv[1]); init_archivers(); diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c index 30498fafea3a8b..32831fb8796acc 100644 --- a/builtin/upload-pack.c +++ b/builtin/upload-pack.c @@ -59,7 +59,7 @@ int cmd_upload_pack(int argc, if (strict) enter_repo_flags |= ENTER_REPO_STRICT; - if (!enter_repo(dir, enter_repo_flags)) + if (!enter_repo(the_repository, dir, enter_repo_flags)) die("'%s' does not appear to be a git repository", dir); switch (determine_protocol_version_server()) { diff --git a/daemon.c b/daemon.c index 0a7b1aae447912..947dd906554963 100644 --- a/daemon.c +++ b/daemon.c @@ -244,14 +244,14 @@ static const char *path_ok(const char *directory, struct hostinfo *hi) } enter_repo_flags = strict_paths ? 
ENTER_REPO_STRICT : 0; - path = enter_repo(dir, enter_repo_flags); + path = enter_repo(the_repository, dir, enter_repo_flags); if (!path && base_path && base_path_relaxed) { /* * if we fail and base_path_relaxed is enabled, try without * prefixing the base path */ dir = directory; - path = enter_repo(dir, enter_repo_flags); + path = enter_repo(the_repository, dir, enter_repo_flags); } if (!path) { diff --git a/http-backend.c b/http-backend.c index 1a171c5c5a0b02..c7566b1d12d35f 100644 --- a/http-backend.c +++ b/http-backend.c @@ -809,7 +809,7 @@ int cmd_main(int argc UNUSED, const char **argv UNUSED) not_found(&hdr, "Request not supported: '%s'", dir); setup_path(); - if (!enter_repo(dir, 0)) + if (!enter_repo(the_repository, dir, 0)) not_found(&hdr, "Not a git repository: '%s'", dir); if (!getenv("GIT_HTTP_EXPORT_ALL") && access("git-daemon-export-ok", F_OK) ) diff --git a/setup.c b/setup.c index c6b5b85f3a9dc0..dca32addae0eef 100644 --- a/setup.c +++ b/setup.c @@ -1756,7 +1756,7 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir, return result; } -const char *enter_repo(const char *path, unsigned flags) +const char *enter_repo(struct repository *repo, const char *path, unsigned flags) { static struct strbuf validated_path = STRBUF_INIT; static struct strbuf used_path = STRBUF_INIT; @@ -1829,7 +1829,7 @@ const char *enter_repo(const char *path, unsigned flags) } if (is_git_directory(".")) { - set_git_dir(the_repository, ".", 0); + set_git_dir(repo, ".", 0); check_repository_format(NULL); return path; } diff --git a/setup.h b/setup.h index 364c2c728a69d6..d0cfdfd44a67a8 100644 --- a/setup.h +++ b/setup.h @@ -134,7 +134,7 @@ enum { * links. User relative paths are also returned as they are given, * except DWIM suffixing. 
*/ -const char *enter_repo(const char *path, unsigned flags); +const char *enter_repo(struct repository *repo, const char *path, unsigned flags); const char *setup_git_directory_gently(int *); const char *setup_git_directory(void); From 4f924d92cd88680d672cc76211fa2547f7e67c48 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:31 +0200 Subject: [PATCH 055/241] setup: stop using `the_repository` in `setup_work_tree()` Stop using `the_repository` in `setup_work_tree()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Note that the function tracks bogus worktree configuration via a global variable. If we have bogus configuration, and if later on some caller tries to set up a worktree, then we'll die instead. Of course, tracking this as a global variable doesn't make sense anymore now that we can set up worktrees for arbitrary repositories. Move the variable into `struct repository` instead.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- blame.c | 2 +- builtin/check-attr.c | 2 +- builtin/clone.c | 2 +- builtin/describe.c | 2 +- builtin/diff-index.c | 2 +- builtin/diff.c | 4 ++-- builtin/difftool.c | 2 +- builtin/grep.c | 2 +- builtin/ls-files.c | 2 +- builtin/read-tree.c | 2 +- builtin/reset.c | 2 +- builtin/rm.c | 2 +- builtin/sparse-checkout.c | 16 ++++++++-------- builtin/submodule--helper.c | 2 +- builtin/update-index.c | 10 +++++----- git.c | 2 +- repository.h | 1 + setup.c | 9 ++++----- setup.h | 2 +- t/helper/test-subprocess.c | 4 +++- wt-status.c | 2 +- 21 files changed, 38 insertions(+), 36 deletions(-) diff --git a/blame.c b/blame.c index a3c49d132e4ae1..977cbb70974f8c 100644 --- a/blame.c +++ b/blame.c @@ -2813,7 +2813,7 @@ void setup_scoreboard(struct blame_scoreboard *sb, } if (!sb->contents_from) - setup_work_tree(); + setup_work_tree(the_repository); sb->final = fake_working_tree_commit(sb->repo, &sb->revs->diffopt, diff --git a/builtin/check-attr.c b/builtin/check-attr.c index 04b86e42ae66ca..98f64d5b922e6c 100644 --- a/builtin/check-attr.c +++ b/builtin/check-attr.c @@ -117,7 +117,7 @@ int cmd_check_attr(int argc, int cnt, i, doubledash, filei; if (!is_bare_repository()) - setup_work_tree(); + setup_work_tree(the_repository); repo_config(the_repository, git_default_config, NULL); diff --git a/builtin/clone.c b/builtin/clone.c index fba3c9c508bc06..91b9a105a4442e 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -668,7 +668,7 @@ static int checkout(int submodule_progress, } /* We need to be in the new work tree for the checkout */ - setup_work_tree(); + setup_work_tree(the_repository); repo_hold_locked_index(the_repository, &lock_file, LOCK_DIE_ON_ERROR); diff --git a/builtin/describe.c b/builtin/describe.c index bffeed13a3cb14..1c47d7c0b7c38d 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -781,7 +781,7 @@ int cmd_describe(int argc, struct rev_info revs; int fd; - setup_work_tree(); + 
setup_work_tree(the_repository); prepare_repo_settings(the_repository); the_repository->settings.command_requires_full_index = 0; repo_read_index(the_repository); diff --git a/builtin/diff-index.c b/builtin/diff-index.c index 522dacfc4cf097..3db7cffede578c 100644 --- a/builtin/diff-index.c +++ b/builtin/diff-index.c @@ -69,7 +69,7 @@ int cmd_diff_index(int argc, rev.max_count != -1 || rev.min_age != -1 || rev.max_age != -1) usage(diff_cache_usage); if (!(option & DIFF_INDEX_CACHED)) { - setup_work_tree(); + setup_work_tree(the_repository); if (repo_read_index_preload(the_repository, &rev.diffopt.pathspec, 0) < 0) { perror("repo_read_index_preload"); return -1; diff --git a/builtin/diff.c b/builtin/diff.c index 7ddebce2ac7cec..1ede873ac1895d 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -159,7 +159,7 @@ static void builtin_diff_index(struct rev_info *revs, revs->max_age != -1) usage(builtin_diff_usage); if (!(option & DIFF_INDEX_CACHED)) { - setup_work_tree(); + setup_work_tree(the_repository); if (repo_read_index_preload(the_repository, &revs->diffopt.pathspec, 0) < 0) { die_errno("repo_read_index_preload"); @@ -281,7 +281,7 @@ static void builtin_diff_files(struct rev_info *revs, int argc, const char **arg (revs->diffopt.output_format & DIFF_FORMAT_PATCH)) diff_merges_set_dense_combined_if_unset(revs); - setup_work_tree(); + setup_work_tree(the_repository); if (repo_read_index_preload(the_repository, &revs->diffopt.pathspec, 0) < 0) { die_errno("repo_read_index_preload"); diff --git a/builtin/difftool.c b/builtin/difftool.c index e4bc1f831696a8..2a21005f2ee264 100644 --- a/builtin/difftool.c +++ b/builtin/difftool.c @@ -767,7 +767,7 @@ int cmd_difftool(int argc, die(_("difftool requires worktree or --no-index")); if (!no_index){ - setup_work_tree(); + setup_work_tree(repo); setenv(GIT_DIR_ENVIRONMENT, absolute_path(repo_get_git_dir(repo)), 1); setenv(GIT_WORK_TREE_ENVIRONMENT, absolute_path(repo_get_work_tree(repo)), 1); } else if (dir_diff) diff --git 
a/builtin/grep.c b/builtin/grep.c index 4ec0c016b1f11c..679f8b567a1578 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1272,7 +1272,7 @@ int cmd_grep(int argc, die(_("--[no-]exclude-standard cannot be used for tracked contents")); } else if (!list.nr) { if (!cached) - setup_work_tree(); + setup_work_tree(the_repository); hit = grep_cache(&opt, &pathspec, cached); } else { diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 09d95111b35b9f..e1a22b41b94c08 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -704,7 +704,7 @@ int cmd_ls_files(int argc, exc_given = 1; if (require_work_tree && !is_inside_work_tree(repo)) - setup_work_tree(); + setup_work_tree(repo); if (recurse_submodules && (show_deleted || show_others || show_unmerged || diff --git a/builtin/read-tree.c b/builtin/read-tree.c index 460b21e40ac914..999a82ecdfd737 100644 --- a/builtin/read-tree.c +++ b/builtin/read-tree.c @@ -229,7 +229,7 @@ int cmd_read_tree(int argc, opts.preserve_ignored = 0; /* otherwise, opts.preserve_ignored is irrelevant */ if (opts.merge && !opts.index_only) - setup_work_tree(); + setup_work_tree(the_repository); if (opts.skip_sparse_checkout) ensure_full_index(the_repository->index); diff --git a/builtin/reset.c b/builtin/reset.c index 11f57605b510ae..3be6bd0121afe5 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -468,7 +468,7 @@ int cmd_reset(int argc, trace2_cmd_mode(reset_type_names[reset_type]); if (reset_type != SOFT && (reset_type != MIXED || repo_get_work_tree(the_repository))) - setup_work_tree(); + setup_work_tree(the_repository); if (reset_type == MIXED && is_bare_repository()) die(_("%s reset is not allowed in a bare repository"), diff --git a/builtin/rm.c b/builtin/rm.c index 05d89e98c3c6b8..081d0bc3754c52 100644 --- a/builtin/rm.c +++ b/builtin/rm.c @@ -296,7 +296,7 @@ int cmd_rm(int argc, die(_("No pathspec was given. 
Which files should I remove?")); if (!index_only) - setup_work_tree(); + setup_work_tree(the_repository); prepare_repo_settings(the_repository); the_repository->settings.command_requires_full_index = 0; diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 2af50fb2f9cb22..d89acbeb533bd8 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -63,7 +63,7 @@ static int sparse_checkout_list(int argc, const char **argv, const char *prefix, int res; struct repo_config_values *cfg = repo_config_values(the_repository); - setup_work_tree(); + setup_work_tree(the_repository); if (!cfg->apply_sparse_checkout) die(_("this worktree is not sparse")); @@ -229,7 +229,7 @@ static int update_working_directory(struct repository *r, o.dst_index = r->index; o.skip_sparse_checkout = 0; - setup_work_tree(); + setup_work_tree(the_repository); repo_hold_locked_index(r, &lock_file, LOCK_DIE_ON_ERROR); @@ -468,7 +468,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix, OPT_END(), }; - setup_work_tree(); + setup_work_tree(the_repository); repo_read_index(repo); init_opts.cone_mode = -1; @@ -802,7 +802,7 @@ static int sparse_checkout_add(int argc, const char **argv, const char *prefix, int ret; struct repo_config_values *cfg = repo_config_values(the_repository); - setup_work_tree(); + setup_work_tree(the_repository); if (!cfg->apply_sparse_checkout) die(_("no sparse-checkout to add to")); @@ -856,7 +856,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix, struct strvec patterns = STRVEC_INIT; int ret; - setup_work_tree(); + setup_work_tree(the_repository); repo_read_index(repo); set_opts.cone_mode = -1; @@ -912,7 +912,7 @@ static int sparse_checkout_reapply(int argc, const char **argv, }; struct repo_config_values *cfg = repo_config_values(the_repository); - setup_work_tree(); + setup_work_tree(the_repository); if (!cfg->apply_sparse_checkout) die(_("must be in a sparse-checkout to reapply 
sparsity patterns")); @@ -975,7 +975,7 @@ static int sparse_checkout_clean(int argc, const char **argv, OPT_END(), }; - setup_work_tree(); + setup_work_tree(the_repository); if (!cfg->apply_sparse_checkout) die(_("must be in a sparse-checkout to clean directories")); if (!core_sparse_checkout_cone) @@ -1053,7 +1053,7 @@ static int sparse_checkout_disable(int argc, const char **argv, * forcibly return to a dense checkout regardless of initial state. */ - setup_work_tree(); + setup_work_tree(the_repository); argc = parse_options(argc, argv, prefix, builtin_sparse_checkout_disable_options, builtin_sparse_checkout_disable_usage, 0); diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 2f589e3b378d3f..1cc82a134db22e 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1250,7 +1250,7 @@ static int compute_summary_module_list(struct object_id *head_oid, if (!info->cached) { if (diff_cmd == DIFF_INDEX) - setup_work_tree(); + setup_work_tree(the_repository); if (repo_read_index_preload(the_repository, &rev.diffopt.pathspec, 0) < 0) { perror("repo_read_index_preload"); ret = -1; diff --git a/builtin/update-index.c b/builtin/update-index.c index 7434112b8e69b0..d6dabacfd1275d 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -732,7 +732,7 @@ struct refresh_params { static int refresh(struct refresh_params *o, unsigned int flag) { - setup_work_tree(); + setup_work_tree(the_repository); repo_read_index(the_repository); *o->has_errors |= refresh_index(the_repository->index, o->flags | flag, NULL, NULL, NULL); @@ -901,7 +901,7 @@ static enum parse_opt_result reupdate_callback( BUG_ON_OPT_ARG(arg); /* consume remaining arguments. 
*/ - setup_work_tree(); + setup_work_tree(the_repository); *has_errors = do_reupdate(ctx->argv + 1, prefix); if (*has_errors) the_repository->index->cache_changed = 0; @@ -1157,7 +1157,7 @@ int cmd_update_index(int argc, transaction = NULL; } - setup_work_tree(); + setup_work_tree(the_repository); p = prefix_path(the_repository, prefix, prefix_length, path); update_one(p); if (set_executable_bit) @@ -1199,7 +1199,7 @@ int cmd_update_index(int argc, struct strbuf buf = STRBUF_INIT; struct strbuf unquoted = STRBUF_INIT; - setup_work_tree(); + setup_work_tree(the_repository); while (getline_fn(&buf, stdin) != EOF) { char *p; if (!nul_term_line && buf.buf[0] == '"') { @@ -1253,7 +1253,7 @@ int cmd_update_index(int argc, report(_("Untracked cache disabled")); break; case UC_TEST: - setup_work_tree(); + setup_work_tree(the_repository); return !test_if_untracked_cache_is_supported(); case UC_ENABLE: case UC_FORCE: diff --git a/git.c b/git.c index 2b212e6675d926..c146eaa20b8ab4 100644 --- a/git.c +++ b/git.c @@ -497,7 +497,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv, struct commit_pager_choice(); if (!help && p->option & NEED_WORK_TREE) - setup_work_tree(); + setup_work_tree(the_repository); trace_argv_printf(argv, "trace: built-in: git"); trace2_cmd_name(p->cmd); diff --git a/repository.h b/repository.h index 078059a6e02b10..abeef3129e9094 100644 --- a/repository.h +++ b/repository.h @@ -113,6 +113,7 @@ struct repository { * A NULL value indicates that there is no working directory. 
*/ char *worktree; + bool worktree_config_is_bogus; /* * Path from the root of the top-level superproject down to this diff --git a/setup.c b/setup.c index dca32addae0eef..64a030c6cde286 100644 --- a/setup.c +++ b/setup.c @@ -26,7 +26,6 @@ #include "trace2.h" #include "worktree.h" -static int work_tree_config_is_bogus; enum allowed_bare_repo { ALLOWED_BARE_REPO_EXPLICIT = 0, ALLOWED_BARE_REPO_ALL, @@ -485,7 +484,7 @@ int is_inside_work_tree(struct repository *repo) return is_inside_dir(strbuf_realpath(&buf, worktree, 1)); } -void setup_work_tree(void) +void setup_work_tree(struct repository *repo) { const char *work_tree; static int initialized = 0; @@ -493,10 +492,10 @@ void setup_work_tree(void) if (initialized) return; - if (work_tree_config_is_bogus) + if (repo->worktree_config_is_bogus) die(_("unable to set up work tree using invalid config")); - work_tree = repo_get_work_tree(the_repository); + work_tree = repo_get_work_tree(repo); if (!work_tree || chdir_notify(work_tree)) die(_("this operation must be run in a work tree")); @@ -1155,7 +1154,7 @@ static const char *setup_explicit_git_dir(struct repository *repo, if (git_work_tree_cfg) { /* #22.2, #30 */ warning("core.bare and core.worktree do not make sense"); - work_tree_config_is_bogus = 1; + repo->worktree_config_is_bogus = true; } /* #18, #26 */ diff --git a/setup.h b/setup.h index d0cfdfd44a67a8..8fed365637ec2b 100644 --- a/setup.h +++ b/setup.h @@ -56,7 +56,7 @@ const char *resolve_gitdir_gently(const char *suspect, int *return_error_code); void die_upon_dubious_ownership(const char *gitfile, const char *worktree, const char *gitdir); -void setup_work_tree(void); +void setup_work_tree(struct repository *repo); /* * discover_git_directory_reason() is similar to discover_git_directory(), diff --git a/t/helper/test-subprocess.c b/t/helper/test-subprocess.c index c344f1694df28d..8a070e47cddb51 100644 --- a/t/helper/test-subprocess.c +++ b/t/helper/test-subprocess.c @@ -1,3 +1,5 @@ +#define 
USE_THE_REPOSITORY_VARIABLE + #include "test-tool.h" #include "run-command.h" #include "setup.h" @@ -11,7 +13,7 @@ int cmd__subprocess(int argc, const char **argv) if (nogit) die("No git repo found"); if (argc > 1 && !strcmp(argv[1], "--setup-work-tree")) { - setup_work_tree(); + setup_work_tree(the_repository); argv++; } cp.git_cmd = 1; diff --git a/wt-status.c b/wt-status.c index 479ccc3304bc33..6cc77ba68cb699 100644 --- a/wt-status.c +++ b/wt-status.c @@ -1206,7 +1206,7 @@ static void wt_longstatus_print_verbose(struct wt_status *s) status_printf_ln(s, c, "--------------------------------------------------"); status_printf_ln(s, c, _("Changes not staged for commit:")); - setup_work_tree(); + setup_work_tree(the_repository); rev.diffopt.a_prefix = "i/"; rev.diffopt.b_prefix = "w/"; run_diff_files(&rev, 0); From 755efcc4693a4253de474b5171d82dfbebce4f97 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:32 +0200 Subject: [PATCH 056/241] setup: stop using `the_repository` in `set_git_work_tree()` Stop using `the_repository` in `set_git_work_tree()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. As with the preceding commit, we track whether the worktree has been initialized already via a global variable so that we can die in case the repository is re-initialized with a different worktree path. Store this info in the `struct repository` instead so that we correctly handle this per repository.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- builtin/init-db.c | 6 +++--- repository.h | 1 + setup.c | 24 +++++++++++------------- setup.h | 2 +- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 91b9a105a4442e..16cd7b029b0967 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1114,7 +1114,7 @@ int cmd_clone(int argc, die_errno(_("could not create work tree dir '%s'"), work_tree); junk_work_tree = work_tree; - set_git_work_tree(work_tree); + set_git_work_tree(the_repository, work_tree); } if (real_git_dir) { diff --git a/builtin/init-db.c b/builtin/init-db.c index bb853e69f5426e..e626b0d8b7ccc6 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -237,9 +237,9 @@ int cmd_init_db(int argc, if (!git_work_tree_cfg) git_work_tree_cfg = xgetcwd(); if (work_tree) - set_git_work_tree(work_tree); + set_git_work_tree(the_repository, work_tree); else - set_git_work_tree(git_work_tree_cfg); + set_git_work_tree(the_repository, git_work_tree_cfg); if (access(repo_get_work_tree(the_repository), X_OK)) die_errno (_("Cannot access work tree '%s'"), repo_get_work_tree(the_repository)); @@ -248,7 +248,7 @@ int cmd_init_db(int argc, if (real_git_dir) die(_("--separate-git-dir incompatible with bare repository")); if (work_tree) - set_git_work_tree(work_tree); + set_git_work_tree(the_repository, work_tree); } flags |= INIT_DB_EXIST_OK; diff --git a/repository.h b/repository.h index abeef3129e9094..7ae3d344848023 100644 --- a/repository.h +++ b/repository.h @@ -113,6 +113,7 @@ struct repository { * A NULL value indicates that there is no working directory. 
*/ char *worktree; + bool worktree_initialized; bool worktree_config_is_bogus; /* diff --git a/setup.c b/setup.c index 64a030c6cde286..2a917e3a5b2479 100644 --- a/setup.c +++ b/setup.c @@ -1149,7 +1149,7 @@ static const char *setup_explicit_git_dir(struct repository *repo, /* #3, #7, #11, #15, #19, #23, #27, #31 (see t1510) */ if (work_tree_env) - set_git_work_tree(work_tree_env); + set_git_work_tree(repo, work_tree_env); else if (is_bare_repository_cfg > 0) { if (git_work_tree_cfg) { /* #22.2, #30 */ @@ -1164,7 +1164,7 @@ static const char *setup_explicit_git_dir(struct repository *repo, } else if (git_work_tree_cfg) { /* #6, #14 */ if (is_absolute_path(git_work_tree_cfg)) - set_git_work_tree(git_work_tree_cfg); + set_git_work_tree(repo, git_work_tree_cfg); else { char *core_worktree; if (chdir(gitdirenv)) @@ -1174,7 +1174,7 @@ static const char *setup_explicit_git_dir(struct repository *repo, core_worktree = xgetcwd(); if (chdir(cwd->buf)) die_errno(_("cannot come back to cwd")); - set_git_work_tree(core_worktree); + set_git_work_tree(repo, core_worktree); free(core_worktree); } } @@ -1185,7 +1185,7 @@ static const char *setup_explicit_git_dir(struct repository *repo, return NULL; } else /* #2, #10 */ - set_git_work_tree("."); + set_git_work_tree(repo, "."); /* set_git_work_tree() must have been called by now */ worktree = repo_get_work_tree(repo); @@ -1245,7 +1245,7 @@ static const char *setup_discovered_git_dir(struct repository *repo, } /* #0, #1, #5, #8, #9, #12, #13 */ - set_git_work_tree("."); + set_git_work_tree(repo, "."); if (strcmp(gitdir, DEFAULT_GIT_DIR_ENVIRONMENT)) set_git_dir(repo, gitdir, 0); if (offset >= cwd->len) @@ -1836,29 +1836,27 @@ const char *enter_repo(struct repository *repo, const char *path, unsigned flags return NULL; } -static int git_work_tree_initialized; - /* * Note. This works only before you used a work tree. 
This was added * primarily to support git-clone to work in a new repository it just * created, and is not meant to flip between different work trees. */ -void set_git_work_tree(const char *new_work_tree) +void set_git_work_tree(struct repository *repo, const char *new_work_tree) { - if (git_work_tree_initialized) { + if (repo->worktree_initialized) { struct strbuf realpath = STRBUF_INIT; strbuf_realpath(&realpath, new_work_tree, 1); new_work_tree = realpath.buf; - if (strcmp(new_work_tree, the_repository->worktree)) + if (strcmp(new_work_tree, repo->worktree)) die("internal error: work tree has already been set\n" "Current worktree: %s\nNew worktree: %s", - the_repository->worktree, new_work_tree); + repo->worktree, new_work_tree); strbuf_release(&realpath); return; } - git_work_tree_initialized = 1; - repo_set_worktree(the_repository, new_work_tree); + repo->worktree_initialized = 1; + repo_set_worktree(repo, new_work_tree); } const char *setup_git_directory_gently(int *nongit_ok) diff --git a/setup.h b/setup.h index 8fed365637ec2b..1a37089fa0aa54 100644 --- a/setup.h +++ b/setup.h @@ -96,7 +96,7 @@ static inline int discover_git_directory(struct strbuf *commondir, return 0; } -void set_git_work_tree(const char *tree); +void set_git_work_tree(struct repository *repo, const char *tree); /* Flags that can be passed to `enter_repo()`. */ enum { From fedb5aa834c68abd0601ea0092fbd51eb9061a9b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:33 +0200 Subject: [PATCH 057/241] setup: stop using `the_repository` in `setup_git_env()` Stop using `the_repository` in `setup_git_env()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Furthermore, the function is never used outside of "setup.c". Drop the declaration in "environment.h" and make it static. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- environment.h | 2 -- setup.c | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/environment.h b/environment.h index 123a71cdc8d14e..9eb97b3869c9b1 100644 --- a/environment.h +++ b/environment.h @@ -147,8 +147,6 @@ void repo_config_values_init(struct repo_config_values *cfg); * Please do not add new global config variables here. */ # ifdef USE_THE_REPOSITORY_VARIABLE -void setup_git_env(const char *git_dir); - /* * Returns true iff we have a configured git repository (either via * setup_git_directory, or in the environment via $GIT_DIR). diff --git a/setup.c b/setup.c index 2a917e3a5b2479..e1814fb8e67022 100644 --- a/setup.c +++ b/setup.c @@ -1071,9 +1071,9 @@ static void setup_git_env_internal(struct repository *repo, fetch_if_missing = 0; } -void setup_git_env(const char *git_dir) +static void setup_git_env(struct repository *repo, const char *git_dir) { - setup_git_env_internal(the_repository, git_dir, false); + setup_git_env_internal(repo, git_dir, false); } static void set_git_dir_1(struct repository *repo, const char *path, bool skip_initializing_odb) @@ -1985,7 +1985,7 @@ const char *setup_git_directory_gently(int *nongit_ok) const char *gitdir = getenv(GIT_DIR_ENVIRONMENT); if (!gitdir) gitdir = DEFAULT_GIT_DIR_ENVIRONMENT; - setup_git_env(gitdir); + setup_git_env(the_repository, gitdir); } if (startup_info->have_repository) { repo_set_hash_algo(the_repository, repo_fmt.hash_algo); From bfd4afe0d50033246f1991429291c09a65099ffd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:34 +0200 Subject: [PATCH 058/241] setup: stop using `the_repository` in `setup_git_directory_gently()` Stop using `the_repository` in `setup_git_directory_gently()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/check-ref-format.c | 5 ++++- builtin/diff.c | 2 +- builtin/hash-object.c | 2 +- builtin/help.c | 2 +- builtin/stripspace.c | 2 +- git.c | 6 +++--- http-fetch.c | 2 +- imap-send.c | 2 +- remote-curl.c | 4 ++-- setup.c | 36 ++++++++++++++++++------------------ setup.h | 2 +- t/helper/test-path-utils.c | 2 +- t/helper/test-subprocess.c | 2 +- 13 files changed, 36 insertions(+), 33 deletions(-) diff --git a/builtin/check-ref-format.c b/builtin/check-ref-format.c index 5d80afeec05e3d..e42b0444ead269 100644 --- a/builtin/check-ref-format.c +++ b/builtin/check-ref-format.c @@ -1,6 +1,9 @@ /* * GIT - The information manager from hell */ + +#define USE_THE_REPOSITORY_VARIABLE + #include "builtin.h" #include "refs.h" #include "setup.h" @@ -41,7 +44,7 @@ static int check_ref_format_branch(const char *arg) const char *name; int nongit; - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); if (check_branch_ref(&sb, arg) || !skip_prefix(sb.buf, "refs/heads/", &name)) die("'%s' is not a valid branch name", arg); diff --git a/builtin/diff.c b/builtin/diff.c index 1ede873ac1895d..4b46e394cecb8d 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -455,7 +455,7 @@ int cmd_diff(int argc, break; } - prefix = setup_git_directory_gently(&nongit); + prefix = setup_git_directory_gently(the_repository, &nongit); if (!nongit) { prepare_repo_settings(the_repository); diff --git a/builtin/hash-object.c b/builtin/hash-object.c index 5d900a6b8c953e..d7905bedc2dfd3 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -102,7 +102,7 @@ int cmd_hash_object(int argc, if (flags & INDEX_WRITE_OBJECT) prefix = setup_git_directory(); else - prefix = setup_git_directory_gently(&nongit); + prefix = setup_git_directory_gently(the_repository, &nongit); if (nongit && !the_hash_algo) repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); diff --git a/builtin/help.c b/builtin/help.c 
index c0aece4da39c81..a140339999debe 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -740,7 +740,7 @@ int cmd_help(int argc, return 0; } - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); repo_config(the_repository, git_help_config, NULL); if (parsed_help_format != HELP_FORMAT_NONE) diff --git a/builtin/stripspace.c b/builtin/stripspace.c index 4a566cbc5de672..18705f1a5b1150 100644 --- a/builtin/stripspace.c +++ b/builtin/stripspace.c @@ -54,7 +54,7 @@ int cmd_stripspace(int argc, usage_with_options(stripspace_usage, options); if (mode == STRIP_COMMENTS || mode == COMMENT_LINES) { - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); repo_config(the_repository, git_default_config, NULL); } diff --git a/git.c b/git.c index c146eaa20b8ab4..0df36c9b9d5a0a 100644 --- a/git.c +++ b/git.c @@ -84,7 +84,7 @@ static int list_cmds(const char *spec) * Set up the repository so we can pick up any repo-level config (like * completion.commands). 
*/ - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); while (*spec) { const char *sep = strchrnul(spec, ','); @@ -386,7 +386,7 @@ static int handle_alias(struct strvec *args, struct string_list *expanded_aliase int nongit_ok; /* Aliases expect GIT_PREFIX, GIT_DIR etc to be set */ - setup_git_directory_gently(&nongit_ok); + setup_git_directory_gently(the_repository, &nongit_ok); commit_pager_choice(); @@ -480,7 +480,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv, struct prefix = setup_git_directory(); no_repo = 0; } else if (run_setup & RUN_SETUP_GENTLY) { - prefix = setup_git_directory_gently(&no_repo); + prefix = setup_git_directory_gently(the_repository, &no_repo); } else { prefix = NULL; } diff --git a/http-fetch.c b/http-fetch.c index 1922e23fcd3f97..f9b6ecb0616fe0 100644 --- a/http-fetch.c +++ b/http-fetch.c @@ -109,7 +109,7 @@ int cmd_main(int argc, const char **argv) struct strvec index_pack_args = STRVEC_INIT; int ret; - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); while (arg < argc && argv[arg][0] == '-') { const char *p; diff --git a/imap-send.c b/imap-send.c index af02c6a689495e..cfd6a5120c50e4 100644 --- a/imap-send.c +++ b/imap-send.c @@ -1799,7 +1799,7 @@ int cmd_main(int argc, const char **argv) int nongit_ok; int ret; - setup_git_directory_gently(&nongit_ok); + setup_git_directory_gently(the_repository, &nongit_ok); repo_config(the_repository, git_imap_config, &server); argc = parse_options(argc, (const char **)argv, "", imap_send_options, imap_send_usage, 0); diff --git a/remote-curl.c b/remote-curl.c index 92e40bb682d34d..454283c43f9782 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -1546,7 +1546,7 @@ int cmd_main(int argc, const char **argv) int nongit; int ret = 1; - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); if (argc < 2) { error(_("remote-curl: usage: git remote-curl []")); 
goto cleanup; @@ -1587,7 +1587,7 @@ int cmd_main(int argc, const char **argv) break; if (starts_with(buf.buf, "fetch ")) { if (nongit) { - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); if (nongit) die(_("remote-curl: fetch attempted without a local repo")); } diff --git a/setup.c b/setup.c index e1814fb8e67022..99c0d1640ef6c0 100644 --- a/setup.c +++ b/setup.c @@ -1859,7 +1859,7 @@ void set_git_work_tree(struct repository *repo, const char *new_work_tree) repo_set_worktree(repo, new_work_tree); } -const char *setup_git_directory_gently(int *nongit_ok) +const char *setup_git_directory_gently(struct repository *repo, int *nongit_ok) { static struct strbuf cwd = STRBUF_INIT; struct strbuf dir = STRBUF_INIT, gitdir = STRBUF_INIT, report = STRBUF_INIT; @@ -1874,7 +1874,7 @@ const char *setup_git_directory_gently(int *nongit_ok) * configuration (including the per-repo config file that we * ignored previously). */ - repo_config_clear(the_repository); + repo_config_clear(repo); /* * Let's assume that we are in a git repository. 
@@ -1890,18 +1890,18 @@ const char *setup_git_directory_gently(int *nongit_ok) switch (setup_git_directory_gently_1(&dir, &gitdir, &report, 1)) { case GIT_DIR_EXPLICIT: - prefix = setup_explicit_git_dir(the_repository, gitdir.buf, &cwd, &repo_fmt, nongit_ok); + prefix = setup_explicit_git_dir(repo, gitdir.buf, &cwd, &repo_fmt, nongit_ok); break; case GIT_DIR_DISCOVERED: if (dir.len < cwd.len && chdir(dir.buf)) die(_("cannot change to '%s'"), dir.buf); - prefix = setup_discovered_git_dir(the_repository, gitdir.buf, &cwd, dir.len, + prefix = setup_discovered_git_dir(repo, gitdir.buf, &cwd, dir.len, &repo_fmt, nongit_ok); break; case GIT_DIR_BARE: if (dir.len < cwd.len && chdir(dir.buf)) die(_("cannot change to '%s'"), dir.buf); - prefix = setup_bare_git_dir(the_repository, &cwd, dir.len, &repo_fmt, nongit_ok); + prefix = setup_bare_git_dir(repo, &cwd, dir.len, &repo_fmt, nongit_ok); break; case GIT_DIR_HIT_CEILING: if (!nongit_ok) @@ -1981,30 +1981,30 @@ const char *setup_git_directory_gently(int *nongit_ok) startup_info->have_repository || /* GIT_DIR_EXPLICIT */ getenv(GIT_DIR_ENVIRONMENT)) { - if (!the_repository->gitdir) { + if (!repo->gitdir) { const char *gitdir = getenv(GIT_DIR_ENVIRONMENT); if (!gitdir) gitdir = DEFAULT_GIT_DIR_ENVIRONMENT; - setup_git_env(the_repository, gitdir); + setup_git_env(repo, gitdir); } if (startup_info->have_repository) { - repo_set_hash_algo(the_repository, repo_fmt.hash_algo); - repo_set_compat_hash_algo(the_repository, + repo_set_hash_algo(repo, repo_fmt.hash_algo); + repo_set_compat_hash_algo(repo, repo_fmt.compat_hash_algo); - repo_set_ref_storage_format(the_repository, + repo_set_ref_storage_format(repo, repo_fmt.ref_storage_format, repo_fmt.ref_storage_payload); - the_repository->repository_format_worktree_config = + repo->repository_format_worktree_config = repo_fmt.worktree_config; - the_repository->repository_format_relative_worktrees = + repo->repository_format_relative_worktrees = repo_fmt.relative_worktrees; - 
the_repository->repository_format_submodule_path_cfg = + repo->repository_format_submodule_path_cfg = repo_fmt.submodule_path_cfg; /* take ownership of repo_fmt.partial_clone */ - the_repository->repository_format_partial_clone = + repo->repository_format_partial_clone = repo_fmt.partial_clone; repo_fmt.partial_clone = NULL; - the_repository->repository_format_precious_objects = + repo->repository_format_precious_objects = repo_fmt.precious_objects; } } @@ -2037,13 +2037,13 @@ const char *setup_git_directory_gently(int *nongit_ok) format = ref_storage_format_by_name(backend); if (format == REF_STORAGE_FORMAT_UNKNOWN) die(_("unknown ref storage format: '%s'"), backend); - repo_set_ref_storage_format(the_repository, format, payload); + repo_set_ref_storage_format(repo, format, payload); free(backend); free(payload); } - setup_original_cwd(the_repository); + setup_original_cwd(repo); strbuf_release(&dir); strbuf_release(&gitdir); @@ -2135,7 +2135,7 @@ void check_repository_format(struct repository_format *fmt) */ const char *setup_git_directory(void) { - return setup_git_directory_gently(NULL); + return setup_git_directory_gently(the_repository, NULL); } const char *resolve_gitdir_gently(const char *suspect, int *return_error_code) diff --git a/setup.h b/setup.h index 1a37089fa0aa54..18092fbf1630f5 100644 --- a/setup.h +++ b/setup.h @@ -136,7 +136,7 @@ enum { */ const char *enter_repo(struct repository *repo, const char *path, unsigned flags); -const char *setup_git_directory_gently(int *); +const char *setup_git_directory_gently(struct repository *repo, int *); const char *setup_git_directory(void); char *prefix_path(struct repository *repo, const char *prefix, int len, const char *path); char *prefix_path_gently(struct repository *repo, const char *prefix, int len, int *remaining, const char *path); diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 163fdeefb0f7d9..15eb44485cda3d 100644 --- a/t/helper/test-path-utils.c +++ 
b/t/helper/test-path-utils.c @@ -377,7 +377,7 @@ int cmd__path_utils(int argc, const char **argv) const char *prefix = argv[2]; int prefix_len = strlen(prefix); int nongit_ok; - setup_git_directory_gently(&nongit_ok); + setup_git_directory_gently(the_repository, &nongit_ok); while (argc > 3) { char *pfx = prefix_path(the_repository, prefix, prefix_len, argv[3]); diff --git a/t/helper/test-subprocess.c b/t/helper/test-subprocess.c index 8a070e47cddb51..a8194d24b31db2 100644 --- a/t/helper/test-subprocess.c +++ b/t/helper/test-subprocess.c @@ -9,7 +9,7 @@ int cmd__subprocess(int argc, const char **argv) struct child_process cp = CHILD_PROCESS_INIT; int nogit = 0; - setup_git_directory_gently(&nogit); + setup_git_directory_gently(the_repository, &nogit); if (nogit) die("No git repo found"); if (argc > 1 && !strcmp(argv[1], "--setup-work-tree")) { From 75f1d6fbfb54256fd77c3df5f60f6edde507839b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:35 +0200 Subject: [PATCH 059/241] setup: stop using `the_repository` in `setup_git_directory()` Stop using `the_repository` in `setup_git_directory()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive.c | 2 +- builtin/grep.c | 2 +- builtin/hash-object.c | 2 +- builtin/merge-file.c | 2 +- builtin/rev-parse.c | 4 ++-- git.c | 2 +- http-push.c | 2 +- scalar.c | 4 ++-- setup.c | 4 ++-- setup.h | 2 +- t/helper/test-advise.c | 2 +- t/helper/test-bitmap.c | 2 +- t/helper/test-bloom.c | 2 +- t/helper/test-cache-tree.c | 2 +- t/helper/test-config.c | 2 +- t/helper/test-dump-cache-tree.c | 2 +- t/helper/test-dump-fsmonitor.c | 2 +- t/helper/test-dump-split-index.c | 2 +- t/helper/test-dump-untracked-cache.c | 2 +- t/helper/test-find-pack.c | 2 +- t/helper/test-fsmonitor-client.c | 2 +- t/helper/test-lazy-init-name-hash.c | 2 +- t/helper/test-match-trees.c | 2 +- t/helper/test-pack-deltas.c | 2 +- t/helper/test-pack-mtimes.c | 2 +- t/helper/test-partial-clone.c | 4 +++- t/helper/test-path-walk.c | 2 +- t/helper/test-reach.c | 2 +- t/helper/test-read-cache.c | 2 +- t/helper/test-read-graph.c | 2 +- t/helper/test-read-midx.c | 2 +- t/helper/test-ref-store.c | 2 +- t/helper/test-revision-walking.c | 2 +- t/helper/test-scrap-cache-tree.c | 2 +- t/helper/test-serve-v2.c | 2 +- t/helper/test-submodule-config.c | 2 +- t/helper/test-submodule-nested-repo-config.c | 2 +- t/helper/test-submodule.c | 10 +++++----- t/helper/test-userdiff.c | 2 +- t/helper/test-write-cache.c | 2 +- 40 files changed, 49 insertions(+), 47 deletions(-) diff --git a/archive.c b/archive.c index fcd474c682ffe5..51229107a57495 100644 --- a/archive.c +++ b/archive.c @@ -786,7 +786,7 @@ int write_archive(int argc, const char **argv, const char *prefix, * die ourselves; but its error message will be more specific * than what we could write here. 
*/ - setup_git_directory(); + setup_git_directory(the_repository); } parse_treeish_arg(argv, &args, remote); diff --git a/builtin/grep.c b/builtin/grep.c index 679f8b567a1578..560133feb88783 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1064,7 +1064,7 @@ int cmd_grep(int argc, use_index = 0; else /* die the same way as if we did it at the beginning */ - setup_git_directory(); + setup_git_directory(the_repository); } /* Ignore --recurse-submodules if --no-index is given or implied */ if (!use_index) diff --git a/builtin/hash-object.c b/builtin/hash-object.c index d7905bedc2dfd3..f306b0643f4a04 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -100,7 +100,7 @@ int cmd_hash_object(int argc, hash_object_usage, 0); if (flags & INDEX_WRITE_OBJECT) - prefix = setup_git_directory(); + prefix = setup_git_directory(the_repository); else prefix = setup_git_directory_gently(the_repository, &nongit); diff --git a/builtin/merge-file.c b/builtin/merge-file.c index 59a97922086a58..8fa576523927e8 100644 --- a/builtin/merge-file.c +++ b/builtin/merge-file.c @@ -110,7 +110,7 @@ int cmd_merge_file(int argc, if (!repo && object_id) /* emit the correct "not a git repo" error in this case */ - setup_git_directory(); + setup_git_directory(the_repository); for (i = 0; i < 3; i++) { char *fname; diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index d7c6400ddc885b..426a3c56767726 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -739,7 +739,7 @@ int cmd_rev_parse(int argc, /* No options; just report on whether we're in a git repo or not. */ if (argc == 1) { - setup_git_directory(); + setup_git_directory(the_repository); repo_config(the_repository, git_default_config, NULL); return 0; } @@ -774,7 +774,7 @@ int cmd_rev_parse(int argc, /* The rest of the options require a git repository. 
*/ if (!did_repo_setup) { - prefix = setup_git_directory(); + prefix = setup_git_directory(the_repository); repo_config(the_repository, git_default_config, NULL); did_repo_setup = 1; diff --git a/git.c b/git.c index 0df36c9b9d5a0a..78bd607e2fe8e9 100644 --- a/git.c +++ b/git.c @@ -477,7 +477,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv, struct run_setup = RUN_SETUP_GENTLY; if (run_setup & RUN_SETUP) { - prefix = setup_git_directory(); + prefix = setup_git_directory(the_repository); no_repo = 0; } else if (run_setup & RUN_SETUP_GENTLY) { prefix = setup_git_directory_gently(the_repository, &no_repo); diff --git a/http-push.c b/http-push.c index 9ae6062198e14f..ee66813012cac8 100644 --- a/http-push.c +++ b/http-push.c @@ -1788,7 +1788,7 @@ int cmd_main(int argc, const char **argv) if (delete_branch && rs.nr != 1) die("You must specify only one branch name when deleting a remote branch"); - gitdir = setup_git_directory(); + gitdir = setup_git_directory(the_repository); memset(remote_dir_exists, -1, 256); diff --git a/scalar.c b/scalar.c index 4efb6ac36d888e..a80d8ee3ff54a7 100644 --- a/scalar.c +++ b/scalar.c @@ -58,7 +58,7 @@ static void setup_enlistment_directory(int argc, const char **argv, } strbuf_setlen(&path, len); - setup_git_directory(); + setup_git_directory(the_repository); if (!the_repository->worktree) die(_("Scalar enlistments require a worktree")); @@ -514,7 +514,7 @@ static int cmd_clone(int argc, const char **argv) goto cleanup; } - setup_git_directory(); + setup_git_directory(the_repository); /* common-main already logs `argv` */ trace2_def_repo(the_repository); diff --git a/setup.c b/setup.c index 99c0d1640ef6c0..da0576d28afdad 100644 --- a/setup.c +++ b/setup.c @@ -2133,9 +2133,9 @@ void check_repository_format(struct repository_format *fmt) * directory is not a strict subdirectory of the work tree root. The * prefix always ends with a '/' character. 
*/ -const char *setup_git_directory(void) +const char *setup_git_directory(struct repository *repo) { - return setup_git_directory_gently(the_repository, NULL); + return setup_git_directory_gently(repo, NULL); } const char *resolve_gitdir_gently(const char *suspect, int *return_error_code) diff --git a/setup.h b/setup.h index 18092fbf1630f5..b779661ce7df47 100644 --- a/setup.h +++ b/setup.h @@ -137,7 +137,7 @@ enum { const char *enter_repo(struct repository *repo, const char *path, unsigned flags); const char *setup_git_directory_gently(struct repository *repo, int *); -const char *setup_git_directory(void); +const char *setup_git_directory(struct repository *repo); char *prefix_path(struct repository *repo, const char *prefix, int len, const char *path); char *prefix_path_gently(struct repository *repo, const char *prefix, int len, int *remaining, const char *path); diff --git a/t/helper/test-advise.c b/t/helper/test-advise.c index 81ed93a05c9ac7..8f9db2693ed87a 100644 --- a/t/helper/test-advise.c +++ b/t/helper/test-advise.c @@ -11,7 +11,7 @@ int cmd__advise_if_enabled(int argc, const char **argv) if (argc != 2) die("usage: %s ", argv[0]); - setup_git_directory(); + setup_git_directory(the_repository); repo_config(the_repository, git_default_config, NULL); /* diff --git a/t/helper/test-bitmap.c b/t/helper/test-bitmap.c index 16a01669e4149a..d9b9a83b8f0596 100644 --- a/t/helper/test-bitmap.c +++ b/t/helper/test-bitmap.c @@ -37,7 +37,7 @@ static int bitmap_dump_pseudo_merge_objects(uint32_t n) int cmd__bitmap(int argc, const char **argv) { - setup_git_directory(); + setup_git_directory(the_repository); if (argc == 2 && !strcmp(argv[1], "list-commits")) return bitmap_list_commits(); diff --git a/t/helper/test-bloom.c b/t/helper/test-bloom.c index 3283544bd33db6..0c65befbf00f7d 100644 --- a/t/helper/test-bloom.c +++ b/t/helper/test-bloom.c @@ -52,7 +52,7 @@ static const char *const bloom_usage = "\n" int cmd__bloom(int argc, const char **argv) { - 
setup_git_directory(); + setup_git_directory(the_repository); if (argc < 2) usage(bloom_usage); diff --git a/t/helper/test-cache-tree.c b/t/helper/test-cache-tree.c index ff61d0ca7e2fe3..d42e2600921029 100644 --- a/t/helper/test-cache-tree.c +++ b/t/helper/test-cache-tree.c @@ -33,7 +33,7 @@ int cmd__cache_tree(int argc, const char **argv) OPT_END() }; - setup_git_directory(); + setup_git_directory(the_repository); argc = parse_options(argc, argv, NULL, options, test_cache_tree_usage, 0); diff --git a/t/helper/test-config.c b/t/helper/test-config.c index 9f8cca7c48cfe3..cfb3f4b111939c 100644 --- a/t/helper/test-config.c +++ b/t/helper/test-config.c @@ -102,7 +102,7 @@ int cmd__config(int argc, const char **argv) return 0; } - setup_git_directory(); + setup_git_directory(the_repository); git_configset_init(&cs); diff --git a/t/helper/test-dump-cache-tree.c b/t/helper/test-dump-cache-tree.c index 3f0c7d0ed07ec9..ccb41a423975f8 100644 --- a/t/helper/test-dump-cache-tree.c +++ b/t/helper/test-dump-cache-tree.c @@ -66,7 +66,7 @@ int cmd__dump_cache_tree(int ac UNUSED, const char **av UNUSED) struct cache_tree *another = cache_tree(); int ret; - setup_git_directory(); + setup_git_directory(the_repository); if (repo_read_index(the_repository) < 0) die("unable to read index file"); istate = *the_repository->index; diff --git a/t/helper/test-dump-fsmonitor.c b/t/helper/test-dump-fsmonitor.c index efd017ca357e0b..c991cbbb8a542c 100644 --- a/t/helper/test-dump-fsmonitor.c +++ b/t/helper/test-dump-fsmonitor.c @@ -9,7 +9,7 @@ int cmd__dump_fsmonitor(int ac UNUSED, const char **av UNUSED) { struct index_state *istate = the_repository->index; - setup_git_directory(); + setup_git_directory(the_repository); if (do_read_index(istate, the_repository->index_file, 0) < 0) die("unable to read index file"); if (!istate->fsmonitor_last_update) { diff --git a/t/helper/test-dump-split-index.c b/t/helper/test-dump-split-index.c index f855a3862c97bb..aae0a40a74bcbc 100644 --- 
a/t/helper/test-dump-split-index.c +++ b/t/helper/test-dump-split-index.c @@ -17,7 +17,7 @@ int cmd__dump_split_index(int ac UNUSED, const char **av) { struct split_index *si; - setup_git_directory(); + setup_git_directory(the_repository); do_read_index(the_repository->index, av[1], 1); printf("own %s\n", oid_to_hex(&the_repository->index->oid)); diff --git a/t/helper/test-dump-untracked-cache.c b/t/helper/test-dump-untracked-cache.c index 01a109496bee78..24308bd371b6be 100644 --- a/t/helper/test-dump-untracked-cache.c +++ b/t/helper/test-dump-untracked-cache.c @@ -54,7 +54,7 @@ int cmd__dump_untracked_cache(int ac UNUSED, const char **av UNUSED) xsetenv("GIT_CONFIG_KEY_0", "core.untrackedCache", 1); xsetenv("GIT_CONFIG_VALUE_0", "keep", 1); - setup_git_directory(); + setup_git_directory(the_repository); if (repo_read_index(the_repository) < 0) die("unable to read index file"); uc = the_repository->index->untracked; diff --git a/t/helper/test-find-pack.c b/t/helper/test-find-pack.c index fc4b8a77b3007a..28d5b1fe094345 100644 --- a/t/helper/test-find-pack.c +++ b/t/helper/test-find-pack.c @@ -25,7 +25,7 @@ int cmd__find_pack(int argc, const char **argv) struct object_id oid; struct packed_git *p; int count = -1, actual_count = 0; - const char *prefix = setup_git_directory(); + const char *prefix = setup_git_directory(the_repository); struct option options[] = { OPT_INTEGER('c', "check-count", &count, "expected number of packs"), diff --git a/t/helper/test-fsmonitor-client.c b/t/helper/test-fsmonitor-client.c index 02bfe92e8d55b7..dc1dff23fb8ed5 100644 --- a/t/helper/test-fsmonitor-client.c +++ b/t/helper/test-fsmonitor-client.c @@ -210,7 +210,7 @@ int cmd__fsmonitor_client(int argc, const char **argv) subcmd = argv[0]; - setup_git_directory(); + setup_git_directory(the_repository); if (!strcmp(subcmd, "query")) return !!do_send_query(token); diff --git a/t/helper/test-lazy-init-name-hash.c b/t/helper/test-lazy-init-name-hash.c index 40f5df4412adf1..e542985c943452 
100644 --- a/t/helper/test-lazy-init-name-hash.c +++ b/t/helper/test-lazy-init-name-hash.c @@ -211,7 +211,7 @@ int cmd__lazy_init_name_hash(int argc, const char **argv) const char *prefix; uint64_t avg_single, avg_multi; - prefix = setup_git_directory(); + prefix = setup_git_directory(the_repository); argc = parse_options(argc, argv, prefix, options, usage, 0); diff --git a/t/helper/test-match-trees.c b/t/helper/test-match-trees.c index 2ed064b9716ac8..006ce5278e23a0 100644 --- a/t/helper/test-match-trees.c +++ b/t/helper/test-match-trees.c @@ -13,7 +13,7 @@ int cmd__match_trees(int ac UNUSED, const char **av) struct object_id hash1, hash2, shifted; struct tree *one, *two; - setup_git_directory(); + setup_git_directory(the_repository); if (repo_get_oid(the_repository, av[1], &hash1)) die("cannot parse %s as an object name", av[1]); diff --git a/t/helper/test-pack-deltas.c b/t/helper/test-pack-deltas.c index 4981401eaa6664..c493b75e02a99a 100644 --- a/t/helper/test-pack-deltas.c +++ b/t/helper/test-pack-deltas.c @@ -95,7 +95,7 @@ int cmd__pack_deltas(int argc, const char **argv) if (argc || num_objects < 0) usage_with_options(usage_str, options); - setup_git_directory(); + setup_git_directory(the_repository); f = hashfd(the_repository->hash_algo, 1, ""); write_pack_header(f, num_objects); diff --git a/t/helper/test-pack-mtimes.c b/t/helper/test-pack-mtimes.c index 7a8ee1de24ba83..b7740567999299 100644 --- a/t/helper/test-pack-mtimes.c +++ b/t/helper/test-pack-mtimes.c @@ -32,7 +32,7 @@ int cmd__pack_mtimes(int argc, const char **argv) struct strbuf buf = STRBUF_INIT; struct packed_git *p; - setup_git_directory(); + setup_git_directory(the_repository); if (argc != 2) usage(pack_mtimes_usage); diff --git a/t/helper/test-partial-clone.c b/t/helper/test-partial-clone.c index d84880074930b2..a7aab426d0194a 100644 --- a/t/helper/test-partial-clone.c +++ b/t/helper/test-partial-clone.c @@ -1,3 +1,5 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "test-tool.h" #include 
"hex.h" #include "repository.h" @@ -32,7 +34,7 @@ static void object_info(const char *gitdir, const char *oid_hex) int cmd__partial_clone(int argc, const char **argv) { - setup_git_directory(); + setup_git_directory(the_repository); if (argc < 4) die("too few arguments"); diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index fe63002c2be27d..69676b15a53f73 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -89,7 +89,7 @@ int cmd__path_walk(int argc, const char **argv) OPT_END(), }; - setup_git_directory(); + setup_git_directory(the_repository); revs.repo = the_repository; argc = parse_options(argc, argv, NULL, diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index 3131b54a871c1b..5d86a96c17e4e5 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -39,7 +39,7 @@ int cmd__reach(int ac, const char **av) struct strbuf buf = STRBUF_INIT; struct repository *r = the_repository; - setup_git_directory(); + setup_git_directory(the_repository); if (ac < 2) exit(1); diff --git a/t/helper/test-read-cache.c b/t/helper/test-read-cache.c index 9ae71cefb302c9..6b08ba8f078d00 100644 --- a/t/helper/test-read-cache.c +++ b/t/helper/test-read-cache.c @@ -19,7 +19,7 @@ int cmd__read_cache(int argc, const char **argv) if (argc == 2) cnt = strtol(argv[1], NULL, 0); - setup_git_directory(); + setup_git_directory(the_repository); repo_config(the_repository, git_default_config, NULL); for (i = 0; i < cnt; i++) { diff --git a/t/helper/test-read-graph.c b/t/helper/test-read-graph.c index 6a5f64e473f2b6..9f07b9c25a66e3 100644 --- a/t/helper/test-read-graph.c +++ b/t/helper/test-read-graph.c @@ -76,7 +76,7 @@ int cmd__read_graph(int argc, const char **argv) struct odb_source *source; int ret = 0; - setup_git_directory(); + setup_git_directory(the_repository); source = the_repository->objects->sources; prepare_repo_settings(the_repository); diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 
388d29e2b53db3..790000fb26c270 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -14,7 +14,7 @@ static struct multi_pack_index *setup_midx(const char *object_dir) { struct odb_source *source; - setup_git_directory(); + setup_git_directory(the_repository); source = odb_find_source(the_repository->objects, object_dir); if (!source) source = odb_add_to_alternates_memory(the_repository->objects, diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index 74edf2029a28fc..3866d0aca49bc2 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -340,7 +340,7 @@ int cmd__ref_store(int argc UNUSED, const char **argv) const char *func; struct command *cmd; - setup_git_directory(); + setup_git_directory(the_repository); argv = get_store(argv + 1, &refs); diff --git a/t/helper/test-revision-walking.c b/t/helper/test-revision-walking.c index 071f5bd1e21974..70051eeaf848e7 100644 --- a/t/helper/test-revision-walking.c +++ b/t/helper/test-revision-walking.c @@ -56,7 +56,7 @@ int cmd__revision_walking(int argc, const char **argv) if (argc < 2) return 1; - setup_git_directory(); + setup_git_directory(the_repository); if (!strcmp(argv[1], "run-twice")) { printf("1st\n"); diff --git a/t/helper/test-scrap-cache-tree.c b/t/helper/test-scrap-cache-tree.c index 64fff6e9e3ce87..7b5ce501d9eec0 100644 --- a/t/helper/test-scrap-cache-tree.c +++ b/t/helper/test-scrap-cache-tree.c @@ -12,7 +12,7 @@ int cmd__scrap_cache_tree(int ac UNUSED, const char **av UNUSED) { struct lock_file index_lock = LOCK_INIT; - setup_git_directory(); + setup_git_directory(the_repository); repo_hold_locked_index(the_repository, &index_lock, LOCK_DIE_ON_ERROR); if (repo_read_index(the_repository) < 0) die("unable to read index file"); diff --git a/t/helper/test-serve-v2.c b/t/helper/test-serve-v2.c index 63a200b8d46f68..27f3ed89478fde 100644 --- a/t/helper/test-serve-v2.c +++ b/t/helper/test-serve-v2.c @@ -23,7 +23,7 @@ int cmd__serve_v2(int argc, const char 
**argv) N_("exit immediately after advertising capabilities")), OPT_END() }; - const char *prefix = setup_git_directory(); + const char *prefix = setup_git_directory(the_repository); /* ignore all unknown cmdline switches for now */ argc = parse_options(argc, argv, prefix, options, serve_usage, diff --git a/t/helper/test-submodule-config.c b/t/helper/test-submodule-config.c index cbe93f2f9e0178..3f302921793378 100644 --- a/t/helper/test-submodule-config.c +++ b/t/helper/test-submodule-config.c @@ -34,7 +34,7 @@ int cmd__submodule_config(int argc, const char **argv) if (my_argc % 2 != 0) die_usage(argc, argv, "Wrong number of arguments."); - setup_git_directory(); + setup_git_directory(the_repository); while (*arg) { struct object_id commit_oid; diff --git a/t/helper/test-submodule-nested-repo-config.c b/t/helper/test-submodule-nested-repo-config.c index 2710341cd5db11..7e31d3fe47a105 100644 --- a/t/helper/test-submodule-nested-repo-config.c +++ b/t/helper/test-submodule-nested-repo-config.c @@ -19,7 +19,7 @@ int cmd__submodule_nested_repo_config(int argc, const char **argv) if (argc < 3) die_usage(argv, "Wrong number of arguments."); - setup_git_directory(); + setup_git_directory(the_repository); if (repo_submodule_init(&subrepo, the_repository, argv[1], null_oid(the_hash_algo))) { die_usage(argv, "Submodule not found."); diff --git a/t/helper/test-submodule.c b/t/helper/test-submodule.c index 0133852e1ebfc7..3c5c4c4a090e98 100644 --- a/t/helper/test-submodule.c +++ b/t/helper/test-submodule.c @@ -99,7 +99,7 @@ static int cmd__submodule_is_active(int argc, const char **argv) if (argc != 1) usage_with_options(submodule_is_active_usage, options); - setup_git_directory(); + setup_git_directory(the_repository); return !is_submodule_active(the_repository, argv[0]); } @@ -142,7 +142,7 @@ static int cmd__submodule_config_list(int argc, const char **argv) argc = parse_options(argc, argv, "test-tools", options, usage, PARSE_OPT_KEEP_ARGV0); - setup_git_directory(); + 
setup_git_directory(the_repository); if (argc == 2) return print_config_from_gitmodules(the_repository, argv[1]); @@ -161,7 +161,7 @@ static int cmd__submodule_config_set(int argc, const char **argv) argc = parse_options(argc, argv, "test-tools", options, usage, PARSE_OPT_KEEP_ARGV0); - setup_git_directory(); + setup_git_directory(the_repository); /* Equivalent to ACTION_SET in builtin/config.c */ if (argc == 3) { @@ -183,7 +183,7 @@ static int cmd__submodule_config_unset(int argc, const char **argv) NULL }; - setup_git_directory(); + setup_git_directory(the_repository); if (argc == 2) { if (!is_writing_gitmodules_ok()) @@ -202,7 +202,7 @@ static int cmd__submodule_config_writeable(int argc, const char **argv UNUSED) "test-tool submodule config-writeable", NULL }; - setup_git_directory(); + setup_git_directory(the_repository); if (argc == 1) return is_writing_gitmodules_ok() ? 0 : -1; diff --git a/t/helper/test-userdiff.c b/t/helper/test-userdiff.c index aa3a9894d257ef..fc34c589b3d106 100644 --- a/t/helper/test-userdiff.c +++ b/t/helper/test-userdiff.c @@ -40,7 +40,7 @@ int cmd__userdiff(int argc, const char **argv) return error("unknown argument %s", argv[1]); if (want & USERDIFF_DRIVER_TYPE_CUSTOM) { - setup_git_directory(); + setup_git_directory(the_repository); repo_config(the_repository, cmd__userdiff_config, NULL); } diff --git a/t/helper/test-write-cache.c b/t/helper/test-write-cache.c index b37dd2c5d6db64..98e1477c98d1f7 100644 --- a/t/helper/test-write-cache.c +++ b/t/helper/test-write-cache.c @@ -12,7 +12,7 @@ int cmd__write_cache(int argc, const char **argv) int i, cnt = 1; if (argc == 2) cnt = strtol(argv[1], NULL, 0); - setup_git_directory(); + setup_git_directory(the_repository); repo_read_index(the_repository); for (i = 0; i < cnt; i++) { repo_hold_locked_index(the_repository, &index_lock, From fe9dc124277e8506d152006be9495d890745e129 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:36 +0200 Subject: [PATCH 060/241] 
setup: stop using `the_repository` in `upgrade_repository_format()` Stop using `the_repository` in `upgrade_repository_format()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 2 +- repository.h | 2 +- setup.c | 6 +++--- worktree.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index cef67e591954a9..bc5d98f9e613cf 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -378,7 +378,7 @@ void partial_clone_register( */ return; } else { - if (upgrade_repository_format(1) < 0) + if (upgrade_repository_format(the_repository, 1) < 0) die(_("unable to upgrade repository format to support partial clone")); /* Add promisor config for the remote */ diff --git a/repository.h b/repository.h index 7ae3d344848023..bb83d03bbf6f02 100644 --- a/repository.h +++ b/repository.h @@ -271,6 +271,6 @@ void repo_update_index_if_able(struct repository *, struct lock_file *); * Return 1 if upgrade repository format to target_version succeeded, * 0 if no upgrade is necessary, and -1 when upgrade is not possible. 
*/ -int upgrade_repository_format(int target_version); +int upgrade_repository_format(struct repository *repo, int target_version); #endif /* REPOSITORY_H */ diff --git a/setup.c b/setup.c index da0576d28afdad..6dbd096f20520b 100644 --- a/setup.c +++ b/setup.c @@ -808,7 +808,7 @@ static int check_repository_format_gently(struct repository *repo, return 0; } -int upgrade_repository_format(int target_version) +int upgrade_repository_format(struct repository *repo, int target_version) { struct strbuf sb = STRBUF_INIT; struct strbuf err = STRBUF_INIT; @@ -816,7 +816,7 @@ int upgrade_repository_format(int target_version) struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; int ret; - repo_common_path_append(the_repository, &sb, "config"); + repo_common_path_append(repo, &sb, "config"); read_repository_format(&repo_fmt, sb.buf); strbuf_release(&sb); @@ -838,7 +838,7 @@ int upgrade_repository_format(int target_version) } strbuf_addf(&repo_version, "%d", target_version); - repo_config_set(the_repository, "core.repositoryformatversion", repo_version.buf); + repo_config_set(repo, "core.repositoryformatversion", repo_version.buf); ret = 1; diff --git a/worktree.c b/worktree.c index 56732f8f3312bf..8ca0a172c630b8 100644 --- a/worktree.c +++ b/worktree.c @@ -1104,7 +1104,7 @@ void write_worktree_linking_files(const char *dotgit, const char *gitdir, strbuf_realpath(&repo, repo.buf, 1); if (use_relative_paths && !the_repository->repository_format_relative_worktrees) { - if (upgrade_repository_format(1) < 0) + if (upgrade_repository_format(the_repository, 1) < 0) die(_("unable to upgrade repository format to support relative worktrees")); if (repo_config_set_gently(the_repository, "extensions.relativeWorktrees", "true")) die(_("unable to set extensions.relativeWorktrees setting")); From 8792ccb40bc3751e3903a4bcaf3bce489c28a5b2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:37 +0200 Subject: [PATCH 061/241] setup: stop using `the_repository` 
in `check_repository_format()` Stop using `the_repository` in `check_repository_format()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Furthermore, the function is never used outside "setup.c". Drop its declaration in "setup.h" and make it static. Note that this requires us to reorder the function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- setup.c | 58 ++++++++++++++++++++++++++++++++------------------------- setup.h | 10 ---------- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/setup.c b/setup.c index 6dbd096f20520b..c32d6e96bb61ba 100644 --- a/setup.c +++ b/setup.c @@ -1755,6 +1755,37 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir, return result; } +/* + * Check the repository format version in the path found in repo_get_git_dir(the_repository), + * and die if it is a version we don't understand. Generally one would + * set_git_dir() before calling this, and use it only for "are we in a valid + * repo?". + * + * If successful and fmt is not NULL, fill fmt with data. 
+ */ +static void check_repository_format(struct repository *repo, struct repository_format *fmt) +{ + struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; + if (!fmt) + fmt = &repo_fmt; + check_repository_format_gently(repo, repo_get_git_dir(repo), fmt, NULL); + startup_info->have_repository = 1; + repo_set_hash_algo(repo, fmt->hash_algo); + repo_set_compat_hash_algo(repo, fmt->compat_hash_algo); + repo_set_ref_storage_format(repo, + fmt->ref_storage_format, + fmt->ref_storage_payload); + repo->repository_format_worktree_config = + fmt->worktree_config; + repo->repository_format_submodule_path_cfg = + fmt->submodule_path_cfg; + repo->repository_format_relative_worktrees = + fmt->relative_worktrees; + repo->repository_format_partial_clone = + xstrdup_or_null(fmt->partial_clone); + clear_repository_format(&repo_fmt); +} + const char *enter_repo(struct repository *repo, const char *path, unsigned flags) { static struct strbuf validated_path = STRBUF_INIT; @@ -1829,7 +1860,7 @@ const char *enter_repo(struct repository *repo, const char *path, unsigned flags if (is_git_directory(".")) { set_git_dir(repo, ".", 0); - check_repository_format(NULL); + check_repository_format(repo, NULL); return path; } @@ -2104,29 +2135,6 @@ int git_config_perm(const char *var, const char *value) return -(i & 0666); } -void check_repository_format(struct repository_format *fmt) -{ - struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; - if (!fmt) - fmt = &repo_fmt; - check_repository_format_gently(the_repository, repo_get_git_dir(the_repository), fmt, NULL); - startup_info->have_repository = 1; - repo_set_hash_algo(the_repository, fmt->hash_algo); - repo_set_compat_hash_algo(the_repository, fmt->compat_hash_algo); - repo_set_ref_storage_format(the_repository, - fmt->ref_storage_format, - fmt->ref_storage_payload); - the_repository->repository_format_worktree_config = - fmt->worktree_config; - the_repository->repository_format_submodule_path_cfg = - fmt->submodule_path_cfg; - 
the_repository->repository_format_relative_worktrees = - fmt->relative_worktrees; - the_repository->repository_format_partial_clone = - xstrdup_or_null(fmt->partial_clone); - clear_repository_format(&repo_fmt); -} - /* * Returns the "prefix", a path to the current working directory * relative to the work tree root, or NULL, if the current working @@ -2801,7 +2809,7 @@ int init_db(const char *git_dir, const char *real_git_dir, * config file, so this will not fail. What we are catching * is an attempt to reinitialize new repository with an old tool. */ - check_repository_format(&repo_fmt); + check_repository_format(the_repository, &repo_fmt); repository_format_configure(the_repository, &repo_fmt, hash, ref_storage_format); diff --git a/setup.h b/setup.h index b779661ce7df47..a820041af05ffb 100644 --- a/setup.h +++ b/setup.h @@ -221,16 +221,6 @@ void clear_repository_format(struct repository_format *format); int verify_repository_format(const struct repository_format *format, struct strbuf *err); -/* - * Check the repository format version in the path found in repo_get_git_dir(the_repository), - * and die if it is a version we don't understand. Generally one would - * set_git_dir() before calling this, and use it only for "are we in a valid - * repo?". - * - * If successful and fmt is not NULL, fill fmt with data. - */ -void check_repository_format(struct repository_format *fmt); - const char *get_template_dir(const char *option_template); #define INIT_DB_QUIET (1 << 0) From 50e71a3f085b49b3d79fd74ade3d2dc66f942c97 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:38 +0200 Subject: [PATCH 062/241] setup: stop using `the_repository` in `initialize_repository_version()` Stop using `the_repository` in `initialize_repository_version()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 4 ++-- refs.c | 2 +- setup.c | 29 +++++++++++++++-------------- setup.h | 3 ++- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 16cd7b029b0967..663ef0b524a11e 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1227,7 +1227,7 @@ int cmd_clone(int argc, * * This is sufficient for Git commands to discover the Git directory. */ - initialize_repository_version(GIT_HASH_UNKNOWN, + initialize_repository_version(the_repository, GIT_HASH_UNKNOWN, the_repository->ref_storage_format, 1); refs_create_refdir_stubs(the_repository, git_dir, NULL); @@ -1440,7 +1440,7 @@ int cmd_clone(int argc, * ours to the same thing. */ hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport)); - initialize_repository_version(hash_algo, the_repository->ref_storage_format, 1); + initialize_repository_version(the_repository, hash_algo, the_repository->ref_storage_format, 1); repo_set_hash_algo(the_repository, hash_algo); create_reference_database(NULL, 1); diff --git a/refs.c b/refs.c index e66cf4861d7ec5..6a49ef8a1cd56b 100644 --- a/refs.c +++ b/refs.c @@ -3450,7 +3450,7 @@ int repo_migrate_ref_storage_format(struct repository *repo, * repository format so that clients will use the new ref store. * We also need to swap out the repository's main ref store. */ - initialize_repository_version(hash_algo_by_ptr(repo->hash_algo), format, 1); + initialize_repository_version(the_repository, hash_algo_by_ptr(repo->hash_algo), format, 1); /* * Unset the old ref store and release it. 
`get_main_ref_store()` will diff --git a/setup.c b/setup.c index c32d6e96bb61ba..1570749b4a67e0 100644 --- a/setup.c +++ b/setup.c @@ -2382,7 +2382,8 @@ static int needs_work_tree_config(const char *git_dir, const char *work_tree) return 1; } -void initialize_repository_version(int hash_algo, +void initialize_repository_version(struct repository *repo, + int hash_algo, enum ref_storage_format ref_storage_format, int reinit) { @@ -2399,35 +2400,35 @@ void initialize_repository_version(int hash_algo, */ if (hash_algo != GIT_HASH_SHA1_LEGACY || ref_storage_format != REF_STORAGE_FORMAT_FILES || - the_repository->ref_storage_payload) + repo->ref_storage_payload) target_version = GIT_REPO_VERSION_READ; if (hash_algo != GIT_HASH_SHA1_LEGACY && hash_algo != GIT_HASH_UNKNOWN) - repo_config_set(the_repository, "extensions.objectformat", + repo_config_set(repo, "extensions.objectformat", hash_algos[hash_algo].name); else if (reinit) - repo_config_set_gently(the_repository, "extensions.objectformat", NULL); + repo_config_set_gently(repo, "extensions.objectformat", NULL); - if (the_repository->ref_storage_payload) { + if (repo->ref_storage_payload) { struct strbuf ref_uri = STRBUF_INIT; strbuf_addf(&ref_uri, "%s://%s", ref_storage_format_to_name(ref_storage_format), - the_repository->ref_storage_payload); - repo_config_set(the_repository, "extensions.refstorage", ref_uri.buf); + repo->ref_storage_payload); + repo_config_set(repo, "extensions.refstorage", ref_uri.buf); strbuf_release(&ref_uri); } else if (ref_storage_format != REF_STORAGE_FORMAT_FILES) { - repo_config_set(the_repository, "extensions.refstorage", + repo_config_set(repo, "extensions.refstorage", ref_storage_format_to_name(ref_storage_format)); } else if (reinit) { - repo_config_set_gently(the_repository, "extensions.refstorage", NULL); + repo_config_set_gently(repo, "extensions.refstorage", NULL); } if (reinit) { struct strbuf config = STRBUF_INIT; struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; - 
repo_common_path_append(the_repository, &config, "config"); + repo_common_path_append(repo, &config, "config"); read_repository_format(&repo_fmt, config.buf); if (repo_fmt.v1_only_extensions.nr) @@ -2437,17 +2438,17 @@ void initialize_repository_version(int hash_algo, clear_repository_format(&repo_fmt); } - repo_config_get_bool(the_repository, "init.defaultSubmodulePathConfig", + repo_config_get_bool(repo, "init.defaultSubmodulePathConfig", &default_submodule_path_config); if (default_submodule_path_config) { /* extensions.submodulepathconfig requires at least version 1 */ if (target_version == 0) target_version = 1; - repo_config_set(the_repository, "extensions.submodulepathconfig", "true"); + repo_config_set(repo, "extensions.submodulepathconfig", "true"); } strbuf_addf(&repo_version, "%d", target_version); - repo_config_set(the_repository, "core.repositoryformatversion", repo_version.buf); + repo_config_set(repo, "core.repositoryformatversion", repo_version.buf); strbuf_release(&repo_version); } @@ -2548,7 +2549,7 @@ static int create_default_files(struct repository *repo, adjust_shared_perm(repo, repo_get_git_dir(repo)); } - initialize_repository_version(fmt->hash_algo, fmt->ref_storage_format, reinit); + initialize_repository_version(repo, fmt->hash_algo, fmt->ref_storage_format, reinit); /* Check filemode trustability */ repo_git_path_replace(repo, &path, "config"); diff --git a/setup.h b/setup.h index a820041af05ffb..c33b675ccfd95f 100644 --- a/setup.h +++ b/setup.h @@ -232,7 +232,8 @@ int init_db(const char *git_dir, const char *real_git_dir, enum ref_storage_format ref_storage_format, const char *initial_branch, int init_shared_repository, unsigned int flags); -void initialize_repository_version(int hash_algo, +void initialize_repository_version(struct repository *repo, + int hash_algo, enum ref_storage_format ref_storage_format, int reinit); void create_reference_database(const char *initial_branch, int quiet); From 
325c017e48ce82ddde20dc5315126bbb44901f89 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:39 +0200 Subject: [PATCH 063/241] setup: stop using `the_repository` in `create_reference_database()` Stop using `the_repository` in `create_reference_database()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- setup.c | 13 +++++++------ setup.h | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 663ef0b524a11e..d8640222149c8c 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1442,7 +1442,7 @@ int cmd_clone(int argc, hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport)); initialize_repository_version(the_repository, hash_algo, the_repository->ref_storage_format, 1); repo_set_hash_algo(the_repository, hash_algo); - create_reference_database(NULL, 1); + create_reference_database(the_repository, NULL, 1); /* * Before fetching from the remote, download and install bundle diff --git a/setup.c b/setup.c index 1570749b4a67e0..efac2dd7b5c204 100644 --- a/setup.c +++ b/setup.c @@ -2465,13 +2465,14 @@ static int is_reinit(struct repository *repo) return ret; } -void create_reference_database(const char *initial_branch, int quiet) +void create_reference_database(struct repository *repo, + const char *initial_branch, int quiet) { struct strbuf err = STRBUF_INIT; char *to_free = NULL; - int reinit = is_reinit(the_repository); + int reinit = is_reinit(repo); - if (ref_store_create_on_disk(get_main_ref_store(the_repository), 0, &err)) + if (ref_store_create_on_disk(get_main_ref_store(repo), 0, &err)) die("failed to set up refs db: %s", err.buf); /* @@ -2483,14 +2484,14 @@ void create_reference_database(const char *initial_branch, int quiet) if (!initial_branch) initial_branch = to_free = - 
repo_default_branch_name(the_repository, quiet); + repo_default_branch_name(repo, quiet); ref = xstrfmt("refs/heads/%s", initial_branch); if (check_refname_format(ref, 0) < 0) die(_("invalid initial branch name: '%s'"), initial_branch); - if (refs_update_symref(get_main_ref_store(the_repository), "HEAD", ref, NULL) < 0) + if (refs_update_symref(get_main_ref_store(repo), "HEAD", ref, NULL) < 0) exit(1); free(ref); } @@ -2827,7 +2828,7 @@ int init_db(const char *git_dir, const char *real_git_dir, &repo_fmt, init_shared_repository); if (!(flags & INIT_DB_SKIP_REFDB)) - create_reference_database(initial_branch, flags & INIT_DB_QUIET); + create_reference_database(the_repository, initial_branch, flags & INIT_DB_QUIET); create_object_directory(the_repository); if (repo_settings_get_shared_repository(the_repository)) { diff --git a/setup.h b/setup.h index c33b675ccfd95f..21737e9bd69108 100644 --- a/setup.h +++ b/setup.h @@ -236,7 +236,7 @@ void initialize_repository_version(struct repository *repo, int hash_algo, enum ref_storage_format ref_storage_format, int reinit); -void create_reference_database(const char *initial_branch, int quiet); +void create_reference_database(struct repository *repo, const char *initial_branch, int quiet); /* * NOTE NOTE NOTE!! From 947d23432d3ea07e6619f4e0bf0ed17c8c8d9385 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Mar 2026 15:17:40 +0200 Subject: [PATCH 064/241] setup: stop using `the_repository` in `init_db()` Stop using `the_repository` in `init_db()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- builtin/init-db.c | 2 +- setup.c | 43 ++++++++++++++++++++++--------------------- setup.h | 3 ++- 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index d8640222149c8c..bc35a3c0b18fcb 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1184,7 +1184,7 @@ int cmd_clone(int argc, * repository, and reference backends may persist that information into * their on-disk data structures. */ - init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, + init_db(the_repository, git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, ref_storage_format, NULL, do_not_override_repo_unix_permissions, INIT_DB_QUIET | INIT_DB_SKIP_REFDB); diff --git a/builtin/init-db.c b/builtin/init-db.c index e626b0d8b7ccc6..c55517ad94d447 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -252,7 +252,7 @@ int cmd_init_db(int argc, } flags |= INIT_DB_EXIST_OK; - ret = init_db(git_dir, real_git_dir, template_dir, hash_algo, + ret = init_db(the_repository, git_dir, real_git_dir, template_dir, hash_algo, ref_storage_format, initial_branch, init_shared_repository, flags); diff --git a/setup.c b/setup.c index efac2dd7b5c204..67bede8b9dd80f 100644 --- a/setup.c +++ b/setup.c @@ -2775,7 +2775,8 @@ static void repository_format_configure(struct repository *repo, repo_fmt->ref_storage_payload); } -int init_db(const char *git_dir, const char *real_git_dir, +int init_db(struct repository *repo, + const char *git_dir, const char *real_git_dir, const char *template_dir, int hash, enum ref_storage_format ref_storage_format, const char *initial_branch, @@ -2795,13 +2796,13 @@ int init_db(const char *git_dir, const char *real_git_dir, if (!exist_ok && !stat(real_git_dir, &st)) die(_("%s already exists"), real_git_dir); - set_git_dir(the_repository, real_git_dir, 1); - git_dir = repo_get_git_dir(the_repository); + set_git_dir(repo, real_git_dir, 1); + 
git_dir = repo_get_git_dir(repo); separate_git_dir(git_dir, original_git_dir); } else { - set_git_dir(the_repository, git_dir, 1); - git_dir = repo_get_git_dir(the_repository); + set_git_dir(repo, git_dir, 1); + git_dir = repo_get_git_dir(repo); } startup_info->have_repository = 1; @@ -2811,27 +2812,27 @@ int init_db(const char *git_dir, const char *real_git_dir, * config file, so this will not fail. What we are catching * is an attempt to reinitialize new repository with an old tool. */ - check_repository_format(the_repository, &repo_fmt); + check_repository_format(repo, &repo_fmt); - repository_format_configure(the_repository, &repo_fmt, hash, ref_storage_format); + repository_format_configure(repo, &repo_fmt, hash, ref_storage_format); /* * Ensure `core.hidedotfiles` is processed. This must happen after we * have set up the repository format such that we can evaluate * includeIf conditions correctly in the case of re-initialization. */ - repo_config(the_repository, git_default_core_config, NULL); + repo_config(repo, git_default_core_config, NULL); - safe_create_dir(the_repository, git_dir, 0); + safe_create_dir(repo, git_dir, 0); - reinit = create_default_files(the_repository, template_dir, original_git_dir, + reinit = create_default_files(repo, template_dir, original_git_dir, &repo_fmt, init_shared_repository); if (!(flags & INIT_DB_SKIP_REFDB)) - create_reference_database(the_repository, initial_branch, flags & INIT_DB_QUIET); - create_object_directory(the_repository); + create_reference_database(repo, initial_branch, flags & INIT_DB_QUIET); + create_object_directory(repo); - if (repo_settings_get_shared_repository(the_repository)) { + if (repo_settings_get_shared_repository(repo)) { char buf[10]; /* We do not spell "group" and such, so that * the configuration can be read by older version @@ -2839,29 +2840,29 @@ int init_db(const char *git_dir, const char *real_git_dir, * and compatibility values for PERM_GROUP and * PERM_EVERYBODY. 
*/ - if (repo_settings_get_shared_repository(the_repository) < 0) + if (repo_settings_get_shared_repository(repo) < 0) /* force to the mode value */ - xsnprintf(buf, sizeof(buf), "0%o", -repo_settings_get_shared_repository(the_repository)); - else if (repo_settings_get_shared_repository(the_repository) == PERM_GROUP) + xsnprintf(buf, sizeof(buf), "0%o", -repo_settings_get_shared_repository(repo)); + else if (repo_settings_get_shared_repository(repo) == PERM_GROUP) xsnprintf(buf, sizeof(buf), "%d", OLD_PERM_GROUP); - else if (repo_settings_get_shared_repository(the_repository) == PERM_EVERYBODY) + else if (repo_settings_get_shared_repository(repo) == PERM_EVERYBODY) xsnprintf(buf, sizeof(buf), "%d", OLD_PERM_EVERYBODY); else BUG("invalid value for shared_repository"); - repo_config_set(the_repository, "core.sharedrepository", buf); - repo_config_set(the_repository, "receive.denyNonFastforwards", "true"); + repo_config_set(repo, "core.sharedrepository", buf); + repo_config_set(repo, "receive.denyNonFastforwards", "true"); } if (!(flags & INIT_DB_QUIET)) { int len = strlen(git_dir); if (reinit) - printf(repo_settings_get_shared_repository(the_repository) + printf(repo_settings_get_shared_repository(repo) ? _("Reinitialized existing shared Git repository in %s%s\n") : _("Reinitialized existing Git repository in %s%s\n"), git_dir, len && git_dir[len-1] != '/' ? "/" : ""); else - printf(repo_settings_get_shared_repository(the_repository) + printf(repo_settings_get_shared_repository(repo) ? _("Initialized empty shared Git repository in %s%s\n") : _("Initialized empty Git repository in %s%s\n"), git_dir, len && git_dir[len-1] != '/' ? 
"/" : ""); diff --git a/setup.h b/setup.h index 21737e9bd69108..9409326fe47c70 100644 --- a/setup.h +++ b/setup.h @@ -227,7 +227,8 @@ const char *get_template_dir(const char *option_template); #define INIT_DB_EXIST_OK (1 << 1) #define INIT_DB_SKIP_REFDB (1 << 2) -int init_db(const char *git_dir, const char *real_git_dir, +int init_db(struct repository *repo, + const char *git_dir, const char *real_git_dir, const char *template_dir, int hash_algo, enum ref_storage_format ref_storage_format, const char *initial_branch, int init_shared_repository, From 0df739ea34ad8bc54f4ad6fdc978d97cd3c82759 Mon Sep 17 00:00:00 2001 From: Jayesh Daga Date: Tue, 31 Mar 2026 15:34:26 +0000 Subject: [PATCH 065/241] unpack-trees: use repository from index instead of global unpack_trees() currently initializes its repository from the global 'the_repository', even though a repository instance is already available via the source index. Use 'o->src_index->repo' instead of the global variable, reducing reliance on global repository state. This is a step towards eliminating global repository usage in unpack_trees(). 
Suggested-by: Patrick Steinhardt Signed-off-by: Jayesh Daga Signed-off-by: Junio C Hamano --- unpack-trees.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/unpack-trees.c b/unpack-trees.c index 998a1e6dc70cae..191b9d4769a7b4 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1780,14 +1780,14 @@ static int clear_ce_flags(struct index_state *istate, xsnprintf(label, sizeof(label), "clear_ce_flags(0x%08lx,0x%08lx)", (unsigned long)select_mask, (unsigned long)clear_mask); - trace2_region_enter("unpack_trees", label, the_repository); + trace2_region_enter("unpack_trees", label, istate->repo); rval = clear_ce_flags_1(istate, istate->cache, istate->cache_nr, &prefix, select_mask, clear_mask, pl, 0, 0); - trace2_region_leave("unpack_trees", label, the_repository); + trace2_region_leave("unpack_trees", label, istate->repo); stop_progress(&istate->progress); return rval; @@ -1903,7 +1903,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options BUG("o->df_conflict_entry is an output only field"); trace_performance_enter(); - trace2_region_enter("unpack_trees", "unpack_trees", the_repository); + trace2_region_enter("unpack_trees", "unpack_trees", repo); prepare_repo_settings(repo); if (repo->settings.command_requires_full_index) { @@ -2007,9 +2007,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options } trace_performance_enter(); - trace2_region_enter("unpack_trees", "traverse_trees", the_repository); + trace2_region_enter("unpack_trees", "traverse_trees", repo); ret = traverse_trees(o->src_index, len, t, &info); - trace2_region_leave("unpack_trees", "traverse_trees", the_repository); + trace2_region_leave("unpack_trees", "traverse_trees", repo); trace_performance_leave("traverse_trees"); if (ret < 0) goto return_failed; @@ -2106,7 +2106,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options dir_clear(o->internal.dir); o->internal.dir = NULL; } - 
trace2_region_leave("unpack_trees", "unpack_trees", the_repository); + trace2_region_leave("unpack_trees", "unpack_trees", repo); trace_performance_leave("unpack_trees"); return ret; From c74767b7a33325dcbf58df6977bc9442b0c35375 Mon Sep 17 00:00:00 2001 From: Jayesh Daga Date: Tue, 31 Mar 2026 15:34:27 +0000 Subject: [PATCH 066/241] unpack-trees: use repository from index instead of global unpack_trees() currently initializes its repository from the global 'the_repository', even though a repository instance is already available via the source index. Use 'o->src_index->repo' instead of the global variable, reducing reliance on global repository state. This is a step towards eliminating global repository usage in unpack_trees(). Suggested-by: Patrick Steinhardt Signed-off-by: Jayesh Daga Signed-off-by: Junio C Hamano --- unpack-trees.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unpack-trees.c b/unpack-trees.c index 191b9d4769a7b4..b42020f16b10ae 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1882,7 +1882,7 @@ static int verify_absent(const struct cache_entry *, */ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options *o) { - struct repository *repo = the_repository; + struct repository *repo = o->src_index->repo; int i, ret; static struct cache_entry *dfc; struct pattern_list pl; From d21652903c95a306f1a0a196562ec827b1af89ec Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:07 +0000 Subject: [PATCH 067/241] t0001: allow implicit bare repo discovery for aliased-command test 8d1a7448206e (setup.c: create `safe.bareRepository`, 2022-07-14) introduced a setting to restrict implicit bare repository discovery, mitigating a social-engineering attack where an embedded bare repo's hooks get executed unknowingly. To allow for that default to change at some stage in the future, the tests need to be prepared. 
This commit adjusts a test accordingly that runs `git aliasedinit` from inside a bare repo to verify that aliased commands work there. The test is about alias resolution, not bare repo discovery, so add `test_config_global safe.bareRepository all` to opt in explicitly. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t0001-init.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/t/t0001-init.sh b/t/t0001-init.sh index e4d32bb4d259f6..6bd0a15dac1869 100755 --- a/t/t0001-init.sh +++ b/t/t0001-init.sh @@ -77,6 +77,7 @@ test_expect_success 'plain nested through aliased command' ' ' test_expect_success 'plain nested in bare through aliased command' ' + test_config_global safe.bareRepository all && ( git init --bare bare-ancestor-aliased.git && cd bare-ancestor-aliased.git && From f1a852920aa08ed273537e3bfeb541106febfb35 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:08 +0000 Subject: [PATCH 068/241] t0001: replace `cd`+`git` with `git --git-dir` in `check_config` To prepare for `safe.bareRepository` defaulting to `explicit` (see 8d1a7448206e), replace `cd && git config` with `git --git-dir= config` so the helper does not rely on implicit bare repository discovery. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t0001-init.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t0001-init.sh b/t/t0001-init.sh index 6bd0a15dac1869..db2bf1001f1bd3 100755 --- a/t/t0001-init.sh +++ b/t/t0001-init.sh @@ -20,8 +20,8 @@ check_config () { return 1 fi - bare=$(cd "$1" && git config --bool core.bare) - worktree=$(cd "$1" && git config core.worktree) || + bare=$(git --git-dir="$1" config --bool core.bare) + worktree=$(git --git-dir="$1" config core.worktree) || worktree=unset test "$bare" = "$2" && test "$worktree" = "$3" || { From 68aa4c7a654f406e281617db6f5378d2c4620e9e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:09 +0000 Subject: [PATCH 069/241] t0003: use `--git-dir` for bare repo attribute tests The bare repo tests in t0003-attributes.sh currently `cd` into the bare repository inside subshells, relying on implicit discovery. Restructure these tests to pass `--git-dir=bare.git` to the `attr_check` and `attr_check_source` helpers instead. This makes the code much easier to read, and also makes bare repo access explicit, i.e. compatible with an eventual `safe.bareRepository=explicit` default. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t0003-attributes.sh | 66 ++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh index 582e207aa12eb1..3a34f5dbc24eaa 100755 --- a/t/t0003-attributes.sh +++ b/t/t0003-attributes.sh @@ -346,17 +346,14 @@ test_expect_success 'setup bare' ' test_expect_success 'bare repository: check that .gitattribute is ignored' ' ( - cd bare.git && - ( - echo "f test=f" && - echo "a/i test=a/i" - ) >.gitattributes && - attr_check f unspecified && - attr_check a/f unspecified && - attr_check a/c/f unspecified && - attr_check a/i unspecified && - attr_check subdir/a/i unspecified - ) + echo "f test=f" && + echo "a/i test=a/i" + ) >bare.git/.gitattributes && + attr_check f unspecified --git-dir=bare.git && + attr_check a/f unspecified --git-dir=bare.git && + attr_check a/c/f unspecified --git-dir=bare.git && + attr_check a/i unspecified --git-dir=bare.git && + attr_check subdir/a/i unspecified --git-dir=bare.git ' bad_attr_source_err="fatal: bad --attr-source or GIT_ATTR_SOURCE" @@ -449,41 +446,32 @@ test_expect_success 'diff without repository with attr source' ' ' test_expect_success 'bare repository: with --source' ' - ( - cd bare.git && - attr_check_source foo/bar/f f tag-1 && - attr_check_source foo/bar/a/i n tag-1 && - attr_check_source foo/bar/f unspecified tag-2 && - attr_check_source foo/bar/a/i m tag-2 && - attr_check_source foo/bar/g g tag-2 && - attr_check_source foo/bar/g unspecified tag-1 - ) + attr_check_source foo/bar/f f tag-1 --git-dir=bare.git && + attr_check_source foo/bar/a/i n tag-1 --git-dir=bare.git && + attr_check_source foo/bar/f unspecified tag-2 --git-dir=bare.git && + attr_check_source foo/bar/a/i m tag-2 --git-dir=bare.git && + attr_check_source foo/bar/g g tag-2 --git-dir=bare.git && + attr_check_source foo/bar/g unspecified tag-1 --git-dir=bare.git ' test_expect_success 'bare repository: 
check that --cached honors index' ' - ( - cd bare.git && - GIT_INDEX_FILE=../.git/index \ - git check-attr --cached --stdin --all <../stdin-all | - sort >actual && - test_cmp ../specified-all actual - ) + GIT_INDEX_FILE=.git/index \ + git --git-dir=bare.git check-attr --cached --stdin --all <stdin-all | + sort >actual && + test_cmp specified-all actual ' test_expect_success 'bare repository: test info/attributes' ' + mkdir -p bare.git/info && ( - cd bare.git && - mkdir info && - ( - echo "f test=f" && - echo "a/i test=a/i" - ) >info/attributes && - attr_check f f && - attr_check a/f f && - attr_check a/c/f f && - attr_check a/i a/i && - attr_check subdir/a/i unspecified - ) + echo "f test=f" && + echo "a/i test=a/i" + ) >bare.git/info/attributes && + attr_check f f --git-dir=bare.git && + attr_check a/f f --git-dir=bare.git && + attr_check a/c/f f --git-dir=bare.git && + attr_check a/i a/i --git-dir=bare.git && + attr_check subdir/a/i unspecified --git-dir=bare.git ' test_expect_success 'binary macro expanded by -a' ' From ca053477d93d76791413418766c3ff24996ca528 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:10 +0000 Subject: [PATCH 070/241] t0056: allow implicit bare repo discovery for `-C` work-tree tests The `git -C c/a.git --work-tree=../a` invocations in t0056-git-C.sh enter what is technically the `.git` directory of a repository to test `-C` combined with `--work-tree`. In doing so, the code relies on implicit discovery of bare repositories, which 8d1a7448206e (setup.c: create `safe.bareRepository`, 2022-07-14) prepared to be prevented by default. These tests verify the interaction between those flags, so changing them to use `--git-dir` would defeat their purpose. So let's just temporarily force-enable implicit discovery of bare repositories, no matter what `safe.bareRepository` defaults to. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t0056-git-C.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/t/t0056-git-C.sh b/t/t0056-git-C.sh index 2630e756dab732..6b7122add56b2b 100755 --- a/t/t0056-git-C.sh +++ b/t/t0056-git-C.sh @@ -57,11 +57,13 @@ test_expect_success 'Order should not matter: "--git-dir=a.git -C c" is equivale test_expect_success 'Effect on --work-tree option: "-C c/a.git --work-tree=../a" is equivalent to "--work-tree=c/a --git-dir=c/a.git"' ' rm c/a/a.txt && git --git-dir=c/a.git --work-tree=c/a status >expected && + test_config_global safe.bareRepository all && git -C c/a.git --work-tree=../a status >actual && test_cmp expected actual ' test_expect_success 'Order should not matter: "--work-tree=../a -C c/a.git" is equivalent to "-C c/a.git --work-tree=../a"' ' + test_config_global safe.bareRepository all && git -C c/a.git --work-tree=../a status >expected && git --work-tree=../a -C c/a.git status >actual && test_cmp expected actual From ecefd7ea3fb5b0321317786648bb912143c037cd Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:11 +0000 Subject: [PATCH 071/241] t1020: use `--git-dir` instead of subshell for bare repo Replace an unnecessarily complex subshell pattern with a much simpler `--git-dir`-based one. The latter is not only simpler, it also no longer relies on implicit bare repo discovery, which would fail with `safe.bareRepository=explicit`. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t1020-subdirectory.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/t/t1020-subdirectory.sh b/t/t1020-subdirectory.sh index 9fdbb2af80e0a8..20d2d306fecdce 100755 --- a/t/t1020-subdirectory.sh +++ b/t/t1020-subdirectory.sh @@ -177,10 +177,7 @@ test_expect_success 'no file/rev ambiguity check inside a bare repo (explicit GI test_expect_success 'no file/rev ambiguity check inside a bare repo' ' test_when_finished "rm -fr foo.git" && git clone -s --bare .git foo.git && - ( - cd foo.git && - git show -s HEAD - ) + git --git-dir=foo.git show -s HEAD ' test_expect_success SYMLINKS 'detection should not be fooled by a symlink' ' From ee58468e1f652a82aa250a7b214256e6ceea4f4e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:12 +0000 Subject: [PATCH 072/241] t1900: avoid using `-C ` for a bare repository To prepare for `safe.bareRepository` defaulting to `explicit` (see 8d1a7448206e), add an optional 6th parameter `repo_flag` (defaulting to `-C`) to the `test_repo_info` helper, and use it in the caller that wants to operate on a bare repository. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t1900-repo-info.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/t/t1900-repo-info.sh b/t/t1900-repo-info.sh index 39bb77dda0c327..6280da1efb426a 100755 --- a/t/t1900-repo-info.sh +++ b/t/t1900-repo-info.sh @@ -20,6 +20,7 @@ test_repo_info () { repo_name=$3 key=$4 expected_value=$5 + repo_flag=${6:--C} test_expect_success "setup: $label" ' eval "$init_command $repo_name" @@ -27,13 +28,13 @@ test_repo_info () { test_expect_success "lines: $label" ' echo "$key=$expected_value" > expect && - git -C "$repo_name" repo info "$key" >actual && + git $repo_flag "$repo_name" repo info "$key" >actual && test_cmp expect actual ' test_expect_success "nul: $label" ' printf "%s\n%s\0" "$key" "$expected_value" >expect && - git -C "$repo_name" repo info --format=nul "$key" >actual && + git $repo_flag "$repo_name" repo info --format=nul "$key" >actual && test_cmp_bin expect actual ' } @@ -48,7 +49,7 @@ test_repo_info 'bare repository = false is retrieved correctly' \ 'git init' 'nonbare' 'layout.bare' 'false' test_repo_info 'bare repository = true is retrieved correctly' \ - 'git init --bare' 'bare' 'layout.bare' 'true' + 'git init --bare' 'bare' 'layout.bare' 'true' '--git-dir' test_repo_info 'shallow repository = false is retrieved correctly' \ 'git init' 'nonshallow' 'layout.shallow' 'false' From a6818366b7017258730da57a9531c6c99e6ac306 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:13 +0000 Subject: [PATCH 073/241] t2400: explicitly specify bare repo for `git worktree add` To prepare for `safe.bareRepository` defaulting to `explicit` (see 8d1a7448206e), specify the gitdir specifically in bare-repo `git worktree add` invocations via `--git-dir=.` so Git does not rely on implicit bare repository discovery. While at it, also avoid unnecessary subshells and `cd`ing. This simplifies the logic in a rather pleasant way. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t2400-worktree-add.sh | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/t/t2400-worktree-add.sh b/t/t2400-worktree-add.sh index 023e1301c8e68e..0f8c83764705e7 100755 --- a/t/t2400-worktree-add.sh +++ b/t/t2400-worktree-add.sh @@ -171,11 +171,8 @@ test_expect_success 'not die on re-checking out current branch' ' ' test_expect_success '"add" from a bare repo' ' - ( - git clone --bare . bare && - cd bare && - git worktree add -b bare-main ../there2 main - ) + git clone --bare . bare && + git -C bare --git-dir=. worktree add -b bare-main ../there2 main ' test_expect_success 'checkout from a bare repo without "add"' ' @@ -186,15 +183,11 @@ test_expect_success '"add" default branch of a bare repo' ' - ( - git clone --bare . bare2 && - cd bare2 && - git worktree add ../there3 main && - cd ../there3 && - # Simple check that a Git command does not - # immediately fail with the current setup - git status - ) && + git clone --bare . bare2 && + git -C bare2 --git-dir=. worktree add ../there3 main && + # Simple check that a Git command does not + # immediately fail with the current setup + git status && cat >expect <<-EOF && init.t EOF From f71488534d5a0c0d56bc033a6fe617be610e8ded Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:14 +0000 Subject: [PATCH 074/241] t2406: use `--git-dir=.` for bare repository worktree repair To prepare for `safe.bareRepository` defaulting to `explicit` (see 8d1a7448206e), the test case t2406.10 (repair .git file from bare.git) cannot rely on the implicit discovery of the bare repository. Simply add a `--git-dir=.` to the invocation. The `-C bare.git` argument is still needed so that the `repair` command works on the intended directory. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t2406-worktree-repair.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t2406-worktree-repair.sh b/t/t2406-worktree-repair.sh index f5f19b3169384f..cac448b57559c9 100755 --- a/t/t2406-worktree-repair.sh +++ b/t/t2406-worktree-repair.sh @@ -84,7 +84,7 @@ test_expect_success 'repair .git file from bare.git' ' git -C bare.git worktree add --detach ../corrupt && git -C corrupt rev-parse --absolute-git-dir >expect && rm -f corrupt/.git && - git -C bare.git worktree repair && + git -C bare.git --git-dir=. worktree repair && git -C corrupt rev-parse --absolute-git-dir >actual && test_cmp expect actual ' From 58c5358a63baa70755c5d21833d5423642e1bf44 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:15 +0000 Subject: [PATCH 075/241] t5503: avoid discovering a bare repository The test case "fetch specific OID with tag following" creates a bare repository and wants to operate on it by changing the working directory and relying on Git's implicit discovery of the bare repository. Once the `safe.bareRepository` default is changed, this is no longer an option. So let's adjust the commands to specify the bare repository explicitly, via `--git-dir`, and avoid changing the working directory. As a bonus, the result is arguably more readable than the original code. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t5503-tagfollow.sh | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/t/t5503-tagfollow.sh b/t/t5503-tagfollow.sh index febe44104177e1..6d178d84dde811 100755 --- a/t/t5503-tagfollow.sh +++ b/t/t5503-tagfollow.sh @@ -168,16 +168,13 @@ test_expect_success 'new clone fetch main and tags' ' test_expect_success 'fetch specific OID with tag following' ' git init --bare clone3.git && - ( - cd clone3.git && - git remote add origin .. 
&& - git fetch origin $B:refs/heads/main && + git --git-dir=clone3.git remote add origin "$PWD" && + git --git-dir=clone3.git fetch origin $B:refs/heads/main && - git -C .. for-each-ref >expect && - git for-each-ref >actual && + git for-each-ref >expect && + git --git-dir=clone3.git for-each-ref >actual && - test_cmp expect actual - ) + test_cmp expect actual ' test_done From 9abf46deeb0dc2d07094200b1dd2a99690e58494 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:16 +0000 Subject: [PATCH 076/241] t5505: export `GIT_DIR` after `git init --bare` To prepare for `safe.bareRepository` defaulting to `explicit` (see 8d1a7448206e), export `GIT_DIR=.` right after `git init --bare &&` so subsequent commands access the bare repo explicitly. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t5505-remote.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t5505-remote.sh b/t/t5505-remote.sh index e592c0bcde91e9..6d3d8510cae191 100755 --- a/t/t5505-remote.sh +++ b/t/t5505-remote.sh @@ -561,7 +561,7 @@ test_expect_success 'add --mirror && prune' ' mkdir mirror && ( cd mirror && - git init --bare && + git init --bare && GIT_DIR=. && export GIT_DIR && git remote add --mirror -f origin ../one ) && ( @@ -583,7 +583,7 @@ test_expect_success 'add --mirror setting HEAD' ' mkdir headmirror && ( cd headmirror && - git init --bare -b notmain && + git init --bare -b notmain && GIT_DIR=. 
&& export GIT_DIR && git remote add --mirror -f origin ../one && test "$(git symbolic-ref HEAD)" = "refs/heads/main" ) From c1e5cd1dda16d42b6678e5cc0a434574511d40e2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:17 +0000 Subject: [PATCH 077/241] t5509: specify bare repository path explicitly After switching from `-C pushee` to `--git-dir=pushee` as part of the `safe.bareRepository` preparation, `ext::` URLs that used `.` (resolved relative to the `-C` target) must spell out the directory name explicitly. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t5509-fetch-push-namespaces.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/t/t5509-fetch-push-namespaces.sh b/t/t5509-fetch-push-namespaces.sh index 095df1a7535d57..7771a3b34a6601 100755 --- a/t/t5509-fetch-push-namespaces.sh +++ b/t/t5509-fetch-push-namespaces.sh @@ -88,8 +88,8 @@ test_expect_success 'mirroring a repository using a ref namespace' ' test_expect_success 'hide namespaced refs with transfer.hideRefs' ' GIT_NAMESPACE=namespace \ - git -C pushee -c transfer.hideRefs=refs/tags \ - ls-remote "ext::git %s ." >actual && + git --git-dir=pushee -c transfer.hideRefs=refs/tags \ + ls-remote "ext::git %s pushee" >actual && printf "$commit1\trefs/heads/main\n" >expected && test_cmp expected actual ' @@ -97,8 +97,8 @@ test_expect_success 'hide namespaced refs with transfer.hideRefs' ' test_expect_success 'check that transfer.hideRefs does not match unstripped refs' ' git -C pushee pack-refs --all && GIT_NAMESPACE=namespace \ - git -C pushee -c transfer.hideRefs=refs/namespaces/namespace/refs/tags \ - ls-remote "ext::git %s ." 
>actual && + git --git-dir=pushee -c transfer.hideRefs=refs/namespaces/namespace/refs/tags \ + ls-remote "ext::git %s pushee" >actual && printf "$commit1\trefs/heads/main\n" >expected && printf "$commit0\trefs/tags/0\n" >>expected && printf "$commit1\trefs/tags/1\n" >>expected && @@ -107,8 +107,8 @@ test_expect_success 'check that transfer.hideRefs does not match unstripped refs test_expect_success 'hide full refs with transfer.hideRefs' ' GIT_NAMESPACE=namespace \ - git -C pushee -c transfer.hideRefs="^refs/namespaces/namespace/refs/tags" \ - ls-remote "ext::git %s ." >actual && + git --git-dir=pushee -c transfer.hideRefs="^refs/namespaces/namespace/refs/tags" \ + ls-remote "ext::git %s pushee" >actual && printf "$commit1\trefs/heads/main\n" >expected && test_cmp expected actual ' From 5513fb51b7024dc2ec26cf2d72dc3ca8b3f7513a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:18 +0000 Subject: [PATCH 078/241] t5540/t5541: avoid accessing a bare repository via `-C ` In the `test_http_push_nonff` function both of these test scripts call, there were two Git invocations that assume that bare repositories will always be discovered when the current working directory is inside one. This is unlikely to be true forever because at some stage, the `safe.bareRepository` config is prone to be modified to be safe by default. So let's be safe and specify the bare repository explicitly. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/lib-httpd.sh | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh index 4c76e813e396bf..f15158b2c579c7 100644 --- a/t/lib-httpd.sh +++ b/t/lib-httpd.sh @@ -259,7 +259,7 @@ test_http_push_nonff () { test_expect_success 'non-fast-forward push fails' ' cd "$REMOTE_REPO" && - HEAD=$(git rev-parse --verify HEAD) && + HEAD=$(git --git-dir=. 
rev-parse --verify HEAD) && cd "$LOCAL_REPO" && git checkout $BRANCH && @@ -270,7 +270,7 @@ test_http_push_nonff () { ( cd "$REMOTE_REPO" && echo "$HEAD" >expect && - git rev-parse --verify HEAD >actual && + git --git-dir=. rev-parse --verify HEAD >actual && test_cmp expect actual ) ' @@ -284,18 +284,16 @@ test_http_push_nonff () { ' test_expect_${EXPECT_CAS_RESULT} 'force with lease aka cas' ' - HEAD=$( cd "$REMOTE_REPO" && git rev-parse --verify HEAD ) && + HEAD=$(git --git-dir="$REMOTE_REPO" rev-parse --verify HEAD) && test_when_finished '\'' - (cd "$REMOTE_REPO" && git update-ref HEAD "$HEAD") + git --git-dir="$REMOTE_REPO" update-ref HEAD "$HEAD" '\'' && ( cd "$LOCAL_REPO" && git push -v --force-with-lease=$BRANCH:$HEAD origin ) && git rev-parse --verify "$BRANCH" >expect && - ( - cd "$REMOTE_REPO" && git rev-parse --verify HEAD - ) >actual && + git --git-dir="$REMOTE_REPO" rev-parse --verify HEAD >actual && test_cmp expect actual ' } From 4dceee1c8ca39b506ee417e6ff94b3057609ced2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:19 +0000 Subject: [PATCH 079/241] t5619: wrap `test_commit_bulk` in `GIT_DIR` subshell for bare repo To prepare for `safe.bareRepository` defaulting to `explicit` (see 8d1a7448206e), wrap the `test_commit_bulk` call in `(GIT_DIR="$REPO" && export GIT_DIR && test_commit_bulk ...)` because `test_commit_bulk -C` relies on implicit discovery which would fail once the default changes. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t5619-clone-local-ambiguous-transport.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5619-clone-local-ambiguous-transport.sh b/t/t5619-clone-local-ambiguous-transport.sh index cce62bf78d3351..3e9aac9015a01d 100755 --- a/t/t5619-clone-local-ambiguous-transport.sh +++ b/t/t5619-clone-local-ambiguous-transport.sh @@ -21,7 +21,7 @@ test_expect_success 'setup' ' echo "secret" >sensitive/secret && git init --bare "$REPO" && - test_commit_bulk -C "$REPO" --ref=main 1 && + (GIT_DIR="$REPO" && export GIT_DIR && test_commit_bulk --ref=main 1) && git -C "$REPO" update-ref HEAD main && git -C "$REPO" update-server-info && From f1ecb9def010a69dd9f81091d002af0423a06c66 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:20 +0000 Subject: [PATCH 080/241] t6020: use `-C` for worktree, `--git-dir` for bare repository To prepare for `safe.bareRepository` defaulting to `explicit` (see 8d1a7448206e), adjust a loop that iterated over both a bare (`cloned`) and a non-bare (`unbundled`) repository using the same `-C` flag: the bare repo needs `--git-dir` to avoid implicit discovery, while the non-bare one keeps `-C`. 
Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t6020-bundle-misc.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh index 500c81b8a14237..82df105b47f0bd 100755 --- a/t/t6020-bundle-misc.sh +++ b/t/t6020-bundle-misc.sh @@ -594,9 +594,9 @@ do reflist=$(git for-each-ref --format="%(objectname)") && git rev-list --objects --filter=$filter --missing=allow-any \ $reflist >expect && - for repo in cloned unbundled + for opt in "--git-dir cloned" "-C unbundled" do - git -C $repo rev-list --objects --missing=allow-any \ + git $opt rev-list --objects --missing=allow-any \ $reflist >actual && test_cmp expect actual || return 1 done From 3a20e2a04b147288b7af7d8504b3cbce2773e46f Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:21 +0000 Subject: [PATCH 081/241] t9210: pass `safe.bareRepository=all` to `scalar register` This test expects `scalar register` to discover a bare repo and reject it. Since `scalar` does not support `--git-dir` (that option would not make sense in the context of that command), pass `-c safe.bareRepository=all` to opt into implicit discovery of bare repositories, so the test keeps working once the default changes to `explicit`. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t9210-scalar.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t9210-scalar.sh b/t/t9210-scalar.sh index 009437a5f3168f..54513c220b679a 100755 --- a/t/t9210-scalar.sh +++ b/t/t9210-scalar.sh @@ -88,7 +88,7 @@ test_expect_success 'scalar enlistments need a worktree' ' test_when_finished rm -rf bare test && git init --bare bare/src && - ! scalar register bare/src 2>err && + ! 
scalar -c safe.bareRepository=all register bare/src 2>err && grep "Scalar enlistments require a worktree" err && git init test/src && From bf92209ddc7de6b19022d32ccad3d38576611a11 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:22 +0000 Subject: [PATCH 082/241] t9700: stop relying on implicit bare repo discovery Currently, the "alternate bare repo" test case relies on Git discovering non-bare and bare repositories alike. However, the automatic discovery of bare repository represents a weakness that leaves Git users vulnerable. To that end, the `safe.bareRepository` config was introduced, but out of backwards-compatibility concerns, the default is not yet secure. To prepare for that default to switch to the secure one, where bare repositories are never discovered automatically but instead must be specified explicitly, let's do exactly that in this test case: specify it explicitly, via setting the environment variable `GIT_DIR`. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t9700/test.pl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/t/t9700/test.pl b/t/t9700/test.pl index f83e6169e2c100..99b712b626cfd8 100755 --- a/t/t9700/test.pl +++ b/t/t9700/test.pl @@ -153,9 +153,12 @@ sub adjust_dirsep { chdir($abs_repo_dir); # open alternate bare repo -my $r4 = Git->repository(Directory => "$abs_repo_dir/bare.git"); -is($r4->command_oneline(qw(log --format=%s)), "bare commit", - "log of bare repo works"); +{ + local $ENV{GIT_DIR} = "$abs_repo_dir/bare.git"; + my $r4 = Git->repository(Directory => "$abs_repo_dir/bare.git"); + is($r4->command_oneline(qw(log --format=%s)), "bare commit", + "log of bare repo works"); +} # unquoting paths is(Git::unquote_path('abc'), 'abc', 'unquote unquoted path'); From 347da97f3e6efe4cb85ffa39b42da9ff96cb8e28 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 2 Apr 2026 14:33:23 +0000 Subject: [PATCH 083/241] git p4 clone --bare: need to be explicit 
about the gitdir When `safe.bareRepository` will change to be safe by default, bare repositories won't be discovered by default anymore. To prepare for this, `git p4` must be explicit about the gitdir when cloning into a bare repository, and no longer rely on that implicit discovery. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- git-p4.py | 1 + 1 file changed, 1 insertion(+) diff --git a/git-p4.py b/git-p4.py index c0ca7becaf4861..dd38dbca221580 100755 --- a/git-p4.py +++ b/git-p4.py @@ -4360,6 +4360,7 @@ def run(self, args): init_cmd = ["git", "init"] if self.cloneBare: init_cmd.append("--bare") + os.environ["GIT_DIR"] = os.getcwd() retcode = subprocess.call(init_cmd) if retcode: raise subprocess.CalledProcessError(retcode, init_cmd) From d429ab497942f8d20953c00d6db9741d29572b62 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 2 Apr 2026 16:32:14 -0500 Subject: [PATCH 084/241] odb: split `struct odb_transaction` into separate header The current ODB transaction interface is colocated with other ODB interfaces in "odb.{c,h}". Subsequent commits will expand `struct odb_transaction` to support write operations on the transaction directly. To keep things organized and prevent "odb.{c,h}" from becoming more unwieldy, split out `struct odb_transaction` into a separate header. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/add.c | 1 + builtin/unpack-objects.c | 1 + builtin/update-index.c | 1 + cache-tree.c | 1 + meson.build | 1 + object-file.c | 1 + odb.c | 25 ------------------------- odb.h | 31 ------------------------------- odb/transaction.c | 28 ++++++++++++++++++++++++++++ odb/transaction.h | 38 ++++++++++++++++++++++++++++++++++++++ read-cache.c | 1 + 12 files changed, 74 insertions(+), 56 deletions(-) create mode 100644 odb/transaction.c create mode 100644 odb/transaction.h diff --git a/Makefile b/Makefile index dbf00220541ce1..6342db13e53232 100644 --- a/Makefile +++ b/Makefile @@ -1219,6 +1219,7 @@ LIB_OBJS += odb.o LIB_OBJS += odb/source.o LIB_OBJS += odb/source-files.o LIB_OBJS += odb/streaming.o +LIB_OBJS += odb/transaction.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o LIB_OBJS += oidset.o diff --git a/builtin/add.c b/builtin/add.c index 7737ab878bfceb..c859f665199efa 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -16,6 +16,7 @@ #include "run-command.h" #include "object-file.h" #include "odb.h" +#include "odb/transaction.h" #include "parse-options.h" #include "path.h" #include "preload-index.h" diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 6fc64e9e4b8d5a..bc9b1e047e2e4e 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -9,6 +9,7 @@ #include "hex.h" #include "object-file.h" #include "odb.h" +#include "odb/transaction.h" #include "object.h" #include "delta.h" #include "pack.h" diff --git a/builtin/update-index.c b/builtin/update-index.c index 8a5907767bf297..bcc43852ef47aa 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -19,6 +19,7 @@ #include "tree-walk.h" #include "object-file.h" #include "odb.h" +#include "odb/transaction.h" #include "refs.h" #include "resolve-undo.h" #include "parse-options.h" diff --git a/cache-tree.c b/cache-tree.c index 60bcc07c3b8357..f056869cfdbcf1 100644 --- a/cache-tree.c +++ b/cache-tree.c 
@@ -10,6 +10,7 @@ #include "cache-tree.h" #include "object-file.h" #include "odb.h" +#include "odb/transaction.h" #include "read-cache-ll.h" #include "replace-object.h" #include "repository.h" diff --git a/meson.build b/meson.build index 8309942d184847..6dc23b3af2f387 100644 --- a/meson.build +++ b/meson.build @@ -405,6 +405,7 @@ libgit_sources = [ 'odb/source.c', 'odb/source-files.c', 'odb/streaming.c', + 'odb/transaction.c', 'oid-array.c', 'oidmap.c', 'oidset.c', diff --git a/object-file.c b/object-file.c index f0b029ff0b2cb0..bfbb632cf8b971 100644 --- a/object-file.c +++ b/object-file.c @@ -21,6 +21,7 @@ #include "object-file.h" #include "odb.h" #include "odb/streaming.h" +#include "odb/transaction.h" #include "oidtree.h" #include "pack.h" #include "packfile.h" diff --git a/odb.c b/odb.c index 350e23f3c0798d..8c3cbc1b53e11d 100644 --- a/odb.c +++ b/odb.c @@ -1069,28 +1069,3 @@ void odb_reprepare(struct object_database *o) obj_read_unlock(); } - -struct odb_transaction *odb_transaction_begin(struct object_database *odb) -{ - if (odb->transaction) - return NULL; - - odb->transaction = odb_transaction_files_begin(odb->sources); - - return odb->transaction; -} - -void odb_transaction_commit(struct odb_transaction *transaction) -{ - if (!transaction) - return; - - /* - * Ensure the transaction ending matches the pending transaction. - */ - ASSERT(transaction == transaction->source->odb->transaction); - - transaction->commit(transaction); - transaction->source->odb->transaction = NULL; - free(transaction); -} diff --git a/odb.h b/odb.h index 9aee260105ae54..ec5367b13ed8bc 100644 --- a/odb.h +++ b/odb.h @@ -35,24 +35,6 @@ struct packed_git; struct packfile_store; struct cached_object_entry; -/* - * A transaction may be started for an object database prior to writing new - * objects via odb_transaction_begin(). These objects are not committed until - * odb_transaction_commit() is invoked. Only a single transaction may be pending - * at a time. 
- * - * Each ODB source is expected to implement its own transaction handling. - */ -struct odb_transaction; -typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction); -struct odb_transaction { - /* The ODB source the transaction is opened against. */ - struct odb_source *source; - - /* The ODB source specific callback invoked to commit a transaction. */ - odb_transaction_commit_fn commit; -}; - /* * The object database encapsulates access to objects in a repository. It * manages one or more sources that store the actual objects which are @@ -154,19 +136,6 @@ void odb_close(struct object_database *o); */ void odb_reprepare(struct object_database *o); -/* - * Starts an ODB transaction. Subsequent objects are written to the transaction - * and not committed until odb_transaction_commit() is invoked on the - * transaction. If the ODB already has a pending transaction, NULL is returned. - */ -struct odb_transaction *odb_transaction_begin(struct object_database *odb); - -/* - * Commits an ODB transaction making the written objects visible. If the - * specified transaction is NULL, the function is a no-op. - */ -void odb_transaction_commit(struct odb_transaction *transaction); - /* * Find source by its object directory path. Returns a `NULL` pointer in case * the source could not be found. diff --git a/odb/transaction.c b/odb/transaction.c new file mode 100644 index 00000000000000..9bf3f347dcf261 --- /dev/null +++ b/odb/transaction.c @@ -0,0 +1,28 @@ +#include "git-compat-util.h" +#include "object-file.h" +#include "odb/transaction.h" + +struct odb_transaction *odb_transaction_begin(struct object_database *odb) +{ + if (odb->transaction) + return NULL; + + odb->transaction = odb_transaction_files_begin(odb->sources); + + return odb->transaction; +} + +void odb_transaction_commit(struct odb_transaction *transaction) +{ + if (!transaction) + return; + + /* + * Ensure the transaction ending matches the pending transaction. 
+ */ + ASSERT(transaction == transaction->source->odb->transaction); + + transaction->commit(transaction); + transaction->source->odb->transaction = NULL; + free(transaction); +} diff --git a/odb/transaction.h b/odb/transaction.h new file mode 100644 index 00000000000000..a56e392f217f4f --- /dev/null +++ b/odb/transaction.h @@ -0,0 +1,38 @@ +#ifndef ODB_TRANSACTION_H +#define ODB_TRANSACTION_H + +#include "odb.h" +#include "odb/source.h" + +/* + * A transaction may be started for an object database prior to writing new + * objects via odb_transaction_begin(). These objects are not committed until + * odb_transaction_commit() is invoked. Only a single transaction may be pending + * at a time. + * + * Each ODB source is expected to implement its own transaction handling. + */ +struct odb_transaction; +typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction); +struct odb_transaction { + /* The ODB source the transaction is opened against. */ + struct odb_source *source; + + /* The ODB source specific callback invoked to commit a transaction. */ + odb_transaction_commit_fn commit; +}; + +/* + * Starts an ODB transaction. Subsequent objects are written to the transaction + * and not committed until odb_transaction_commit() is invoked on the + * transaction. If the ODB already has a pending transaction, NULL is returned. + */ +struct odb_transaction *odb_transaction_begin(struct object_database *odb); + +/* + * Commits an ODB transaction making the written objects visible. If the + * specified transaction is NULL, the function is a no-op. 
+ */ +void odb_transaction_commit(struct odb_transaction *transaction); + +#endif diff --git a/read-cache.c b/read-cache.c index 5049f9baca9c5e..8147c7e94a8b8e 100644 --- a/read-cache.c +++ b/read-cache.c @@ -20,6 +20,7 @@ #include "dir.h" #include "object-file.h" #include "odb.h" +#include "odb/transaction.h" #include "oid-array.h" #include "tree.h" #include "commit.h" From ec562fe26b305d7cff3e324f6b2f50a8b7469015 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 2 Apr 2026 16:32:15 -0500 Subject: [PATCH 085/241] odb/transaction: use pluggable `begin_transaction()` Each ODB source is expected to provide an ODB transaction implementation that should be used when starting a transaction. With d6fc6fe6f8 (odb/source: make `begin_transaction()` function pluggable, 2026-03-05), the `struct odb_source` now provides a pluggable callback for beginning transactions. Use the callback provided by the ODB source accordingly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- odb/transaction.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/odb/transaction.c b/odb/transaction.c index 9bf3f347dcf261..592ac840759a07 100644 --- a/odb/transaction.c +++ b/odb/transaction.c @@ -1,5 +1,5 @@ #include "git-compat-util.h" -#include "object-file.h" +#include "odb/source.h" #include "odb/transaction.h" struct odb_transaction *odb_transaction_begin(struct object_database *odb) @@ -7,7 +7,7 @@ struct odb_transaction *odb_transaction_begin(struct object_database *odb) if (odb->transaction) return NULL; - odb->transaction = odb_transaction_files_begin(odb->sources); + odb_source_begin_transaction(odb->sources, &odb->transaction); return odb->transaction; } From 61effed8751a65e15e12cf049043b961211fc69a Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 2 Apr 2026 16:32:16 -0500 Subject: [PATCH 086/241] odb: update `struct odb_write_stream` read() callback The `read()` callback used by `struct odb_write_stream` currently returns a pointer to an 
internal buffer along with the number of bytes read. This makes buffer ownership unclear and provides no way to report errors. Update the interface to instead require the caller to provide a buffer, and have the callback return the number of bytes written to it or a negative value on error. While at it, also move the `struct odb_write_stream` definition to "odb/streaming.h". Call sites are updated accordingly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/unpack-objects.c | 20 ++++++++------------ object-file.c | 14 +++++++++++--- odb.h | 6 +----- odb/streaming.c | 5 +++++ odb/streaming.h | 18 ++++++++++++++++++ 5 files changed, 43 insertions(+), 20 deletions(-) diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index bc9b1e047e2e4e..64e58e79fdac15 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -9,6 +9,7 @@ #include "hex.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "odb/transaction.h" #include "object.h" #include "delta.h" @@ -360,24 +361,21 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size, struct input_zstream_data { git_zstream *zstream; - unsigned char buf[8192]; int status; }; -static const void *feed_input_zstream(struct odb_write_stream *in_stream, - unsigned long *readlen) +static ssize_t feed_input_zstream(struct odb_write_stream *in_stream, + unsigned char *buf, size_t buf_len) { struct input_zstream_data *data = in_stream->data; git_zstream *zstream = data->zstream; void *in = fill(1); - if (in_stream->is_finished) { - *readlen = 0; - return NULL; - } + if (in_stream->is_finished) + return 0; - zstream->next_out = data->buf; - zstream->avail_out = sizeof(data->buf); + zstream->next_out = buf; + zstream->avail_out = buf_len; zstream->next_in = in; zstream->avail_in = len; @@ -385,9 +383,7 @@ static const void *feed_input_zstream(struct odb_write_stream *in_stream, in_stream->is_finished = data->status != Z_OK; use(len - 
zstream->avail_in); - *readlen = sizeof(data->buf) - zstream->avail_out; - - return data->buf; + return buf_len - zstream->avail_out; } static void stream_blob(unsigned long size, unsigned nr) diff --git a/object-file.c b/object-file.c index bfbb632cf8b971..0ae36314aacc05 100644 --- a/object-file.c +++ b/object-file.c @@ -1066,6 +1066,7 @@ int odb_source_loose_write_stream(struct odb_source *source, struct git_hash_ctx c, compat_c; struct strbuf tmp_file = STRBUF_INIT; struct strbuf filename = STRBUF_INIT; + unsigned char buf[8192]; int dirlen; char hdr[MAX_HEADER_LEN]; int hdrlen; @@ -1098,9 +1099,16 @@ int odb_source_loose_write_stream(struct odb_source *source, unsigned char *in0 = stream.next_in; if (!stream.avail_in && !in_stream->is_finished) { - const void *in = in_stream->read(in_stream, &stream.avail_in); - stream.next_in = (void *)in; - in0 = (unsigned char *)in; + ssize_t read_len = odb_write_stream_read(in_stream, buf, + sizeof(buf)); + if (read_len < 0) { + err = -1; + goto cleanup; + } + + stream.avail_in = read_len; + stream.next_in = buf; + in0 = buf; /* All data has been read. 
*/ if (in_stream->is_finished) flush = 1; diff --git a/odb.h b/odb.h index ec5367b13ed8bc..6faeaa05891d7b 100644 --- a/odb.h +++ b/odb.h @@ -529,11 +529,7 @@ static inline int odb_write_object(struct object_database *odb, return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0); } -struct odb_write_stream { - const void *(*read)(struct odb_write_stream *, unsigned long *len); - void *data; - int is_finished; -}; +struct odb_write_stream; int odb_write_object_stream(struct object_database *odb, struct odb_write_stream *stream, size_t len, diff --git a/odb/streaming.c b/odb/streaming.c index 5927a12954ba59..a68dd2cbe37821 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -232,6 +232,11 @@ struct odb_read_stream *odb_read_stream_open(struct object_database *odb, return st; } +ssize_t odb_write_stream_read(struct odb_write_stream *st, void *buf, size_t sz) +{ + return st->read(st, buf, sz); +} + int odb_stream_blob_to_fd(struct object_database *odb, int fd, const struct object_id *oid, diff --git a/odb/streaming.h b/odb/streaming.h index c7861f7e13c606..65ced911fecd69 100644 --- a/odb/streaming.h +++ b/odb/streaming.h @@ -47,6 +47,24 @@ int odb_read_stream_close(struct odb_read_stream *stream); */ ssize_t odb_read_stream_read(struct odb_read_stream *stream, void *buf, size_t len); +/* + * A stream that provides an object to be written to the object database without + * loading all of it into memory. + */ +struct odb_write_stream { + ssize_t (*read)(struct odb_write_stream *, unsigned char *, size_t); + void *data; + int is_finished; +}; + +/* + * Read data from the stream into the buffer. Returns 0 when finished and the + * number of bytes read on success. Returns a negative error code in case + * reading from the stream fails. + */ +ssize_t odb_write_stream_read(struct odb_write_stream *stream, void *buf, + size_t len); + /* * Look up the object by its ID and write the full contents to the file * descriptor. 
The object must be a blob, or the function will fail. When From 43eadce3407aa63829633b5dc903db79f4cdf29e Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 2 Apr 2026 16:32:17 -0500 Subject: [PATCH 087/241] object-file: remove flags from transaction packfile writes The `index_blob_packfile_transaction()` function handles streaming a blob from an fd to compute its object ID and conditionally writes the object directly to a packfile if the INDEX_WRITE_OBJECT flag is set. A subsequent commit will make these packfile object writes part of the transaction interface. Consequently, having the object write be conditional on this flag is a bit awkward. In preparation for this change, introduce a dedicated `hash_blob_stream()` helper that only computes the OID from a `struct odb_write_stream`. This is invoked by `index_fd()` instead when the INDEX_WRITE_OBJECT is not set. The object write performed via `index_blob_packfile_transaction()` is made unconditional accordingly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 132 +++++++++++++++++++++++++++++------------------- odb/streaming.c | 46 +++++++++++++++++ odb/streaming.h | 12 +++++ 3 files changed, 138 insertions(+), 52 deletions(-) diff --git a/object-file.c b/object-file.c index 0ae36314aacc05..382d14c8c0a5fb 100644 --- a/object-file.c +++ b/object-file.c @@ -1396,11 +1396,10 @@ static int already_written(struct odb_transaction_files *transaction, } /* Lazily create backing packfile for the state */ -static void prepare_packfile_transaction(struct odb_transaction_files *transaction, - unsigned flags) +static void prepare_packfile_transaction(struct odb_transaction_files *transaction) { struct transaction_packfile *state = &transaction->packfile; - if (!(flags & INDEX_WRITE_OBJECT) || state->f) + if (state->f) return; state->f = create_tmp_packfile(transaction->base.source->odb->repo, @@ -1413,6 +1412,39 @@ static void prepare_packfile_transaction(struct odb_transaction_files 
*transacti die_errno("unable to write pack header"); } +static int hash_blob_stream(struct odb_write_stream *stream, + const struct git_hash_algo *hash_algo, + struct object_id *result_oid, size_t size) +{ + unsigned char buf[16384]; + struct git_hash_ctx ctx; + unsigned header_len; + size_t bytes_hashed = 0; + + header_len = format_object_header((char *)buf, sizeof(buf), + OBJ_BLOB, size); + hash_algo->init_fn(&ctx); + git_hash_update(&ctx, buf, header_len); + + while (!stream->is_finished) { + ssize_t read_result = odb_write_stream_read(stream, buf, + sizeof(buf)); + + if (read_result < 0) + return -1; + + git_hash_update(&ctx, buf, read_result); + bytes_hashed += read_result; + } + + if (bytes_hashed != size) + return -1; + + git_hash_final_oid(result_oid, &ctx); + + return 0; +} + /* * Read the contents from fd for size bytes, streaming it to the * packfile in state while updating the hash in ctx. Signal a failure @@ -1430,15 +1462,13 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti */ static int stream_blob_to_pack(struct transaction_packfile *state, struct git_hash_ctx *ctx, off_t *already_hashed_to, - int fd, size_t size, const char *path, - unsigned flags) + int fd, size_t size, const char *path) { git_zstream s; unsigned char ibuf[16384]; unsigned char obuf[16384]; unsigned hdrlen; int status = Z_OK; - int write_object = (flags & INDEX_WRITE_OBJECT); off_t offset = 0; git_deflate_init(&s, pack_compression_level); @@ -1473,20 +1503,18 @@ static int stream_blob_to_pack(struct transaction_packfile *state, status = git_deflate(&s, size ? 0 : Z_FINISH); if (!s.avail_out || status == Z_STREAM_END) { - if (write_object) { - size_t written = s.next_out - obuf; - - /* would we bust the size limit? 
*/ - if (state->nr_written && - pack_size_limit_cfg && - pack_size_limit_cfg < state->offset + written) { - git_deflate_abort(&s); - return -1; - } - - hashwrite(state->f, obuf, written); - state->offset += written; + size_t written = s.next_out - obuf; + + /* would we bust the size limit? */ + if (state->nr_written && + pack_size_limit_cfg && + pack_size_limit_cfg < state->offset + written) { + git_deflate_abort(&s); + return -1; } + + hashwrite(state->f, obuf, written); + state->offset += written; s.next_out = obuf; s.avail_out = sizeof(obuf); } @@ -1574,8 +1602,7 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction */ static int index_blob_packfile_transaction(struct odb_transaction_files *transaction, struct object_id *result_oid, int fd, - size_t size, const char *path, - unsigned flags) + size_t size, const char *path) { struct transaction_packfile *state = &transaction->packfile; off_t seekback, already_hashed_to; @@ -1583,7 +1610,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac unsigned char obuf[16384]; unsigned header_len; struct hashfile_checkpoint checkpoint; - struct pack_idx_entry *idx = NULL; + struct pack_idx_entry *idx; seekback = lseek(fd, 0, SEEK_CUR); if (seekback == (off_t)-1) @@ -1594,33 +1621,26 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac transaction->base.source->odb->repo->hash_algo->init_fn(&ctx); git_hash_update(&ctx, obuf, header_len); - /* Note: idx is non-NULL when we are writing */ - if ((flags & INDEX_WRITE_OBJECT) != 0) { - CALLOC_ARRAY(idx, 1); - - prepare_packfile_transaction(transaction, flags); - hashfile_checkpoint_init(state->f, &checkpoint); - } + CALLOC_ARRAY(idx, 1); + prepare_packfile_transaction(transaction); + hashfile_checkpoint_init(state->f, &checkpoint); already_hashed_to = 0; while (1) { - prepare_packfile_transaction(transaction, flags); - if (idx) { - hashfile_checkpoint(state->f, &checkpoint); - idx->offset 
= state->offset; - crc32_begin(state->f); - } + prepare_packfile_transaction(transaction); + hashfile_checkpoint(state->f, &checkpoint); + idx->offset = state->offset; + crc32_begin(state->f); + if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, - fd, size, path, flags)) + fd, size, path)) break; /* * Writing this object to the current pack will make * it too big; we need to truncate it, start a new * pack, and write into it. */ - if (!idx) - BUG("should not happen"); hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; flush_packfile_transaction(transaction); @@ -1628,8 +1648,6 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac return error("cannot seek back"); } git_hash_final_oid(result_oid, &ctx); - if (!idx) - return 0; idx->crc32 = crc32_end(state->f); if (already_written(transaction, result_oid)) { @@ -1667,18 +1685,28 @@ int index_fd(struct index_state *istate, struct object_id *oid, ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path, flags); } else { - struct object_database *odb = the_repository->objects; - struct odb_transaction_files *files_transaction; - struct odb_transaction *transaction; - - transaction = odb_transaction_begin(odb); - files_transaction = container_of(odb->transaction, - struct odb_transaction_files, - base); - ret = index_blob_packfile_transaction(files_transaction, oid, fd, - xsize_t(st->st_size), - path, flags); - odb_transaction_commit(transaction); + struct odb_write_stream stream; + odb_write_stream_from_fd(&stream, fd, xsize_t(st->st_size)); + + if (flags & INDEX_WRITE_OBJECT) { + struct object_database *odb = the_repository->objects; + struct odb_transaction_files *files_transaction; + struct odb_transaction *transaction; + + transaction = odb_transaction_begin(odb); + files_transaction = container_of(odb->transaction, + struct odb_transaction_files, + base); + ret = index_blob_packfile_transaction(files_transaction, oid, fd, + 
xsize_t(st->st_size), path); + odb_transaction_commit(transaction); + } else { + ret = hash_blob_stream(&stream, + the_repository->hash_algo, oid, + xsize_t(st->st_size)); + } + + odb_write_stream_release(&stream); } close(fd); diff --git a/odb/streaming.c b/odb/streaming.c index a68dd2cbe37821..20531e864c9561 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -237,6 +237,11 @@ ssize_t odb_write_stream_read(struct odb_write_stream *st, void *buf, size_t sz) return st->read(st, buf, sz); } +void odb_write_stream_release(struct odb_write_stream *st) +{ + free(st->data); +} + int odb_stream_blob_to_fd(struct object_database *odb, int fd, const struct object_id *oid, @@ -292,3 +297,44 @@ int odb_stream_blob_to_fd(struct object_database *odb, odb_read_stream_close(st); return result; } + +struct read_object_fd_data { + int fd; + size_t remaining; +}; + +static ssize_t read_object_fd(struct odb_write_stream *stream, + unsigned char *buf, size_t len) +{ + struct read_object_fd_data *data = stream->data; + ssize_t read_result; + size_t count; + + if (stream->is_finished) + return 0; + + count = data->remaining < len ? 
data->remaining : len; + read_result = read_in_full(data->fd, buf, count); + if (read_result < 0 || (size_t)read_result != count) + return -1; + + data->remaining -= count; + if (!data->remaining) + stream->is_finished = 1; + + return read_result; +} + +void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd, + size_t size) +{ + struct read_object_fd_data *data; + + CALLOC_ARRAY(data, 1); + data->fd = fd; + data->remaining = size; + + stream->data = data; + stream->read = read_object_fd; + stream->is_finished = 0; +} diff --git a/odb/streaming.h b/odb/streaming.h index 65ced911fecd69..2a8cac19a43c00 100644 --- a/odb/streaming.h +++ b/odb/streaming.h @@ -5,6 +5,7 @@ #define STREAMING_H 1 #include "object.h" +#include "odb.h" struct object_database; struct odb_read_stream; @@ -65,6 +66,11 @@ struct odb_write_stream { ssize_t odb_write_stream_read(struct odb_write_stream *stream, void *buf, size_t len); +/* + * Releases memory allocated for underlying stream data. + */ +void odb_write_stream_release(struct odb_write_stream *stream); + /* * Look up the object by its ID and write the full contents to the file * descriptor. The object must be a blob, or the function will fail. When @@ -82,4 +88,10 @@ int odb_stream_blob_to_fd(struct object_database *odb, struct stream_filter *filter, int can_seek); +/* + * Sets up an ODB write stream that reads from an fd. + */ +void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd, + size_t size); + #endif /* STREAMING_H */ From b247256a17fdf52777aab4ef412bd9132a3a4d9f Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 2 Apr 2026 16:32:18 -0500 Subject: [PATCH 088/241] object-file: avoid fd seekback by checking object size upfront In certain scenarios, Git handles writing blobs that exceed "core.bigFileThreshold" differently by streaming the object directly into a packfile. 
When there is an active ODB transaction, these blobs are streamed to the same packfile instead of using a separate packfile for each. If "pack.packSizeLimit" is configured and streaming another object causes the packfile to exceed the configured limit, the packfile is truncated back to the previous object and the object write is restarted in a new packfile. This works fine, but requires the fd being read from to save a checkpoint so it becomes possible to rewind the input source via seeking back to a known offset at the beginning. In a subsequent commit, blob streaming is converted to use `struct odb_write_stream` as a more generic input source instead of an fd which doesn't provide a mechanism for rewinding. For this use case though, rewinding the fd is not strictly necessary because the inflated size of the object is known and can be used to approximate whether writing the object would cause the packfile to exceed the configured limit prior to writing anything. These blobs written to the packfile are never deltified thus the size difference between what is written versus the inflated size is due to zlib compression. While this does prevent packfiles from being filled to the potential maximum in some cases, it should be good enough and still prevents the packfile from exceeding any configured limit. Use the inflated blob size to determine whether writing an object to a packfile will exceed the configured "pack.packSizeLimit". Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 86 +++++++++++++++------------------------------------ 1 file changed, 25 insertions(+), 61 deletions(-) diff --git a/object-file.c b/object-file.c index 382d14c8c0a5fb..0284d5434b4e99 100644 --- a/object-file.c +++ b/object-file.c @@ -1447,29 +1447,17 @@ static int hash_blob_stream(struct odb_write_stream *stream, /* * Read the contents from fd for size bytes, streaming it to the
Signal a failure - * by returning a negative value when the resulting pack would exceed - * the pack size limit and this is not the first object in the pack, - * so that the caller can discard what we wrote from the current pack - * by truncating it and opening a new one. The caller will then call - * us again after rewinding the input fd. - * - * The already_hashed_to pointer is kept untouched by the caller to - * make sure we do not hash the same byte when we are called - * again. This way, the caller does not have to checkpoint its hash - * status before calling us just in case we ask it to call us again - * with a new pack. + * packfile in state while updating the hash in ctx. */ -static int stream_blob_to_pack(struct transaction_packfile *state, - struct git_hash_ctx *ctx, off_t *already_hashed_to, - int fd, size_t size, const char *path) +static void stream_blob_to_pack(struct transaction_packfile *state, + struct git_hash_ctx *ctx, int fd, size_t size, + const char *path) { git_zstream s; unsigned char ibuf[16384]; unsigned char obuf[16384]; unsigned hdrlen; int status = Z_OK; - off_t offset = 0; git_deflate_init(&s, pack_compression_level); @@ -1486,15 +1474,9 @@ static int stream_blob_to_pack(struct transaction_packfile *state, if ((size_t)read_result != rsize) die("failed to read %u bytes from '%s'", (unsigned)rsize, path); - offset += rsize; - if (*already_hashed_to < offset) { - size_t hsize = offset - *already_hashed_to; - if (rsize < hsize) - hsize = rsize; - if (hsize) - git_hash_update(ctx, ibuf, hsize); - *already_hashed_to = offset; - } + + git_hash_update(ctx, ibuf, rsize); + s.next_in = ibuf; s.avail_in = rsize; size -= rsize; @@ -1505,14 +1487,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state, if (!s.avail_out || status == Z_STREAM_END) { size_t written = s.next_out - obuf; - /* would we bust the size limit? 
*/ - if (state->nr_written && - pack_size_limit_cfg && - pack_size_limit_cfg < state->offset + written) { - git_deflate_abort(&s); - return -1; - } - hashwrite(state->f, obuf, written); state->offset += written; s.next_out = obuf; @@ -1529,7 +1503,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state, } } git_deflate_end(&s); - return 0; } static void flush_packfile_transaction(struct odb_transaction_files *transaction) @@ -1605,48 +1578,39 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac size_t size, const char *path) { struct transaction_packfile *state = &transaction->packfile; - off_t seekback, already_hashed_to; struct git_hash_ctx ctx; unsigned char obuf[16384]; unsigned header_len; struct hashfile_checkpoint checkpoint; struct pack_idx_entry *idx; - seekback = lseek(fd, 0, SEEK_CUR); - if (seekback == (off_t)-1) - return error("cannot find the current offset"); - header_len = format_object_header((char *)obuf, sizeof(obuf), OBJ_BLOB, size); transaction->base.source->odb->repo->hash_algo->init_fn(&ctx); git_hash_update(&ctx, obuf, header_len); + /* + * If writing another object to the packfile could result in it + * exceeding the configured size limit, flush the current packfile + * transaction. + * + * Note that this uses the inflated object size as an approximation. + * Blob objects written in this manner are not delta-compressed, so + * the difference between the inflated and on-disk size is limited + * to zlib compression and is sufficient for this check. 
+ */ + if (state->nr_written && pack_size_limit_cfg && + pack_size_limit_cfg < state->offset + size) + flush_packfile_transaction(transaction); + CALLOC_ARRAY(idx, 1); prepare_packfile_transaction(transaction); hashfile_checkpoint_init(state->f, &checkpoint); - already_hashed_to = 0; - - while (1) { - prepare_packfile_transaction(transaction); - hashfile_checkpoint(state->f, &checkpoint); - idx->offset = state->offset; - crc32_begin(state->f); - - if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, - fd, size, path)) - break; - /* - * Writing this object to the current pack will make - * it too big; we need to truncate it, start a new - * pack, and write into it. - */ - hashfile_truncate(state->f, &checkpoint); - state->offset = checkpoint.offset; - flush_packfile_transaction(transaction); - if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) - return error("cannot seek back"); - } + hashfile_checkpoint(state->f, &checkpoint); + idx->offset = state->offset; + crc32_begin(state->f); + stream_blob_to_pack(state, &ctx, fd, size, path); git_hash_final_oid(result_oid, &ctx); idx->crc32 = crc32_end(state->f); From a4e0a8bbb7102adeb16bd66074a5bc9102a3bb46 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 2 Apr 2026 16:32:19 -0500 Subject: [PATCH 089/241] object-file: generalize packfile writes to use odb_write_stream The `index_blob_packfile_transaction()` function streams blob data directly from an fd. This makes it difficult to reuse as part of a generic transactional object writing interface. Refactor the packfile write path to operate on a `struct odb_write_stream`, allowing callers to supply data from arbitrary sources. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 56 +++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/object-file.c b/object-file.c index 0284d5434b4e99..7fa2b9239f409c 100644 --- a/object-file.c +++ b/object-file.c @@ -1446,18 +1446,19 @@ static int hash_blob_stream(struct odb_write_stream *stream, } /* - * Read the contents from fd for size bytes, streaming it to the + * Read the contents from the stream provided, streaming it to the * packfile in state while updating the hash in ctx. */ static void stream_blob_to_pack(struct transaction_packfile *state, - struct git_hash_ctx *ctx, int fd, size_t size, - const char *path) + struct git_hash_ctx *ctx, size_t size, + struct odb_write_stream *stream) { git_zstream s; unsigned char ibuf[16384]; unsigned char obuf[16384]; unsigned hdrlen; int status = Z_OK; + size_t bytes_read = 0; git_deflate_init(&s, pack_compression_level); @@ -1466,23 +1467,21 @@ static void stream_blob_to_pack(struct transaction_packfile *state, s.avail_out = sizeof(obuf) - hdrlen; while (status != Z_STREAM_END) { - if (size && !s.avail_in) { - size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); - ssize_t read_result = read_in_full(fd, ibuf, rsize); - if (read_result < 0) - die_errno("failed to read from '%s'", path); - if ((size_t)read_result != rsize) - die("failed to read %u bytes from '%s'", - (unsigned)rsize, path); + if (!stream->is_finished && !s.avail_in) { + ssize_t rsize = odb_write_stream_read(stream, ibuf, + sizeof(ibuf)); + + if (rsize < 0) + die("failed to read blob data"); git_hash_update(ctx, ibuf, rsize); s.next_in = ibuf; s.avail_in = rsize; - size -= rsize; + bytes_read += rsize; } - status = git_deflate(&s, size ? 0 : Z_FINISH); + status = git_deflate(&s, stream->is_finished ? 
Z_FINISH : 0); if (!s.avail_out || status == Z_STREAM_END) { size_t written = s.next_out - obuf; @@ -1502,6 +1501,11 @@ static void stream_blob_to_pack(struct transaction_packfile *state, die("unexpected deflate failure: %d", status); } } + + if (bytes_read != size) + die("read %" PRIuMAX " bytes of blob data, but expected %" PRIuMAX " bytes", + (uintmax_t)bytes_read, (uintmax_t)size); + git_deflate_end(&s); } @@ -1573,10 +1577,13 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. */ -static int index_blob_packfile_transaction(struct odb_transaction_files *transaction, - struct object_id *result_oid, int fd, - size_t size, const char *path) +static int index_blob_packfile_transaction(struct odb_transaction *base, + struct odb_write_stream *stream, + size_t size, struct object_id *result_oid) { + struct odb_transaction_files *transaction = container_of(base, + struct odb_transaction_files, + base); struct transaction_packfile *state = &transaction->packfile; struct git_hash_ctx ctx; unsigned char obuf[16384]; @@ -1610,7 +1617,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac hashfile_checkpoint(state->f, &checkpoint); idx->offset = state->offset; crc32_begin(state->f); - stream_blob_to_pack(state, &ctx, fd, size, path); + stream_blob_to_pack(state, &ctx, size, stream); git_hash_final_oid(result_oid, &ctx); idx->crc32 = crc32_end(state->f); @@ -1654,15 +1661,12 @@ int index_fd(struct index_state *istate, struct object_id *oid, if (flags & INDEX_WRITE_OBJECT) { struct object_database *odb = the_repository->objects; - struct odb_transaction_files *files_transaction; - struct odb_transaction *transaction; - - transaction = odb_transaction_begin(odb); - files_transaction = container_of(odb->transaction, - struct odb_transaction_files, - base); - ret = 
index_blob_packfile_transaction(files_transaction, oid, fd, - xsize_t(st->st_size), path); + struct odb_transaction *transaction = odb_transaction_begin(odb); + + ret = index_blob_packfile_transaction(odb->transaction, + &stream, + xsize_t(st->st_size), + oid); odb_transaction_commit(transaction); } else { ret = hash_blob_stream(&stream, From ddf6aee9c68e7cfb3e5e21421b1d7798a67534bb Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 2 Apr 2026 16:32:20 -0500 Subject: [PATCH 090/241] odb/transaction: make `write_object_stream()` pluggable How an ODB transaction handles writing objects is expected to vary between implementations. Introduce a new `write_object_stream()` callback in `struct odb_transaction` to make this function pluggable. Rename `index_blob_packfile_transaction()` to `odb_transaction_files_write_object_stream()` and wire it up for use with `struct odb_transaction_files` accordingly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 16 +++++++++------- odb/transaction.c | 7 +++++++ odb/transaction.h | 25 ++++++++++++++++++++++--- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/object-file.c b/object-file.c index 7fa2b9239f409c..65356998f3f785 100644 --- a/object-file.c +++ b/object-file.c @@ -1577,9 +1577,10 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. 
*/ -static int index_blob_packfile_transaction(struct odb_transaction *base, - struct odb_write_stream *stream, - size_t size, struct object_id *result_oid) +static int odb_transaction_files_write_object_stream(struct odb_transaction *base, + struct odb_write_stream *stream, + size_t size, + struct object_id *result_oid) { struct odb_transaction_files *transaction = container_of(base, struct odb_transaction_files, @@ -1663,10 +1664,10 @@ int index_fd(struct index_state *istate, struct object_id *oid, struct object_database *odb = the_repository->objects; struct odb_transaction *transaction = odb_transaction_begin(odb); - ret = index_blob_packfile_transaction(odb->transaction, - &stream, - xsize_t(st->st_size), - oid); + ret = odb_transaction_write_object_stream(odb->transaction, + &stream, + xsize_t(st->st_size), + oid); odb_transaction_commit(transaction); } else { ret = hash_blob_stream(&stream, @@ -2131,6 +2132,7 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source) transaction = xcalloc(1, sizeof(*transaction)); transaction->base.source = source; transaction->base.commit = odb_transaction_files_commit; + transaction->base.write_object_stream = odb_transaction_files_write_object_stream; return &transaction->base; } diff --git a/odb/transaction.c b/odb/transaction.c index 592ac840759a07..b16e07aebfc5ac 100644 --- a/odb/transaction.c +++ b/odb/transaction.c @@ -26,3 +26,10 @@ void odb_transaction_commit(struct odb_transaction *transaction) transaction->source->odb->transaction = NULL; free(transaction); } + +int odb_transaction_write_object_stream(struct odb_transaction *transaction, + struct odb_write_stream *stream, + size_t len, struct object_id *oid) +{ + return transaction->write_object_stream(transaction, stream, len, oid); +} diff --git a/odb/transaction.h b/odb/transaction.h index a56e392f217f4f..854fda06f576e4 100644 --- a/odb/transaction.h +++ b/odb/transaction.h @@ -12,14 +12,24 @@ * * Each ODB source is expected to implement 
its own transaction handling. */ -struct odb_transaction; -typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction); struct odb_transaction { /* The ODB source the transaction is opened against. */ struct odb_source *source; /* The ODB source specific callback invoked to commit a transaction. */ - odb_transaction_commit_fn commit; + void (*commit)(struct odb_transaction *transaction); + + /* + * This callback is expected to write the given object stream into + * the ODB transaction. Note that for now, only blobs support streaming. + * + * The resulting object ID shall be written into the out pointer. The + * callback is expected to return 0 on success, a negative error code + * otherwise. + */ + int (*write_object_stream)(struct odb_transaction *transaction, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); }; /* @@ -35,4 +45,13 @@ struct odb_transaction *odb_transaction_begin(struct object_database *odb); */ void odb_transaction_commit(struct odb_transaction *transaction); +/* + * Writes the object in the provided stream into the transaction. The resulting + * object ID is written into the out pointer. Returns 0 on success, a negative + * error code otherwise. + */ +int odb_transaction_write_object_stream(struct odb_transaction *transaction, + struct odb_write_stream *stream, + size_t len, struct object_id *oid); + #endif From f7d92bc039ef70c6fed9e297df4cb115d694b9c6 Mon Sep 17 00:00:00 2001 From: Chen Linxuan Date: Fri, 3 Apr 2026 15:02:28 +0800 Subject: [PATCH 091/241] config: refactor include_by_gitdir() into include_by_path() The include_by_gitdir() function matches the realpath of a given path against a glob pattern, but its interface is tightly coupled to the gitdir condition: it takes a struct config_options *opts and extracts opts->git_dir internally. 
Refactor it into a more generic include_by_path() helper that takes a const char *path parameter directly, and update the gitdir and gitdir/i callers to pass opts->git_dir explicitly. No behavior change, just preparing for the addition of a new worktree condition that will reuse the same path-matching logic with a different path. Signed-off-by: Chen Linxuan Signed-off-by: Junio C Hamano --- config.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/config.c b/config.c index 156f2a24fa0027..7d5dae0e845085 100644 --- a/config.c +++ b/config.c @@ -235,23 +235,20 @@ static int prepare_include_condition_pattern(const struct key_value_info *kvi, return 0; } -static int include_by_gitdir(const struct key_value_info *kvi, - const struct config_options *opts, - const char *cond, size_t cond_len, int icase) +static int include_by_path(const struct key_value_info *kvi, + const char *path, + const char *cond, size_t cond_len, int icase) { struct strbuf text = STRBUF_INIT; struct strbuf pattern = STRBUF_INIT; size_t prefix; int ret = 0; - const char *git_dir; int already_tried_absolute = 0; - if (opts->git_dir) - git_dir = opts->git_dir; - else + if (!path) goto done; - strbuf_realpath(&text, git_dir, 1); + strbuf_realpath(&text, path, 1); strbuf_add(&pattern, cond, cond_len); ret = prepare_include_condition_pattern(kvi, &pattern, &prefix); if (ret < 0) @@ -284,7 +281,7 @@ static int include_by_gitdir(const struct key_value_info *kvi, * which'll do the right thing */ strbuf_reset(&text); - strbuf_add_absolute_path(&text, git_dir); + strbuf_add_absolute_path(&text, path); already_tried_absolute = 1; goto again; } @@ -400,9 +397,9 @@ static int include_condition_is_true(const struct key_value_info *kvi, const struct config_options *opts = inc->opts; if (skip_prefix_mem(cond, cond_len, "gitdir:", &cond, &cond_len)) - return include_by_gitdir(kvi, opts, cond, cond_len, 0); + return include_by_path(kvi, opts->git_dir, cond, cond_len, 0); else if 
(skip_prefix_mem(cond, cond_len, "gitdir/i:", &cond, &cond_len)) - return include_by_gitdir(kvi, opts, cond, cond_len, 1); + return include_by_path(kvi, opts->git_dir, cond, cond_len, 1); else if (skip_prefix_mem(cond, cond_len, "onbranch:", &cond, &cond_len)) return include_by_branch(inc, cond, cond_len); else if (skip_prefix_mem(cond, cond_len, "hasconfig:remote.*.url:", &cond, From da1f520f136d3418a7bd29d947e0bd8630a2379d Mon Sep 17 00:00:00 2001 From: Chen Linxuan Date: Fri, 3 Apr 2026 15:02:29 +0800 Subject: [PATCH 092/241] config: add "worktree" and "worktree/i" includeIf conditions The includeIf mechanism already supports matching on the .git directory path (gitdir) and the currently checked out branch (onbranch). But in multi-worktree setups the .git directory of a linked worktree points into the main repository's .git/worktrees/ area, which makes gitdir patterns cumbersome when one wants to include config based on the working tree's checkout path instead. Introduce two new condition keywords: - worktree: matches the realpath of the current worktree's working directory (i.e. repo_get_work_tree()) against a glob pattern. This is the path returned by git rev-parse --show-toplevel. - worktree/i: is the case-insensitive variant. The implementation reuses the include_by_path() helper introduced in the previous commit, passing the worktree path in place of the gitdir. The condition never matches in bare repositories (where there is no worktree) or during early config reading (where no repository is available). Add documentation describing the new conditions and their supported pattern features (glob wildcards, **/ and /**, ~ expansion, ./ relative paths, and trailing-/ prefix matching). Add tests covering bare repositories, multiple worktrees, and symlinked worktree paths. 
Signed-off-by: Chen Linxuan Signed-off-by: Junio C Hamano --- Documentation/config.adoc | 50 +++++++++++++++++++++++++++++ config.c | 6 ++++ t/t1305-config-include.sh | 66 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+) diff --git a/Documentation/config.adoc b/Documentation/config.adoc index 62eebe7c54501c..a4f3ec90509816 100644 --- a/Documentation/config.adoc +++ b/Documentation/config.adoc @@ -146,6 +146,48 @@ refer to linkgit:gitignore[5] for details. For convenience: This is the same as `gitdir` except that matching is done case-insensitively (e.g. on case-insensitive file systems) +`worktree`:: + The data that follows the keyword `worktree` and a colon is used as a + glob pattern. If the working directory of the current worktree matches + the pattern, the include condition is met. ++ +The worktree location is the path where files are checked out (as returned +by `git rev-parse --show-toplevel`). This is different from `gitdir`, which +matches the `.git` directory path. In a linked worktree, the worktree path +is the directory where that worktree's files are located, not the main +repository's `.git` directory. ++ +The pattern can contain standard globbing wildcards and two additional +ones, `**/` and `/**`, that can match multiple path components. Please +refer to linkgit:gitignore[5] for details. For convenience: + + * If the pattern starts with `~/`, `~` will be substituted with the + content of the environment variable `HOME`. + + * If the pattern starts with `./`, it is replaced with the directory + containing the current config file. + + * If the pattern does not start with either `~/`, `./` or `/`, `**/` + will be automatically prepended. For example, the pattern `foo/bar` + becomes `**/foo/bar` and would match `/any/path/to/foo/bar`. + + * If the pattern ends with `/`, `**` will be automatically added. For + example, the pattern `foo/` becomes `foo/**`. In other words, it + matches "foo" and everything inside, recursively. 
++ +This condition will never match in a bare repository (which has no worktree). ++ +This is useful when you need to use different `user.name`, `user.email`, or +GPG keys in different worktrees of the same repository. While +`extensions.worktreeConfig` also allows per-worktree configuration, it +requires changes inside each repository. This condition can be set in the +user's global configuration file (e.g. `~/.config/git/config`) and applies +to multiple repositories at once. + +`worktree/i`:: + This is the same as `worktree` except that matching is done + case-insensitively (e.g. on case-insensitive file systems) + `onbranch`:: The data that follows the keyword `onbranch` and a colon is taken to be a pattern with standard globbing wildcards and two additional @@ -244,6 +286,14 @@ Example [includeIf "gitdir:~/to/group/"] path = /path/to/foo.inc +; include if the worktree is at /path/to/project-build +[includeIf "worktree:/path/to/project-build"] + path = build-config.inc + +; include for all worktrees inside /path/to/group +[includeIf "worktree:/path/to/group/"] + path = group-config.inc + ; relative paths are always relative to the including ; file (if the condition is true); their location is not ; affected by the condition diff --git a/config.c b/config.c index 7d5dae0e845085..6d0c2d0725e4f7 100644 --- a/config.c +++ b/config.c @@ -400,6 +400,12 @@ static int include_condition_is_true(const struct key_value_info *kvi, return include_by_path(kvi, opts->git_dir, cond, cond_len, 0); else if (skip_prefix_mem(cond, cond_len, "gitdir/i:", &cond, &cond_len)) return include_by_path(kvi, opts->git_dir, cond, cond_len, 1); + else if (skip_prefix_mem(cond, cond_len, "worktree:", &cond, &cond_len)) + return include_by_path(kvi, inc->repo ? repo_get_work_tree(inc->repo) : NULL, + cond, cond_len, 0); + else if (skip_prefix_mem(cond, cond_len, "worktree/i:", &cond, &cond_len)) + return include_by_path(kvi, inc->repo ? 
repo_get_work_tree(inc->repo) : NULL, + cond, cond_len, 1); else if (skip_prefix_mem(cond, cond_len, "onbranch:", &cond, &cond_len)) return include_by_branch(inc, cond, cond_len); else if (skip_prefix_mem(cond, cond_len, "hasconfig:remote.*.url:", &cond, diff --git a/t/t1305-config-include.sh b/t/t1305-config-include.sh index 6e51f892f320bb..8a5ba4b884d3ff 100755 --- a/t/t1305-config-include.sh +++ b/t/t1305-config-include.sh @@ -396,4 +396,70 @@ test_expect_success 'onbranch without repository but explicit nonexistent Git di test_must_fail nongit git --git-dir=nonexistent config get foo.bar ' +# worktree: conditional include tests + +test_expect_success 'conditional include, worktree bare repo' ' + git init --bare wt-bare && + ( + cd wt-bare && + echo "[includeIf \"worktree:/\"]path=bar-bare" >>config && + echo "[test]wtbare=1" >bar-bare && + test_must_fail git config test.wtbare + ) +' + +test_expect_success 'conditional include, worktree multiple worktrees' ' + git init wt-multi && + ( + cd wt-multi && + test_commit initial && + git worktree add -b linked-branch ../wt-linked HEAD && + git worktree add -b prefix-branch ../wt-prefix/linked HEAD + ) && + wt_main="$(cd wt-multi && pwd)" && + wt_linked="$(cd wt-linked && pwd)" && + wt_prefix_parent="$(cd wt-prefix && pwd)" && + cat >>wt-multi/.git/config <<-EOF && + [includeIf "worktree:$wt_main"] + path = main-config + [includeIf "worktree:$wt_linked"] + path = linked-config + [includeIf "worktree:$wt_prefix_parent/"] + path = prefix-config + EOF + echo "[test]mainvar=main" >wt-multi/.git/main-config && + echo "[test]linkedvar=linked" >wt-multi/.git/linked-config && + echo "[test]prefixvar=prefix" >wt-multi/.git/prefix-config && + echo main >expect && + git -C wt-multi config test.mainvar >actual && + test_cmp expect actual && + test_must_fail git -C wt-multi config test.linkedvar && + test_must_fail git -C wt-multi config test.prefixvar && + echo linked >expect && + git -C wt-linked config test.linkedvar >actual && 
+ test_cmp expect actual && + test_must_fail git -C wt-linked config test.mainvar && + test_must_fail git -C wt-linked config test.prefixvar && + echo prefix >expect && + git -C wt-prefix/linked config test.prefixvar >actual && + test_cmp expect actual && + test_must_fail git -C wt-prefix/linked config test.mainvar && + test_must_fail git -C wt-prefix/linked config test.linkedvar +' + +test_expect_success SYMLINKS 'conditional include, worktree resolves symlinks' ' + mkdir real-wt && + ln -s real-wt link-wt && + git init link-wt/repo && + ( + cd link-wt/repo && + # repo->worktree resolves symlinks, so use real path in pattern + echo "[includeIf \"worktree:**/real-wt/repo\"]path=bar-link" >>.git/config && + echo "[test]wtlink=2" >.git/bar-link && + echo 2 >expect && + git config test.wtlink >actual && + test_cmp expect actual + ) +' + test_done From ab16b9ae0574e80e87d61bdba282aa58b06d0043 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Fri, 3 Apr 2026 12:01:35 +0200 Subject: [PATCH 093/241] config: retry acquiring config.lock for 100ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multiple processes write to a config file concurrently, they contend on its ".lock" file, which is acquired via open(O_EXCL) with no retry. The losers fail immediately with "could not lock config file". Two processes writing unrelated keys (say, "branch.a.remote" and "branch.b.remote") have no semantic conflict, yet one of them fails for a purely mechanical reason. This bites in practice when running `git worktree add -b` concurrently against the same repository. Each invocation makes several writes to ".git/config" to set up branch tracking, and tooling that creates worktrees in parallel sees intermittent failures. Worse, `git worktree add` does not propagate the failed config write to its exit code: the worktree is created and the command exits 0, but tracking configuration is silently dropped. 
The lock is held only for the duration of rewriting a small file, so retrying for 100 ms papers over any realistic contention while still failing fast if a stale lock has been left behind by a crashed process. This mirrors what we already do for individual reference locks (4ff0f01cb7 (refs: retry acquiring reference locks for 100ms, 2017-08-21)). Signed-off-by: Jörg Thalheim Signed-off-by: Junio C Hamano --- config.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/config.c b/config.c index 156f2a24fa0027..f7aff8725de4a3 100644 --- a/config.c +++ b/config.c @@ -2903,6 +2903,14 @@ char *git_config_prepare_comment_string(const char *comment) return prepared; } +/* + * How long to retry acquiring config.lock when another process holds it. + * The lock is held only for the duration of rewriting a small file, so + * 100 ms covers any realistic contention while still failing fast if + * a stale lock has been left behind by a crashed process. + */ +#define CONFIG_LOCK_TIMEOUT_MS 100 + static void validate_comment_string(const char *comment) { size_t leading_blanks; @@ -2986,7 +2994,8 @@ int repo_config_set_multivar_in_file_gently(struct repository *r, * The lock serves a purpose in addition to locking: the new * contents of .git/config will be written into it. 
*/ - fd = hold_lock_file_for_update(&lock, config_filename, 0); + fd = hold_lock_file_for_update_timeout(&lock, config_filename, 0, + CONFIG_LOCK_TIMEOUT_MS); if (fd < 0) { error_errno(_("could not lock config file %s"), config_filename); ret = CONFIG_NO_LOCK; @@ -3331,7 +3340,8 @@ static int repo_config_copy_or_rename_section_in_file( if (!config_filename) config_filename = filename_buf = repo_git_path(r, "config"); - out_fd = hold_lock_file_for_update(&lock, config_filename, 0); + out_fd = hold_lock_file_for_update_timeout(&lock, config_filename, 0, + CONFIG_LOCK_TIMEOUT_MS); if (out_fd < 0) { ret = error(_("could not lock config file %s"), config_filename); goto out; From b8fb48871478c64d296c4fd57e6cc08073861afc Mon Sep 17 00:00:00 2001 From: Pablo Sabater Date: Sat, 4 Apr 2026 11:24:25 +0200 Subject: [PATCH 094/241] graph: add indentation for commits preceded by a parentless commit When having a history with multiple root commits or commits that act like roots (they have excluded parents), let's call them parentless, and drawing the history near them, the graphing engine renders the commits one below the other, seeming that they are related. This issue has been attempted multiple times: https://lore.kernel.org/git/xmqqwnwajbuj.fsf@gitster.c.googlers.com/ This happens because for these parentless commits, in the next row the column becomes empty and the engine prints from left to right from the first empty column, filling the gap below these parentless commits. Keep a parentless commit for at least one row more to avoid having the column empty but hide it as indentation, therefore making the next unrelated commit live in the next column (column means even positions where edges live: 0, 2, 4), then clean that "placeholder" column and let the unrelated commit to naturally collapse to the column where the parentless commit was. Add is_placeholder to the struct column to mark if a column is acting as a placeholder for the padding. 
When a column is parentless, add a column with the parentless commit data
to prevent segfaults when dereferencing 'column->commit', and mark it as
a placeholder.

Teach rendering functions to print a padding ' ' instead of an edge when
a placeholder column is met. Then, unless the next commit is also
parentless (then we need to keep cascading the indentation), clean the
mapping and columns from the placeholder to allow it to collapse
naturally.

Add tests for different cases.

before this patch:

* parentless-B
* child-A2
* child-A1
* parentless-A

after this patch:

* parentless-B
* child-A2
 /
* child-A1
* parentless-A

Signed-off-by: Pablo Sabater
Signed-off-by: Junio C Hamano
---
 graph.c                      |  70 ++++++++++++++++++--
 t/t4215-log-skewed-merges.sh | 124 +++++++++++++++++++++++++++++++++++
 2 files changed, 188 insertions(+), 6 deletions(-)

diff --git a/graph.c b/graph.c
index 26f6fbf000aef5..89d4a11bfeb85c 100644
--- a/graph.c
+++ b/graph.c
@@ -60,6 +60,12 @@ struct column {
	 * index into column_colors.
	 */
	unsigned short color;
+	/*
+	 * A placeholder column keeps the column of a parentless commit filled
+	 * for one extra row, avoiding a next unrelated commit to be printed
+	 * in the same column.
+ */ + unsigned is_placeholder:1; }; enum graph_state { @@ -563,6 +569,7 @@ static void graph_insert_into_new_columns(struct git_graph *graph, i = graph->num_new_columns++; graph->new_columns[i].commit = commit; graph->new_columns[i].color = graph_find_commit_color(graph, commit); + graph->new_columns[i].is_placeholder = 0; } if (graph->num_parents > 1 && idx > -1 && graph->merge_layout == -1) { @@ -607,7 +614,7 @@ static void graph_update_columns(struct git_graph *graph) { struct commit_list *parent; int max_new_columns; - int i, seen_this, is_commit_in_columns; + int i, seen_this, is_commit_in_columns, seems_root; /* * Swap graph->columns with graph->new_columns @@ -654,6 +661,12 @@ static void graph_update_columns(struct git_graph *graph) */ seen_this = 0; is_commit_in_columns = 1; + /* + * num_parents == 0 means that there are no parents flagged as + * interesting to being shown. + */ + seems_root = graph->num_parents == 0 && + !(graph->commit->object.flags & BOUNDARY); for (i = 0; i <= graph->num_columns; i++) { struct commit *col_commit; if (i == graph->num_columns) { @@ -688,11 +701,40 @@ static void graph_update_columns(struct git_graph *graph) * least 2, even if it has no interesting parents. * The current commit always takes up at least 2 * spaces. + * + * Check for the commit to seem like a root, no parents + * rendered and that it is not a boundary commit. If so, + * add a placeholder to keep that column filled for + * at least one row. + * + * Prevents the next commit from being inserted + * just below and making the graph confusing. 
*/ - if (graph->num_parents == 0) + if (seems_root) { + graph_insert_into_new_columns(graph, graph->commit, i); + graph->new_columns[graph->num_new_columns - 1] + .is_placeholder = 1; + } else if (graph->num_parents == 0) { graph->width += 2; + } } else { - graph_insert_into_new_columns(graph, col_commit, -1); + if (graph->columns[i].is_placeholder) { + /* + * Keep the placeholders if the next commit is + * parentless also, making the indentation cascade. + */ + if (!seen_this && seems_root) { + graph_insert_into_new_columns(graph, + graph->columns[i].commit, i); + graph->new_columns[graph->num_new_columns - 1] + .is_placeholder = 1; + } else if (!seen_this) { + graph->mapping[graph->width] = -1; + graph->width += 2; + } + } else { + graph_insert_into_new_columns(graph, col_commit, -1); + } } } @@ -846,7 +888,10 @@ static void graph_output_padding_line(struct git_graph *graph, * Output a padding row, that leaves all branch lines unchanged */ for (i = 0; i < graph->num_new_columns; i++) { - graph_line_write_column(line, &graph->new_columns[i], '|'); + if (graph->new_columns[i].is_placeholder) + graph_line_write_column(line, &graph->new_columns[i], ' '); + else + graph_line_write_column(line, &graph->new_columns[i], '|'); graph_line_addch(line, ' '); } } @@ -1058,7 +1103,13 @@ static void graph_output_commit_line(struct git_graph *graph, struct graph_line graph->mapping[2 * i] < i) { graph_line_write_column(line, col, '/'); } else { - graph_line_write_column(line, col, '|'); + if (col->is_placeholder) { + if (seen_this) + continue; + graph_line_write_column(line, col, ' '); + } else { + graph_line_write_column(line, col, '|'); + } } graph_line_addch(line, ' '); } @@ -1135,7 +1186,14 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct graph_l graph_line_write_column(line, col, '|'); graph_line_addch(line, ' '); } else { - graph_line_write_column(line, col, '|'); + if (col->is_placeholder) { + if (seen_this) + continue; + 
graph_line_write_column(line, col, ' '); + } else { + graph_line_write_column(line, col, '|'); + } + if (graph->merge_layout != 0 || i != graph->commit_index - 1) { if (parent_col) graph_line_write_column( diff --git a/t/t4215-log-skewed-merges.sh b/t/t4215-log-skewed-merges.sh index 28d0779a8c599e..0f6f95a6b5aff5 100755 --- a/t/t4215-log-skewed-merges.sh +++ b/t/t4215-log-skewed-merges.sh @@ -370,4 +370,128 @@ test_expect_success 'log --graph with multiple tips' ' EOF ' +test_expect_success 'log --graph with root commit' ' + git checkout --orphan 8_1 && test_commit 8_A && test_commit 8_A1 && + git checkout --orphan 8_2 && test_commit 8_B && + + check_graph 8_2 8_1 <<-\EOF + * 8_B + * 8_A1 + / + * 8_A + EOF +' + +test_expect_success 'log --graph with multiple root commits' ' + test_commit 8_B1 && + git checkout --orphan 8_3 && test_commit 8_C && + + check_graph 8_3 8_2 8_1 <<-\EOF + * 8_C + * 8_B1 + / + * 8_B + * 8_A1 + / + * 8_A + EOF +' + +test_expect_success 'log --graph commit from a two parent merge shifted' ' + git checkout --orphan 9_1 && test_commit 9_B && + git checkout --orphan 9_2 && test_commit 9_C && + git checkout 9_1 && + git merge 9_2 --allow-unrelated-histories -m 9_M && + git checkout --orphan 9_3 && + test_commit 9_A && test_commit 9_A1 && test_commit 9_A2 && + + check_graph 9_3 9_1 <<-\EOF + * 9_A2 + * 9_A1 + * 9_A + * 9_M + /| + | * 9_C + * 9_B + EOF +' + +test_expect_success 'log --graph commit from a three parent merge shifted' ' + git checkout --orphan 10_1 && test_commit 10_B && + git checkout --orphan 10_2 && test_commit 10_C && + git checkout --orphan 10_3 && test_commit 10_D && + git checkout 10_1 && + TREE=$(git write-tree) && + MERGE=$(git commit-tree $TREE -p 10_1 -p 10_2 -p 10_3 -m 10_M) && + git reset --hard $MERGE && + git checkout --orphan 10_4 && + test_commit 10_A && test_commit 10_A1 && test_commit 10_A2 && + + check_graph 10_4 10_1 <<-\EOF + * 10_A2 + * 10_A1 + * 10_A + * 10_M + /|\ + | | * 10_D + | * 10_C + * 10_B + EOF +' + 
+test_expect_success 'log --graph commit from a four parent merge shifted' ' + git checkout --orphan 11_1 && test_commit 11_B && + git checkout --orphan 11_2 && test_commit 11_C && + git checkout --orphan 11_3 && test_commit 11_D && + git checkout --orphan 11_4 && test_commit 11_E && + git checkout 11_1 && + TREE=$(git write-tree) && + MERGE=$(git commit-tree $TREE -p 11_1 -p 11_2 -p 11_3 -p 11_4 -m 11_M) && + git reset --hard $MERGE && + git checkout --orphan 11_5 && + test_commit 11_A && test_commit 11_A1 && test_commit 11_A2 && + + check_graph 11_5 11_1 <<-\EOF + * 11_A2 + * 11_A1 + * 11_A + *-. 11_M + /|\ \ + | | | * 11_E + | | * 11_D + | * 11_C + * 11_B + EOF +' + +test_expect_success 'log --graph disconnected three roots cascading' ' + git checkout --orphan 12_1 && test_commit 12_D && test_commit 12_D1 && + git checkout --orphan 12_2 && test_commit 12_C && + git checkout --orphan 12_3 && test_commit 12_B && + git checkout --orphan 12_4 && test_commit 12_A && + + check_graph 12_4 12_3 12_2 12_1 <<-\EOF + * 12_A + * 12_B + * 12_C + * 12_D1 + _ / + / + / + * 12_D + EOF +' + +test_expect_success 'log --graph with excluded parent (not a root)' ' + git checkout --orphan 13_1 && test_commit 13_X && test_commit 13_Y && + git checkout --orphan 13_2 && test_commit 13_O && test_commit 13_A && + + check_graph 13_O..13_A 13_1 <<-\EOF + * 13_A + * 13_Y + / + * 13_X + EOF +' + test_done From 1d980196adfd79ae0936e681e8d98c57d9900785 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Sat, 4 Apr 2026 17:12:44 +0000 Subject: [PATCH 095/241] doc: convert git-difftool manual page to synopsis style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * convert commands to synopsis style * use __ for arguments * fix conditional text to sentence limits Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/config/difftool.adoc | 24 ++++----- Documentation/config/mergetool.adoc | 8 +-- 
Documentation/git-difftool.adoc | 80 ++++++++++++++--------------- 3 files changed, 56 insertions(+), 56 deletions(-) diff --git a/Documentation/config/difftool.adoc b/Documentation/config/difftool.adoc index 4f7d40ce242b78..1b8d48381357aa 100644 --- a/Documentation/config/difftool.adoc +++ b/Documentation/config/difftool.adoc @@ -1,43 +1,43 @@ -diff.tool:: +`diff.tool`:: Controls which diff tool is used by linkgit:git-difftool[1]. This variable overrides the value configured in `merge.tool`. The list below shows the valid built-in values. Any other value is treated as a custom diff tool and requires - that a corresponding difftool..cmd variable is defined. + that a corresponding `difftool..cmd` variable is defined. -diff.guitool:: +`diff.guitool`:: Controls which diff tool is used by linkgit:git-difftool[1] when - the -g/--gui flag is specified. This variable overrides the value + the `-g`/`--gui` flag is specified. This variable overrides the value configured in `merge.guitool`. The list below shows the valid built-in values. Any other value is treated as a custom diff tool - and requires that a corresponding difftool..cmd variable + and requires that a corresponding `difftool..cmd` variable is defined. include::{build_dir}/mergetools-diff.adoc[] -difftool..cmd:: +`difftool..cmd`:: Specify the command to invoke the specified diff tool. The specified command is evaluated in shell with the following - variables available: 'LOCAL' is set to the name of the temporary - file containing the contents of the diff pre-image and 'REMOTE' + variables available: `LOCAL` is set to the name of the temporary + file containing the contents of the diff pre-image and `REMOTE` is set to the name of the temporary file containing the contents of the diff post-image. + See the `--tool=` option in linkgit:git-difftool[1] for more details. -difftool..path:: +`difftool..path`:: Override the path for the given tool. This is useful in case your tool is not in the PATH. 
-difftool.trustExitCode:: +`difftool.trustExitCode`:: Exit difftool if the invoked diff tool returns a non-zero exit status. + See the `--trust-exit-code` option in linkgit:git-difftool[1] for more details. -difftool.prompt:: +`difftool.prompt`:: Prompt before each invocation of the diff tool. -difftool.guiDefault:: +`difftool.guiDefault`:: Set `true` to use the `diff.guitool` by default (equivalent to specifying the `--gui` argument), or `auto` to select `diff.guitool` or `diff.tool` depending on the presence of a `DISPLAY` environment variable value. The diff --git a/Documentation/config/mergetool.adoc b/Documentation/config/mergetool.adoc index 7064f5a462cb56..7afdcad92b3934 100644 --- a/Documentation/config/mergetool.adoc +++ b/Documentation/config/mergetool.adoc @@ -52,13 +52,13 @@ if `merge.tool` is configured as __), Git will consult `mergetool..layout` to determine the tool's layout. If the variant-specific configuration is not available, `vimdiff` ' s is used as - fallback. If that too is not available, a default layout with 4 windows - will be used. To configure the layout, see the 'BACKEND SPECIFIC HINTS' + fallback. If that too is not available, a default layout with 4 windows + will be used. ifdef::git-mergetool[] - section. +To configure the layout, see the 'BACKEND SPECIFIC HINTS' section. endif::[] ifndef::git-mergetool[] - section in linkgit:git-mergetool[1]. +To configure the layout, see the 'BACKEND SPECIFIC HINTS' section in linkgit:git-mergetool[1]. endif::[] `mergetool.hideResolved`:: diff --git a/Documentation/git-difftool.adoc b/Documentation/git-difftool.adoc index 064bc683471f21..dd7cacf95e35df 100644 --- a/Documentation/git-difftool.adoc +++ b/Documentation/git-difftool.adoc @@ -7,64 +7,64 @@ git-difftool - Show changes using common diff tools SYNOPSIS -------- -[verse] -'git difftool' [] [ []] [--] [...] +[synopsis] +git difftool [] [ []] [--] [...] 
DESCRIPTION ----------- -'git difftool' is a Git command that allows you to compare and edit files -between revisions using common diff tools. 'git difftool' is a frontend -to 'git diff' and accepts the same options and arguments. See +`git difftool` is a Git command that allows you to compare and edit files +between revisions using common diff tools. `git difftool` is a frontend +to `git diff` and accepts the same options and arguments. See linkgit:git-diff[1]. OPTIONS ------- --d:: ---dir-diff:: +`-d`:: +`--dir-diff`:: Copy the modified files to a temporary location and perform a directory diff on them. This mode never prompts before launching the diff tool. --y:: ---no-prompt:: +`-y`:: +`--no-prompt`:: Do not prompt before launching a diff tool. ---prompt:: +`--prompt`:: Prompt before each invocation of the diff tool. This is the default behaviour; the option is provided to override any configuration settings. ---rotate-to=:: - Start showing the diff for the given path, +`--rotate-to=`:: + Start showing the diff for __, the paths before it will move to the end and output. ---skip-to=:: - Start showing the diff for the given path, skipping all +`--skip-to=`:: + Start showing the diff for __, skipping all the paths before it. --t :: ---tool=:: - Use the diff tool specified by . Valid values include +`-t `:: +`--tool=`:: + Use the diff tool specified by __. Valid values include emerge, kompare, meld, and vimdiff. Run `git difftool --tool-help` - for the list of valid settings. + for the list of valid __ settings. + -If a diff tool is not specified, 'git difftool' +If a diff tool is not specified, `git difftool` will use the configuration variable `diff.tool`. If the -configuration variable `diff.tool` is not set, 'git difftool' +configuration variable `diff.tool` is not set, `git difftool` will pick a suitable default. + You can explicitly provide a full path to the tool by setting the configuration variable `difftool..path`. 
For example, you can configure the absolute path to kdiff3 by setting -`difftool.kdiff3.path`. Otherwise, 'git difftool' assumes the +`difftool.kdiff3.path`. Otherwise, `git difftool` assumes the tool is available in PATH. + Instead of running one of the known diff tools, -'git difftool' can be customized to run an alternative program +`git difftool` can be customized to run an alternative program by specifying the command line to invoke in a configuration variable `difftool..cmd`. + -When 'git difftool' is invoked with this tool (either through the +When `git difftool` is invoked with this tool (either through the `-t` or `--tool` option or the `diff.tool` configuration variable) the configured command line will be invoked with the following variables available: `$LOCAL` is set to the name of the temporary @@ -74,30 +74,30 @@ of the diff post-image. `$MERGED` is the name of the file which is being compared. `$BASE` is provided for compatibility with custom merge tool commands and has the same value as `$MERGED`. ---tool-help:: +`--tool-help`:: Print a list of diff tools that may be used with `--tool`. ---symlinks:: ---no-symlinks:: - 'git difftool''s default behavior is to create symlinks to the +`--symlinks`:: +`--no-symlinks`:: + `git difftool`'s default behavior is to create symlinks to the working tree when run in `--dir-diff` mode and the right-hand side of the comparison yields the same content as the file in the working tree. + -Specifying `--no-symlinks` instructs 'git difftool' to create copies +Specifying `--no-symlinks` instructs `git difftool` to create copies instead. `--no-symlinks` is the default on Windows. --x :: ---extcmd=:: +`-x `:: +`--extcmd=`:: Specify a custom command for viewing diffs. - 'git-difftool' ignores the configured defaults and runs + `git-difftool` ignores the configured defaults and runs ` $LOCAL $REMOTE` when this option is specified. Additionally, `$BASE` is set in the environment. 
--g:: ---gui:: ---no-gui:: - When 'git-difftool' is invoked with the `-g` or `--gui` option +`-g`:: +`--gui`:: +`--no-gui`:: + When `git-difftool` is invoked with the `-g` or `--gui` option the default diff tool will be read from the configured `diff.guitool` variable instead of `diff.tool`. This may be selected automatically using the configuration variable @@ -106,20 +106,20 @@ instead. `--no-symlinks` is the default on Windows. fallback in the order of `merge.guitool`, `diff.tool`, `merge.tool` until a tool is found. ---trust-exit-code:: ---no-trust-exit-code:: +`--trust-exit-code`:: +`--no-trust-exit-code`:: Errors reported by the diff tool are ignored by default. - Use `--trust-exit-code` to make 'git-difftool' exit when an + Use `--trust-exit-code` to make `git-difftool` exit when an invoked diff tool returns a non-zero exit code. + -'git-difftool' will forward the exit code of the invoked tool when +`git-difftool` will forward the exit code of the invoked tool when `--trust-exit-code` is used. See linkgit:git-diff[1] for the full list of supported options. CONFIGURATION ------------- -'git difftool' falls back to 'git mergetool' config variables when the +`git difftool` falls back to `git mergetool` config variables when the difftool equivalents have not been defined. 
include::includes/cmd-config-section-rest.adoc[] From 5594be68eaa0fc9c87f7a50be09b85762415f070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Sat, 4 Apr 2026 17:12:45 +0000 Subject: [PATCH 096/241] doc: convert git-range-diff manual page to synopsis style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * convert commands and options to synopsis style * use __ for arguments * small style fixes Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/git-range-diff.adoc | 50 +++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/Documentation/git-range-diff.adoc b/Documentation/git-range-diff.adoc index b5e85d37f1bee7..880557084533fb 100644 --- a/Documentation/git-range-diff.adoc +++ b/Documentation/git-range-diff.adoc @@ -7,8 +7,8 @@ git-range-diff - Compare two commit ranges (e.g. two versions of a branch) SYNOPSIS -------- -[verse] -'git range-diff' [--color=[]] [--no-color] [] +[synopsis] +git range-diff [--color=[]] [--no-color] [] [--no-dual-color] [--creation-factor=] [--left-only | --right-only] [--diff-merges=] [--remerge-diff] @@ -21,14 +21,14 @@ DESCRIPTION This command shows the differences between two versions of a patch series, or more generally, two commit ranges (ignoring merge commits). -In the presence of `` arguments, these commit ranges are limited +In the presence of __ arguments, these commit ranges are limited accordingly. To that end, it first finds pairs of commits from both commit ranges that correspond with each other. Two commits are said to correspond when the diff between their patches (i.e. the author information, the commit message and the commit diff) is reasonably small compared to the -patches' size. See ``Algorithm`` below for details. +patches' size. See 'Algorithm' below for details. 
Finally, the list of matching commits is shown in the order of the second commit range, with unmatched commits being inserted just after @@ -37,7 +37,7 @@ all of their ancestors have been shown. There are three ways to specify the commit ranges: - ` `: Either commit range can be of the form - `..`, `^!` or `^-`. See `SPECIFYING RANGES` + `..`, `^!` or `^-`. See 'SPECIFYING RANGES' in linkgit:gitrevisions[7] for more details. - `...`. This is equivalent to @@ -48,7 +48,7 @@ There are three ways to specify the commit ranges: OPTIONS ------- ---no-dual-color:: +`--no-dual-color`:: When the commit diffs differ, `git range-diff` recreates the original diffs' coloring, and adds outer -/+ diff markers with the *background* being red/green to make it easier to see e.g. @@ -56,33 +56,33 @@ OPTIONS + Additionally, the commit diff lines that are only present in the first commit range are shown "dimmed" (this can be overridden using the `color.diff.` -config setting where `` is one of `contextDimmed`, `oldDimmed` and +config setting where __ is one of `contextDimmed`, `oldDimmed` and `newDimmed`), and the commit diff lines that are only present in the second commit range are shown in bold (which can be overridden using the config -settings `color.diff.` with `` being one of `contextBold`, +settings `color.diff.` with __ being one of `contextBold`, `oldBold` or `newBold`). + This is known to `range-diff` as "dual coloring". Use `--no-dual-color` to revert to color all lines according to the outer diff markers (and completely ignore the inner diff when it comes to color). ---creation-factor=:: - Set the creation/deletion cost fudge factor to ``. +`--creation-factor=`:: + Set the creation/deletion cost fudge factor to __. Defaults to 60. Try a larger value if `git range-diff` erroneously considers a large change a total rewrite (deletion of one commit and addition of another), and a smaller one in the reverse case. 
- See the ``Algorithm`` section below for an explanation of why this is + See the 'Algorithm' section below for an explanation of why this is needed. ---left-only:: +`--left-only`:: Suppress commits that are missing from the first specified range - (or the "left range" when using the `...` format). + (or the "left range" when using the `...` form). ---right-only:: +`--right-only`:: Suppress commits that are missing from the second specified range - (or the "right range" when using the `...` format). + (or the "right range" when using the `...` form). ---diff-merges=:: +`--diff-merges=`:: Instead of ignoring merge commits, generate diffs for them using the corresponding `--diff-merges=` option of linkgit:git-log[1], and include them in the comparison. @@ -93,30 +93,30 @@ have produced. In other words, if a merge commit is the result of a non-conflicting `git merge`, the `remerge` mode will represent it with an empty diff. ---remerge-diff:: +`--remerge-diff`:: Convenience option, equivalent to `--diff-merges=remerge`. ---notes[=]:: ---no-notes:: +`--notes[=]`:: +`--no-notes`:: This flag is passed to the `git log` program (see linkgit:git-log[1]) that generates the patches. - :: +` `:: Compare the commits specified by the two ranges, where - `` is considered an older version of ``. + __ is considered an older version of __. -...:: +`...`:: Equivalent to passing `..` and `..`. - :: +` `:: Equivalent to passing `..` and `..`. - Note that `` does not need to be the exact branch point + Note that __ does not need to be the exact branch point of the branches. Example: after rebasing a branch `my-topic`, `git range-diff my-topic@{u} my-topic@{1} my-topic` would show the differences introduced by the rebase. `git range-diff` also accepts the regular diff options (see -linkgit:git-diff[1]), most notably the `--color=[]` and +linkgit:git-diff[1]), most notably the `--color[=]` and `--no-color` options. These options are used when generating the "diff between patches", i.e. 
to compare the author, commit message and diff of corresponding old/new commits. There is currently no means to tweak most of the From f4c1b8e3fe855355f3e4c84d3e1a50b9957bd240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Sat, 4 Apr 2026 17:12:46 +0000 Subject: [PATCH 097/241] doc: convert git-shortlog manual page to synopsis style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * convert commands and options to synopsis style * use __ for arguments * small style fixes Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/git-shortlog.adoc | 60 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/Documentation/git-shortlog.adoc b/Documentation/git-shortlog.adoc index a11b57c1cd7b2d..e067d39b3880a8 100644 --- a/Documentation/git-shortlog.adoc +++ b/Documentation/git-shortlog.adoc @@ -3,63 +3,63 @@ git-shortlog(1) NAME ---- -git-shortlog - Summarize 'git log' output +git-shortlog - Summarize `git log` output SYNOPSIS -------- -[verse] -'git shortlog' [] [] [[--] ...] -git log --pretty=short | 'git shortlog' [] +[synopsis] +git shortlog [] [] [[--] ...] +git log --pretty=short | git shortlog [] DESCRIPTION ----------- -Summarizes 'git log' output in a format suitable for inclusion +Summarizes `git log` output in a format suitable for inclusion in release announcements. Each commit will be grouped by author and title. Additionally, "[PATCH]" will be stripped from the commit description. If no revisions are passed on the command line and either standard input -is not a terminal or there is no current branch, 'git shortlog' will +is not a terminal or there is no current branch, `git shortlog` will output a summary of the log read from standard input, without reference to the current repository. 
OPTIONS ------- --n:: ---numbered:: +`-n`:: +`--numbered`:: Sort output according to the number of commits per author instead of author alphabetic order. --s:: ---summary:: +`-s`:: +`--summary`:: Suppress commit description and provide a commit count summary only. --e:: ---email:: +`-e`:: +`--email`:: Show the email address of each author. ---format[=]:: +`--format[=]`:: Instead of the commit subject, use some other information to - describe each commit. '' can be any string accepted - by the `--format` option of 'git log', such as '* [%h] %s'. - (See the "PRETTY FORMATS" section of linkgit:git-log[1].) + describe each commit. __ can be any string accepted + by the `--format` option of `git log`, such as '* [%h] %s'. + (See the 'PRETTY FORMATS' section of linkgit:git-log[1].) + Each pretty-printed commit will be rewrapped before it is shown. ---date=:: +`--date=`:: Show dates formatted according to the given date string. (See - the `--date` option in the "Commit Formatting" section of + the `--date` option in the 'Commit Formatting' section of linkgit:git-log[1]). Useful with `--group=format:`. ---group=:: - Group commits based on ``. If no `--group` option is - specified, the default is `author`. `` is one of: +`--group=`:: + Group commits based on __. If no `--group` option is + specified, the default is `author`. __ is one of: + -- - `author`, commits are grouped by author - `committer`, commits are grouped by committer (the same as `-c`) - - `trailer:`, the `` is interpreted as a case-insensitive + - `trailer:`, the __ is interpreted as a case-insensitive commit message trailer (see linkgit:git-interpret-trailers[1]). For example, if your project uses `Reviewed-by` trailers, you might want to see who has been reviewing with @@ -76,7 +76,7 @@ unless the `--email` option is specified. If the value cannot be parsed as an identity, it will be taken literally and completely. - `format:`, any string accepted by the `--format` option of - 'git log'. 
(See the "PRETTY FORMATS" section of + `git log`. (See the 'PRETTY FORMATS' section of linkgit:git-log[1].) -- + @@ -85,11 +85,11 @@ value (but again, only once per unique value in that commit). For example, `git shortlog --group=author --group=trailer:co-authored-by` counts both authors and co-authors. --c:: ---committer:: +`-c`:: +`--committer`:: This is an alias for `--group=committer`. --w[[,[,]]]:: +`-w[[,[,]]]`:: Linewrap the output by wrapping each line at `width`. The first line of each entry is indented by `indent1` spaces, and the second and subsequent lines are indented by `indent2` spaces. `width`, @@ -98,16 +98,16 @@ counts both authors and co-authors. If width is `0` (zero) then indent the lines of the output without wrapping them. -:: +``:: Show only commits in the specified revision range. When no - is specified, it defaults to `HEAD` (i.e. the + __ is specified, it defaults to `HEAD` (i.e. the whole history leading to the current commit). `origin..HEAD` specifies all the commits reachable from the current commit (i.e. `HEAD`), but not from `origin`. For a complete list of - ways to spell , see the "Specifying Ranges" + ways to spell __, see the 'Specifying Ranges' section of linkgit:gitrevisions[7]. -[--] ...:: +`[--] ...`:: Consider only commits that are enough to explain how the files that match the specified paths came to be. 
+ From 80f4b802e964559c65b08641c07a8acb95d0617e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Avila?= Date: Sat, 4 Apr 2026 17:12:47 +0000 Subject: [PATCH 098/241] doc: convert git-describe manual page to synopsis style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * convert commands and options to synopsis style * use __ for arguments Signed-off-by: Jean-Noël Avila Signed-off-by: Junio C Hamano --- Documentation/git-describe.adoc | 96 ++++++++++++++++----------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/Documentation/git-describe.adoc b/Documentation/git-describe.adoc index 08ff715709ccd1..b2cb1e47e46c67 100644 --- a/Documentation/git-describe.adoc +++ b/Documentation/git-describe.adoc @@ -7,10 +7,10 @@ git-describe - Give an object a human readable name based on an available ref SYNOPSIS -------- -[verse] -'git describe' [--all] [--tags] [--contains] [--abbrev=] [...] -'git describe' [--all] [--tags] [--contains] [--abbrev=] --dirty[=] -'git describe' +[synopsis] +git describe [--all] [--tags] [--contains] [--abbrev=] [...] +git describe [--all] [--tags] [--contains] [--abbrev=] --dirty[=] +git describe DESCRIPTION ----------- @@ -22,70 +22,70 @@ abbreviated object name of the most recent commit. The result is a "human-readable" object name which can also be used to identify the commit to other git commands. -By default (without --all or --tags) `git describe` only shows +By default (without `--all` or `--tags`) `git describe` only shows annotated tags. For more information about creating annotated tags -see the -a and -s options to linkgit:git-tag[1]. +see the `-a` and `-s` options to linkgit:git-tag[1]. If the given object refers to a blob, it will be described as `:`, such that the blob can be found -at `` in the ``, which itself describes the +at __ in the __, which itself describes the first commit in which this blob occurs in a reverse revision walk -from HEAD. +from `HEAD`. 
OPTIONS ------- -...:: - Commit-ish object names to describe. Defaults to HEAD if omitted. +`...`:: + Commit-ish object names to describe. Defaults to `HEAD` if omitted. ---dirty[=]:: ---broken[=]:: +`--dirty[=]`:: +`--broken[=]`:: Describe the state of the working tree. When the working - tree matches HEAD, the output is the same as "git describe - HEAD". If the working tree has local modification "-dirty" + tree matches `HEAD`, the output is the same as `git describe HEAD`. + If the working tree has local modification, `-dirty` is appended to it. If a repository is corrupt and Git cannot determine if there is local modification, Git will - error out, unless `--broken' is given, which appends - the suffix "-broken" instead. + error out, unless `--broken` is given, which appends + the suffix `-broken` instead. ---all:: +`--all`:: Instead of using only the annotated tags, use any ref found in `refs/` namespace. This option enables matching any known branch, remote-tracking branch, or lightweight tag. ---tags:: +`--tags`:: Instead of using only the annotated tags, use any tag found in `refs/tags` namespace. This option enables matching a lightweight (non-annotated) tag. ---contains:: +`--contains`:: Instead of finding the tag that predates the commit, find the tag that comes after the commit, and thus contains it. - Automatically implies --tags. + Automatically implies `--tags`. ---abbrev=:: +`--abbrev=`:: Instead of using the default number of hexadecimal digits (which will vary according to the number of objects in the repository with - a default of 7) of the abbreviated object name, use digits, or - as many digits as needed to form a unique object name. An of 0 + a default of 7) of the abbreviated object name, use __ digits, or + as many digits as needed to form a unique object name. An __ of 0 will suppress long format, only showing the closest tag. 
---candidates=:: +`--candidates=`:: Instead of considering only the 10 most recent tags as candidates to describe the input commit-ish consider - up to candidates. Increasing above 10 will take + up to __ candidates. Increasing __ above 10 will take slightly longer but may produce a more accurate result. - An of 0 will cause only exact matches to be output. + An __ of 0 will cause only exact matches to be output. ---exact-match:: +`--exact-match`:: Only output exact matches (a tag directly references the - supplied commit). This is a synonym for --candidates=0. + supplied commit). This is a synonym for `--candidates=0`. ---debug:: +`--debug`:: Verbosely display information about the searching strategy being employed to standard error. The tag name will still be printed to standard out. ---long:: +`--long`:: Always output the long format (the tag, the number of commits and the abbreviated commit name) even when it matches a tag. This is useful when you want to see parts of the commit object name @@ -94,8 +94,8 @@ OPTIONS describe such a commit as v1.2-0-gdeadbee (0th commit since tag v1.2 that points at object deadbee....). ---match :: - Only consider tags matching the given `glob(7)` pattern, +`--match `:: + Only consider tags matching the given `glob`(7) pattern, excluding the "refs/tags/" prefix. If used with `--all`, it also considers local branches and remote-tracking references matching the pattern, excluding respectively "refs/heads/" and "refs/remotes/" @@ -104,22 +104,22 @@ OPTIONS matching any of the patterns will be considered. Use `--no-match` to clear and reset the list of patterns. ---exclude :: - Do not consider tags matching the given `glob(7)` pattern, excluding +`--exclude `:: + Do not consider tags matching the given `glob`(7) pattern, excluding the "refs/tags/" prefix. 
If used with `--all`, it also does not consider local branches and remote-tracking references matching the pattern, - excluding respectively "refs/heads/" and "refs/remotes/" prefix; + excluding respectively "`refs/heads/`" and "`refs/remotes/`" prefix; references of other types are never considered. If given multiple times, a list of patterns will be accumulated and tags matching any of the - patterns will be excluded. When combined with --match a tag will be - considered when it matches at least one --match pattern and does not - match any of the --exclude patterns. Use `--no-exclude` to clear and + patterns will be excluded. When combined with `--match` a tag will be + considered when it matches at least one `--match` pattern and does not + match any of the `--exclude` patterns. Use `--no-exclude` to clear and reset the list of patterns. ---always:: +`--always`:: Show uniquely abbreviated commit object as fallback. ---first-parent:: +`--first-parent`:: Follow only the first parent commit upon seeing a merge commit. This is useful when you wish to not match tags on branches merged in the history of the target commit. @@ -139,8 +139,8 @@ an abbreviated object name for the commit itself ("2414721") at the end. The number of additional commits is the number -of commits which would be displayed by "git log v1.0.4..parent". -The hash suffix is "-g" + an unambiguous abbreviation for the tip commit +of commits which would be displayed by `git log v1.0.4..parent`. +The hash suffix is "`-g`" + an unambiguous abbreviation for the tip commit of parent (which was `2414721b194453f058079d897d13c4e377f92dc6`). The length of the abbreviation scales as the repository grows, using the approximate number of objects in the repository and a bit of math @@ -149,12 +149,12 @@ The "g" prefix stands for "git" and is used to allow describing the version of a software depending on the SCM the software is managed with. This is useful in an environment where people may use different SCMs. 
-Doing a 'git describe' on a tag-name will just show the tag name: +Doing a `git describe` on a tag-name will just show the tag name: [torvalds@g5 git]$ git describe v1.0.4 v1.0.4 -With --all, the command can use branch heads as references, so +With `--all`, the command can use branch heads as references, so the output shows the reference path as well: [torvalds@g5 git]$ git describe --all --abbrev=4 v1.0.5^2 @@ -163,7 +163,7 @@ the output shows the reference path as well: [torvalds@g5 git]$ git describe --all --abbrev=4 HEAD^ heads/lt/describe-7-g975b -With --abbrev set to 0, the command can be used to find the +With `--abbrev` set to 0, the command can be used to find the closest tagname without any suffix: [torvalds@g5 git]$ git describe --abbrev=0 v1.0.5^2 @@ -179,13 +179,13 @@ be sufficient to disambiguate these commits. SEARCH STRATEGY --------------- -For each commit-ish supplied, 'git describe' will first look for +For each commit-ish supplied, `git describe` will first look for a tag which tags exactly that commit. Annotated tags will always be preferred over lightweight tags, and tags with newer dates will always be preferred over tags with older dates. If an exact match is found, its name will be output and searching will stop. -If an exact match was not found, 'git describe' will walk back +If an exact match was not found, `git describe` will walk back through the commit history to locate an ancestor commit which has been tagged. The ancestor's tag will be output along with an abbreviation of the input commit-ish's SHA-1. If `--first-parent` was @@ -203,7 +203,7 @@ BUGS Tree objects as well as tag objects not pointing at commits, cannot be described. When describing blobs, the lightweight tags pointing at blobs are ignored, -but the blob is still described as : despite the lightweight +but the blob is still described as `:` despite the lightweight tag being favorable. 
GIT From fa1468a1f7c7765a6c7dd1faca4c9dc241d0538c Mon Sep 17 00:00:00 2001 From: Trieu Huynh Date: Tue, 7 Apr 2026 03:30:41 +0900 Subject: [PATCH 099/241] promisor-remote: fix promisor.quiet to use the correct repository fetch_objects() reads the promisor.quiet configuration from the_repository instead of the repo parameter it receives. This means that when git lazy-fetches objects for a non-main repository, eg. a submodule that is itself a partial clone opened via repo_submodule_init(). The submodule's own promisor.quiet setting is ignored and the superproject's setting is used instead. Fix by replacing the_repository with repo in the repo_config_get_bool() call. The practical trigger is git grep --recurse-submodules on a superproject where the submodule is a partial clone. Add a test where promisor.quiet is set only in a partial-clone submodule; a lazy fetch triggered by "git grep --recurse-submodules" must honor that setting. Signed-off-by: Trieu Huynh Signed-off-by: Junio C Hamano --- promisor-remote.c | 2 +- t/t0410-partial-clone.sh | 45 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index 96fa215b06a924..225260b05f8d65 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -46,7 +46,7 @@ static int fetch_objects(struct repository *repo, "fetch", remote_name, "--no-tags", "--no-write-fetch-head", "--recurse-submodules=no", "--filter=blob:none", "--stdin", NULL); - if (!repo_config_get_bool(the_repository, "promisor.quiet", &quiet) && quiet) + if (!repo_config_get_bool(repo, "promisor.quiet", &quiet) && quiet) strvec_push(&child.args, "--quiet"); if (start_command(&child)) die(_("promisor-remote: unable to fork off fetch subprocess")); diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index 52e19728a3fca0..dff442da2090b5 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -717,7 +717,29 @@ test_expect_success 'setup for 
promisor.quiet tests' ' git -C server rm foo.t && git -C server commit -m remove && git -C server config uploadpack.allowanysha1inwant 1 && - git -C server config uploadpack.allowfilter 1 + git -C server config uploadpack.allowfilter 1 && + + # Setup for submodule repo test: superproject whose submodule is a + # partial clone, so that promisor.quiet is read via a non-main repo. + rm -rf sub-pc-src sub-pc-srv.bare super-src super-work && + git init sub-pc-src && + test_commit -C sub-pc-src initial file.txt "hello" && + + git clone --bare sub-pc-src sub-pc-srv.bare && + git -C sub-pc-srv.bare config uploadpack.allowfilter 1 && + git -C sub-pc-srv.bare config uploadpack.allowanysha1inwant 1 && + + git init super-src && + git -C super-src -c protocol.file.allow=always \ + submodule add "file://$(pwd)/sub-pc-srv.bare" sub && + git -C super-src commit -m "add submodule" && + + git -c protocol.file.allow=always clone super-src super-work && + git -C super-work -c protocol.file.allow=always \ + submodule update --init --filter=blob:none sub && + + # Allow file:// in the submodule so that lazy-fetch subprocesses work. + git -C super-work/sub config protocol.file.allow always ' test_expect_success TTY 'promisor.quiet=false shows progress messages' ' @@ -752,6 +774,27 @@ test_expect_success TTY 'promisor.quiet=unconfigured shows progress messages' ' grep "Receiving objects" err ' +test_expect_success 'promisor.quiet from submodule repo is honored' ' + rm -f pc-quiet-trace && + + # Set promisor.quiet only in the submodule, not the superproject. + git -C super-work/sub config promisor.quiet true && + + # Push a new commit+blob to the server; the blob stays missing in the + # partial-clone submodule until a lazy fetch is triggered. 
+ test_commit -C sub-pc-src updated new-file.txt "world" && + git -C sub-pc-src push "$(pwd)/sub-pc-srv.bare" HEAD:master && + git -C super-work/sub -c protocol.file.allow=always fetch origin && + git -C super-work/sub reset --mixed origin/master && + + # grep descends into the submodule and triggers a lazy fetch for the + # missing blob; verify the fetch subprocess carries --quiet. + GIT_TRACE2_EVENT="$(pwd)/pc-quiet-trace" \ + git -C super-work grep --cached --recurse-submodules "world" \ + 2>/dev/null && + grep negotiationAlgorithm pc-quiet-trace | grep -e --quiet +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd From 521731213c905f0dfec6a55393f010d185492c85 Mon Sep 17 00:00:00 2001 From: David Lin Date: Mon, 6 Apr 2026 15:27:11 -0400 Subject: [PATCH 100/241] cache-tree: fix inverted object existence check in cache_tree_fully_valid The negation in front of the object existence check in cache_tree_fully_valid() was lost in 062b914c84 (treewide: convert users of `repo_has_object_file()` to `has_object()`, 2025-04-29), turning `!repo_has_object_file(...)` into `has_object(...)` instead of `!has_object(...)`. This makes cache_tree_fully_valid() always report the cache tree as invalid when objects exist (the common case), forcing callers like write_index_as_tree() to call cache_tree_update() on every invocation. An odb_has_object() check inside update_one() avoids a full tree rebuild, but the unnecessary call still pays the cost of opening an ODB transaction and, in partial clones, a promisor remote check. Restore the missing negation and add a test that verifies write-tree takes the cache-tree shortcut when the cache tree is valid. 
Helped-by: Derrick Stolee Signed-off-by: David Lin Signed-off-by: Junio C Hamano --- cache-tree.c | 2 +- t/t0090-cache-tree.sh | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cache-tree.c b/cache-tree.c index 66ef2becbe01a4..366b1d7dcd8081 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -239,7 +239,7 @@ int cache_tree_fully_valid(struct cache_tree *it) if (!it) return 0; if (it->entry_count < 0 || - odb_has_object(the_repository->objects, &it->oid, + !odb_has_object(the_repository->objects, &it->oid, HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) return 0; for (i = 0; i < it->subtree_nr; i++) { diff --git a/t/t0090-cache-tree.sh b/t/t0090-cache-tree.sh index d901588294668c..0964718d7f33f5 100755 --- a/t/t0090-cache-tree.sh +++ b/t/t0090-cache-tree.sh @@ -278,4 +278,12 @@ test_expect_success 'switching trees does not invalidate shared index' ' ) ' +test_expect_success 'cache-tree is used by write-tree when valid' ' + test_commit use-valid && + + # write-tree with a valid cache-tree should skip cache_tree_update + GIT_TRACE2_PERF="$(pwd)/trace.output" git write-tree && + test_grep ! region_enter.*cache_tree.*update trace.output +' + test_done From 8808e61fd3e953c3534633b8b5adc5b243dd696f Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 7 Apr 2026 13:52:34 +0200 Subject: [PATCH 101/241] promisor-remote: try accepted remotes before others in get_direct() When a server advertises promisor remotes and the client accepts some of them, those remotes carry the server's intent: 'fetch missing objects preferably from here', and the client agrees with that for the remotes it accepts. However promisor_remote_get_direct() actually iterates over all promisor remotes in list order, which is the order they appear in the config files (except perhaps for the one appearing in the `extensions.partialClone` config variable which is tried last). 
This means an existing, but not accepted, promisor remote, could be tried before the accepted ones, which does not reflect the intent of the agreement between client and server. If the client doesn't care about what the server suggests, it should accept nothing and rely on its remotes as they are already configured. To better reflect the agreement between client and server, let's make promisor_remote_get_direct() try the accepted promisor remotes before the non-accepted ones. Concretely, let's extract a try_promisor_remotes() helper and call it twice from promisor_remote_get_direct(): - first with an `accepted_only=true` argument to try only the accepted remotes, - then with `accepted_only=false` to fall back to any remaining remote. Ensuring that accepted remotes are preferred will be even more important if in the future a mechanism is developed to allow the client to auto-configure remotes that the server advertises. This will in particular avoid fetching from the server (which is already configured as a promisor remote) before trying the auto-configured remotes, as these new remotes would likely appear at the end of the config file, and as the server might not appear in the `extensions.partialClone` config variable. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/gitprotocol-v2.adoc | 4 ++ promisor-remote.c | 44 ++++++++++++----- t/t5710-promisor-remote-capability.sh | 69 +++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 13 deletions(-) diff --git a/Documentation/gitprotocol-v2.adoc b/Documentation/gitprotocol-v2.adoc index f985cb4c474953..4fcb1a7bda1be7 100644 --- a/Documentation/gitprotocol-v2.adoc +++ b/Documentation/gitprotocol-v2.adoc @@ -848,6 +848,10 @@ advertised, it can reply with "promisor-remote=" where where `pr-name` is the urlencoded name of a promisor remote the server advertised and the client accepts. 
+The promisor remotes that the client accepted will be tried before the +other configured promisor remotes when the client attempts to fetch +missing objects. + Note that, everywhere in this document, the ';' and ',' characters MUST be encoded if they appear in `pr-name` or `field-value`. diff --git a/promisor-remote.c b/promisor-remote.c index 96fa215b06a924..7ce7d22f952e70 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -268,11 +268,35 @@ static int remove_fetched_oids(struct repository *repo, return remaining_nr; } +static int try_promisor_remotes(struct repository *repo, + struct object_id **remaining_oids, + int *remaining_nr, int *to_free, + bool accepted_only) +{ + struct promisor_remote *r = repo->promisor_remote_config->promisors; + + for (; r; r = r->next) { + if (accepted_only != r->accepted) + continue; + if (fetch_objects(repo, r->name, *remaining_oids, *remaining_nr) < 0) { + if (*remaining_nr == 1) + continue; + *remaining_nr = remove_fetched_oids(repo, remaining_oids, + *remaining_nr, *to_free); + if (*remaining_nr) { + *to_free = 1; + continue; + } + } + return 1; /* all fetched */ + } + return 0; +} + void promisor_remote_get_direct(struct repository *repo, const struct object_id *oids, int oid_nr) { - struct promisor_remote *r; struct object_id *remaining_oids = (struct object_id *)oids; int remaining_nr = oid_nr; int to_free = 0; @@ -283,19 +307,13 @@ void promisor_remote_get_direct(struct repository *repo, promisor_remote_init(repo); - for (r = repo->promisor_remote_config->promisors; r; r = r->next) { - if (fetch_objects(repo, r->name, remaining_oids, remaining_nr) < 0) { - if (remaining_nr == 1) - continue; - remaining_nr = remove_fetched_oids(repo, &remaining_oids, - remaining_nr, to_free); - if (remaining_nr) { - to_free = 1; - continue; - } - } + /* Try accepted remotes first (those the server told us to use) */ + if (try_promisor_remotes(repo, &remaining_oids, &remaining_nr, + &to_free, true)) + goto all_fetched; + if 
(try_promisor_remotes(repo, &remaining_oids, &remaining_nr, + &to_free, false)) goto all_fetched; - } for (i = 0; i < remaining_nr; i++) { if (is_promisor_object(repo, &remaining_oids[i])) diff --git a/t/t5710-promisor-remote-capability.sh b/t/t5710-promisor-remote-capability.sh index 357822c01a7530..bf0eed9f109742 100755 --- a/t/t5710-promisor-remote-capability.sh +++ b/t/t5710-promisor-remote-capability.sh @@ -166,6 +166,75 @@ test_expect_success "init + fetch with promisor.advertise set to 'true'" ' check_missing_objects server 1 "$oid" ' +test_expect_success "clone with two promisors but only one advertised" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client unused_lop" && + + # Create a promisor that will be configured but not be used + git init --bare unused_lop && + + # Clone from server to create a client + GIT_TRACE="$(pwd)/trace" GIT_NO_LAZY_FETCH=0 git clone \ + -c remote.unused_lop.promisor=true \ + -c remote.unused_lop.fetch="+refs/heads/*:refs/remotes/unused_lop/*" \ + -c remote.unused_lop.url="file://$(pwd)/unused_lop" \ + -c remote.lop.promisor=true \ + -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ + -c remote.lop.url="file://$(pwd)/lop" \ + -c promisor.acceptfromserver=All \ + --no-local --filter="blob:limit=5k" server client && + + # Check that "unused_lop" appears before "lop" in the config + printf "remote.%s.promisor true\n" "unused_lop" "lop" "origin" >expect && + git -C client config get --all --show-names --regexp "^remote\..*\.promisor$" >actual && + test_cmp expect actual && + + # Check that "lop" was tried + test_grep " fetch lop " trace && + # Check that "unused_lop" was not contacted + # This means "lop", the accepted promisor, was tried first + test_grep ! 
" fetch unused_lop " trace && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + +test_expect_success "init + fetch two promisors but only one advertised" ' + git -C server config promisor.advertise true && + test_when_finished "rm -rf client unused_lop" && + + # Create a promisor that will be configured but not be used + git init --bare unused_lop && + + mkdir client && + git -C client init && + git -C client config remote.unused_lop.promisor true && + git -C client config remote.unused_lop.fetch "+refs/heads/*:refs/remotes/unused_lop/*" && + git -C client config remote.unused_lop.url "file://$(pwd)/unused_lop" && + git -C client config remote.lop.promisor true && + git -C client config remote.lop.fetch "+refs/heads/*:refs/remotes/lop/*" && + git -C client config remote.lop.url "file://$(pwd)/lop" && + git -C client config remote.server.url "file://$(pwd)/server" && + git -C client config remote.server.fetch "+refs/heads/*:refs/remotes/server/*" && + git -C client config promisor.acceptfromserver All && + + # Check that "unused_lop" appears before "lop" in the config + printf "remote.%s.promisor true\n" "unused_lop" "lop" >expect && + git -C client config get --all --show-names --regexp "^remote\..*\.promisor$" >actual && + test_cmp expect actual && + + GIT_TRACE="$(pwd)/trace" GIT_NO_LAZY_FETCH=0 git -C client fetch --filter="blob:limit=5k" server && + + # Check that "lop" was tried + test_grep " fetch lop " trace && + # Check that "unused_lop" was not contacted + # This means "lop", the accepted promisor, was tried first + test_grep ! 
" fetch unused_lop " trace && + + # Check that the largest object is still missing on the server + check_missing_objects server 1 "$oid" +' + test_expect_success "clone with promisor.acceptfromserver set to 'KnownName'" ' git -C server config promisor.advertise true && test_when_finished "rm -rf client" && From 720b7c26c82ef212852897bedb0d38eee78cb531 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 7 Apr 2026 13:52:35 +0200 Subject: [PATCH 102/241] promisor-remote: pass config entry to all_fields_match() directly The `in_list == 0` path of all_fields_match() looks up the remote in `config_info` by `advertised->name` repeatedly, even though every caller in should_accept_remote() has already performed this lookup and holds the result in `p`. To avoid this useless work, let's replace the `int in_list` parameter with a `struct promisor_info *config_entry` pointer: - When NULL (ACCEPT_ALL mode): scan the whole `config_info` list, as the old `in_list == 1` path did. - When non-NULL: match against that single config entry directly, avoiding the redundant string_list_lookup() call. This removes the hidden dependency on `advertised->name` inside all_fields_match(), which would be wrong if in the future auto-configured remotes are implemented, as the local config name may differ from the server's advertised name. While at it, let's also add a comment before all_fields_match() and match_field_against_config() to help understand how things work and help avoid similar issues. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index 7ce7d22f952e70..6c935f855af752 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -575,6 +575,12 @@ enum accept_promisor { ACCEPT_ALL }; +/* + * Check if a specific field and its advertised value match the local + * configuration of a given promisor remote. 
+ * + * Returns 1 if they match, 0 otherwise. + */ static int match_field_against_config(const char *field, const char *value, struct promisor_info *config_info) { @@ -586,9 +592,18 @@ static int match_field_against_config(const char *field, const char *value, return 0; } +/* + * Check that the advertised fields match the local configuration. + * + * When 'config_entry' is NULL (ACCEPT_ALL mode), every checked field + * must match at least one remote in 'config_info'. + * + * When 'config_entry' points to a specific remote's config, the + * checked fields are compared against that single remote only. + */ static int all_fields_match(struct promisor_info *advertised, struct string_list *config_info, - int in_list) + struct promisor_info *config_entry) { struct string_list *fields = fields_checked(); struct string_list_item *item_checked; @@ -597,7 +612,6 @@ static int all_fields_match(struct promisor_info *advertised, int match = 0; const char *field = item_checked->string; const char *value = NULL; - struct string_list_item *item; if (!strcasecmp(field, promisor_field_filter)) value = advertised->filter; @@ -607,7 +621,11 @@ static int all_fields_match(struct promisor_info *advertised, if (!value) return 0; - if (in_list) { + if (config_entry) { + match = match_field_against_config(field, value, + config_entry); + } else { + struct string_list_item *item; for_each_string_list_item(item, config_info) { struct promisor_info *p = item->util; if (match_field_against_config(field, value, p)) { @@ -615,12 +633,6 @@ static int all_fields_match(struct promisor_info *advertised, break; } } - } else { - item = string_list_lookup(config_info, advertised->name); - if (item) { - struct promisor_info *p = item->util; - match = match_field_against_config(field, value, p); - } } if (!match) @@ -640,7 +652,7 @@ static int should_accept_remote(enum accept_promisor accept, const char *remote_url = advertised->url; if (accept == ACCEPT_ALL) - return all_fields_match(advertised, 
config_info, 1); + return all_fields_match(advertised, config_info, NULL); /* Get config info for that promisor remote */ item = string_list_lookup(config_info, remote_name); @@ -652,7 +664,7 @@ static int should_accept_remote(enum accept_promisor accept, p = item->util; if (accept == ACCEPT_KNOWN_NAME) - return all_fields_match(advertised, config_info, 0); + return all_fields_match(advertised, config_info, p); if (accept != ACCEPT_KNOWN_URL) BUG("Unhandled 'enum accept_promisor' value '%d'", accept); @@ -663,7 +675,7 @@ static int should_accept_remote(enum accept_promisor accept, } if (!strcmp(p->url, remote_url)) - return all_fields_match(advertised, config_info, 0); + return all_fields_match(advertised, config_info, p); warning(_("known remote named '%s' but with URL '%s' instead of '%s'"), remote_name, p->url, remote_url); From 4ed9283b36bc8652954578c3024a00b6e70f8960 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 7 Apr 2026 13:52:36 +0200 Subject: [PATCH 103/241] promisor-remote: clarify that a remote is ignored In should_accept_remote() and parse_one_advertised_remote(), when a remote is ignored, we tell users why it is ignored in a warning, but we don't tell them that the remote is actually ignored. Let's clarify that, so users have a better idea of what's actually happening. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index 6c935f855af752..8e062ec16098ac 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -670,15 +670,16 @@ static int should_accept_remote(enum accept_promisor accept, BUG("Unhandled 'enum accept_promisor' value '%d'", accept); if (!remote_url || !*remote_url) { - warning(_("no or empty URL advertised for remote '%s'"), remote_name); + warning(_("no or empty URL advertised for remote '%s', " + "ignoring this remote"), remote_name); return 0; } if (!strcmp(p->url, remote_url)) return all_fields_match(advertised, config_info, p); - warning(_("known remote named '%s' but with URL '%s' instead of '%s'"), - remote_name, p->url, remote_url); + warning(_("known remote named '%s' but with URL '%s' instead of '%s', " + "ignoring this remote"), remote_name, p->url, remote_url); return 0; } @@ -722,8 +723,8 @@ static struct promisor_info *parse_one_advertised_remote(const char *remote_info string_list_clear(&elem_list, 0); if (!info->name || !info->url) { - warning(_("server advertised a promisor remote without a name or URL: %s"), - remote_info); + warning(_("server advertised a promisor remote without a name or URL: '%s', " + "ignoring this remote"), remote_info); promisor_info_free(info); return NULL; } From 3b4f0403d19738a26f0da58f4efc6f4e2473fcac Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 7 Apr 2026 13:52:37 +0200 Subject: [PATCH 104/241] promisor-remote: reject empty name or URL in advertised remote In parse_one_advertised_remote(), we check for a NULL remote name and remote URL, but not for empty ones. An empty URL seems possible as url_percent_decode("") doesn't return NULL. In promisor_config_info_list(), we ignore remotes with empty URLs, so a Git server should not advertise remotes with empty URLs. 
It's possible that a buggy or malicious server would do it though. So let's tighten the check in parse_one_advertised_remote() to also reject empty strings at parse time. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/promisor-remote.c b/promisor-remote.c index 8e062ec16098ac..8322349ae8ba87 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -722,7 +722,7 @@ static struct promisor_info *parse_one_advertised_remote(const char *remote_info string_list_clear(&elem_list, 0); - if (!info->name || !info->url) { + if (!info->name || !*info->name || !info->url || !*info->url) { warning(_("server advertised a promisor remote without a name or URL: '%s', " "ignoring this remote"), remote_info); promisor_info_free(info); From 64f0f6b88aea33546afd1271862b486fafe7e9cc Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 7 Apr 2026 13:52:38 +0200 Subject: [PATCH 105/241] promisor-remote: refactor should_accept_remote() control flow A previous commit made sure we now reject empty URLs early at parse time. This makes the existing warning() in case a remote URL is NULL or empty very unlikely to be useful. In future work, we also plan to add URL-based acceptance logic into should_accept_remote(). To adapt to previous changes and prepare for upcoming changes, let's restructure the control flow in should_accept_remote(). Concretely, let's: - Replace the warning() in case of an empty URL with a BUG(), as a previous commit made sure empty URLs are rejected early at parse time. - Move that modified empty-URL check to the very top of the function, so that every acceptance mode, instead of only ACCEPT_KNOWN_URL, is covered. - Invert the URL comparison: instead of returning on match and warning on mismatch, return early on mismatch and let the match case fall through. This opens a single exit path at the bottom of the function for future commits to extend. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index 8322349ae8ba87..5860a3d3f36e09 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -651,6 +651,11 @@ static int should_accept_remote(enum accept_promisor accept, const char *remote_name = advertised->name; const char *remote_url = advertised->url; + if (!remote_url || !*remote_url) + BUG("no or empty URL advertised for remote '%s'; " + "this remote should have been rejected earlier", + remote_name); + if (accept == ACCEPT_ALL) return all_fields_match(advertised, config_info, NULL); @@ -669,19 +674,14 @@ static int should_accept_remote(enum accept_promisor accept, if (accept != ACCEPT_KNOWN_URL) BUG("Unhandled 'enum accept_promisor' value '%d'", accept); - if (!remote_url || !*remote_url) { - warning(_("no or empty URL advertised for remote '%s', " - "ignoring this remote"), remote_name); + if (strcmp(p->url, remote_url)) { + warning(_("known remote named '%s' but with URL '%s' instead of '%s', " + "ignoring this remote"), + remote_name, p->url, remote_url); return 0; } - if (!strcmp(p->url, remote_url)) - return all_fields_match(advertised, config_info, p); - - warning(_("known remote named '%s' but with URL '%s' instead of '%s', " - "ignoring this remote"), remote_name, p->url, remote_url); - - return 0; + return all_fields_match(advertised, config_info, p); } static int skip_field_name_prefix(const char *elem, const char *field_name, const char **value) From 16a4372a3df7579429b7bc23e984bd797a4b7b8d Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 7 Apr 2026 13:52:39 +0200 Subject: [PATCH 106/241] promisor-remote: refactor has_control_char() In a future commit we are going to check if some strings contain control characters, so let's refactor the logic to do that in a new has_control_char() helper function. 
It cleans up the code a bit anyway. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index 5860a3d3f36e09..d60518f19c053e 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -642,6 +642,14 @@ static int all_fields_match(struct promisor_info *advertised, return 1; } +static bool has_control_char(const char *s) +{ + for (const char *c = s; *c; c++) + if (iscntrl(*c)) + return true; + return false; +} + static int should_accept_remote(enum accept_promisor accept, struct promisor_info *advertised, struct string_list *config_info) @@ -772,18 +780,14 @@ static bool valid_filter(const char *filter, const char *remote_name) return !res; } -/* Check that a token doesn't contain any control character */ static bool valid_token(const char *token, const char *remote_name) { - const char *c = token; - - for (; *c; c++) - if (iscntrl(*c)) { - warning(_("invalid token '%s' for remote '%s' " - "will not be stored"), - token, remote_name); - return false; - } + if (has_control_char(token)) { + warning(_("invalid token '%s' for remote '%s' " + "will not be stored"), + token, remote_name); + return false; + } return true; } From 7557a562434804d27f1417fe94c4081e2ee7e68b Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 7 Apr 2026 13:52:40 +0200 Subject: [PATCH 107/241] promisor-remote: refactor accept_from_server() In future commits, we are going to add more logic to filter_promisor_remote() which is already doing a lot of things. Let's alleviate that by moving the logic that checks and validates the value of the `promisor.acceptFromServer` config variable into its own accept_from_server() helper function. 
Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index d60518f19c053e..8d80ef6040534c 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -862,20 +862,12 @@ static bool promisor_store_advertised_fields(struct promisor_info *advertised, return reload_config; } -static void filter_promisor_remote(struct repository *repo, - struct strvec *accepted, - const char *info) +static enum accept_promisor accept_from_server(struct repository *repo) { const char *accept_str; enum accept_promisor accept = ACCEPT_NONE; - struct string_list config_info = STRING_LIST_INIT_NODUP; - struct string_list remote_info = STRING_LIST_INIT_DUP; - struct store_info *store_info = NULL; - struct string_list_item *item; - bool reload_config = false; - struct string_list accepted_filters = STRING_LIST_INIT_DUP; - if (!repo_config_get_string_tmp(the_repository, "promisor.acceptfromserver", &accept_str)) { + if (!repo_config_get_string_tmp(repo, "promisor.acceptfromserver", &accept_str)) { if (!*accept_str || !strcasecmp("None", accept_str)) accept = ACCEPT_NONE; else if (!strcasecmp("KnownUrl", accept_str)) @@ -889,6 +881,21 @@ static void filter_promisor_remote(struct repository *repo, accept_str, "promisor.acceptfromserver"); } + return accept; +} + +static void filter_promisor_remote(struct repository *repo, + struct strvec *accepted, + const char *info) +{ + struct string_list config_info = STRING_LIST_INIT_NODUP; + struct string_list remote_info = STRING_LIST_INIT_DUP; + struct store_info *store_info = NULL; + struct string_list_item *item; + bool reload_config = false; + struct string_list accepted_filters = STRING_LIST_INIT_DUP; + enum accept_promisor accept = accept_from_server(repo); + if (accept == ACCEPT_NONE) return; From e0f80d8876960442dd2645215c4fe5e1b1d80fc3 Mon Sep 17 00:00:00 2001 From: Christian Couder 
Date: Tue, 7 Apr 2026 13:52:41 +0200 Subject: [PATCH 108/241] promisor-remote: keep accepted promisor_info structs alive In filter_promisor_remote(), the instances of `struct promisor_info` for accepted remotes are dismantled into separate parallel data structures (the 'accepted' strvec for server names, and 'accepted_filters' for filter strings) and then immediately freed. Instead, let's keep these instances on an 'accepted_remotes' list. This way the post-loop phase can iterate a single list to build the protocol reply, apply advertised filters, and mark remotes as accepted, rather than iterating three separate structures. This refactoring also prepares for a future commit that will add a 'local_name' member to 'struct promisor_info'. Since struct instances stay alive, downstream code will be able to simply read both names from them rather than needing yet another parallel strvec. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index 8d80ef6040534c..74e65e9dd0de48 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -890,10 +890,10 @@ static void filter_promisor_remote(struct repository *repo, { struct string_list config_info = STRING_LIST_INIT_NODUP; struct string_list remote_info = STRING_LIST_INIT_DUP; + struct string_list accepted_remotes = STRING_LIST_INIT_NODUP; struct store_info *store_info = NULL; struct string_list_item *item; bool reload_config = false; - struct string_list accepted_filters = STRING_LIST_INIT_DUP; enum accept_promisor accept = accept_from_server(repo); if (accept == ACCEPT_NONE) @@ -922,17 +922,10 @@ static void filter_promisor_remote(struct repository *repo, if (promisor_store_advertised_fields(advertised, store_info)) reload_config = true; - strvec_push(accepted, advertised->name); - - /* Capture advertised filters for accepted remotes */ - if 
(advertised->filter) { - struct string_list_item *i; - i = string_list_append(&accepted_filters, advertised->name); - i->util = xstrdup(advertised->filter); - } + string_list_append(&accepted_remotes, advertised->name)->util = advertised; + } else { + promisor_info_free(advertised); } - - promisor_info_free(advertised); } promisor_info_list_clear(&config_info); @@ -942,24 +935,23 @@ static void filter_promisor_remote(struct repository *repo, if (reload_config) repo_promisor_remote_reinit(repo); - /* Apply accepted remote filters to the stable repo state */ - for_each_string_list_item(item, &accepted_filters) { - struct promisor_remote *r = repo_promisor_remote_find(repo, item->string); - if (r) { - free(r->advertised_filter); - r->advertised_filter = item->util; - item->util = NULL; - } - } + /* Apply accepted remotes to the stable repo state */ + for_each_string_list_item(item, &accepted_remotes) { + struct promisor_info *info = item->util; + struct promisor_remote *r = repo_promisor_remote_find(repo, info->name); - string_list_clear(&accepted_filters, 1); + strvec_push(accepted, info->name); - /* Mark the remotes as accepted in the repository state */ - for (size_t i = 0; i < accepted->nr; i++) { - struct promisor_remote *r = repo_promisor_remote_find(repo, accepted->v[i]); - if (r) + if (r) { r->accepted = 1; + if (info->filter) { + free(r->advertised_filter); + r->advertised_filter = xstrdup(info->filter); + } + } } + + promisor_info_list_clear(&accepted_remotes); } void promisor_remote_reply(const char *info, char **accepted_out) From d56e483b03bfe46340af5cdbcddec8858661d2e9 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 7 Apr 2026 13:52:42 +0200 Subject: [PATCH 109/241] promisor-remote: remove the 'accepted' strvec In a previous commit, filter_promisor_remote() was refactored to keep accepted 'struct promisor_info' instances alive instead of dismantling them into separate parallel data structures. 
Let's go one step further and replace the 'struct strvec *accepted' argument passed to filter_promisor_remote() with a 'struct string_list *accepted_remotes' argument. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- promisor-remote.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/promisor-remote.c b/promisor-remote.c index 74e65e9dd0de48..38fa05054227f6 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -885,12 +885,11 @@ static enum accept_promisor accept_from_server(struct repository *repo) } static void filter_promisor_remote(struct repository *repo, - struct strvec *accepted, + struct string_list *accepted_remotes, const char *info) { struct string_list config_info = STRING_LIST_INIT_NODUP; struct string_list remote_info = STRING_LIST_INIT_DUP; - struct string_list accepted_remotes = STRING_LIST_INIT_NODUP; struct store_info *store_info = NULL; struct string_list_item *item; bool reload_config = false; @@ -922,7 +921,7 @@ static void filter_promisor_remote(struct repository *repo, if (promisor_store_advertised_fields(advertised, store_info)) reload_config = true; - string_list_append(&accepted_remotes, advertised->name)->util = advertised; + string_list_append(accepted_remotes, advertised->name)->util = advertised; } else { promisor_info_free(advertised); } @@ -936,12 +935,10 @@ static void filter_promisor_remote(struct repository *repo, repo_promisor_remote_reinit(repo); /* Apply accepted remotes to the stable repo state */ - for_each_string_list_item(item, &accepted_remotes) { + for_each_string_list_item(item, accepted_remotes) { struct promisor_info *info = item->util; struct promisor_remote *r = repo_promisor_remote_find(repo, info->name); - strvec_push(accepted, info->name); - if (r) { r->accepted = 1; if (info->filter) { @@ -950,23 +947,23 @@ static void filter_promisor_remote(struct repository *repo, } } } - - promisor_info_list_clear(&accepted_remotes); } void 
promisor_remote_reply(const char *info, char **accepted_out) { - struct strvec accepted = STRVEC_INIT; + struct string_list accepted_remotes = STRING_LIST_INIT_NODUP; - filter_promisor_remote(the_repository, &accepted, info); + filter_promisor_remote(the_repository, &accepted_remotes, info); if (accepted_out) { - if (accepted.nr) { + if (accepted_remotes.nr) { struct strbuf reply = STRBUF_INIT; - for (size_t i = 0; i < accepted.nr; i++) { - if (i) + struct string_list_item *item; + + for_each_string_list_item(item, &accepted_remotes) { + if (reply.len) strbuf_addch(&reply, ';'); - strbuf_addstr_urlencode(&reply, accepted.v[i], allow_unsanitized); + strbuf_addstr_urlencode(&reply, item->string, allow_unsanitized); } *accepted_out = strbuf_detach(&reply, NULL); } else { @@ -974,7 +971,7 @@ void promisor_remote_reply(const char *info, char **accepted_out) } } - strvec_clear(&accepted); + promisor_info_list_clear(&accepted_remotes); } void mark_promisor_remotes_as_accepted(struct repository *r, const char *remotes) From 8eb863597f630efe08f96ed12f8defbe5a5f0b1d Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Tue, 7 Apr 2026 13:52:43 +0200 Subject: [PATCH 110/241] t5710: use proper file:// URIs for absolute paths In t5710, we frequently construct local file URIs using `file://$(pwd)`. On Unix-like systems, $(pwd) returns an absolute path starting with a slash (e.g., `/tmp/repo`), resulting in a valid 3-slash URI with an empty host (`file:///tmp/repo`). However, on Windows, $(pwd) returns a path starting with a drive letter (e.g., `D:/a/repo`). This results in a 2-slash URI (`file://D:/a/repo`). Standard URI parsers misinterpret this format, treating `D:` as the host rather than part of the absolute path. This is to be expected because RFC 8089 says that the `//` prefix with an empty local host must be followed by an absolute path starting with a slash. 
While this hasn't broken the existing tests (because the old `promisor.acceptFromServer` logic relies entirely on strict `strcmp()` without normalizing the URLs), it will break future commits that pass these URLs through `url_normalize()` or similar functions. To future-proof the tests and ensure cross-platform URI compliance, let's introduce a $TRASH_DIRECTORY_URL helper variable that explicitly guarantees a leading slash for the path component, ensuring valid 3-slash `file:///` URIs on all operating systems. While at it, let's also introduce $ENCODED_TRASH_DIRECTORY_URL to handle some common special characters in directory paths. To be extra safe, let's skip all the tests if there are uncommon special characters in the directory path. Then let's replace all instances of `file://$(pwd)` with $TRASH_DIRECTORY_URL across the test script, and let's simplify the `sendFields` and `checkFields` tests to use $ENCODED_TRASH_DIRECTORY_URL directly. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- t/t5710-promisor-remote-capability.sh | 79 +++++++++++++++++---------- 1 file changed, 51 insertions(+), 28 deletions(-) diff --git a/t/t5710-promisor-remote-capability.sh b/t/t5710-promisor-remote-capability.sh index bf0eed9f109742..b404ad9f0a9e3d 100755 --- a/t/t5710-promisor-remote-capability.sh +++ b/t/t5710-promisor-remote-capability.sh @@ -76,6 +76,31 @@ copy_to_lop () { cp "$path" "$path2" } +# On Windows, `pwd` returns a path like 'D:/foo/bar'. Prepend '/' to turn +# it into '/D:/foo/bar', which is what git expects in file:// URLs on Windows. +# On Unix, the path already starts with '/', so this is a no-op. +pwd_path=$(pwd) +case "$pwd_path" in +[a-zA-Z]:*) pwd_path="/$pwd_path" ;; +esac + +# Allowed characters: alphanumeric, standard path/URI (_ . 
~ / : -), +# and those percent-encoded below (% space = , ;) +rest=$(printf "%s" "$pwd_path" | tr -d 'a-zA-Z0-9_.~/:% =,;-') +if test -n "$rest" +then + skip_all="PWD contains unsupported special characters" + test_done +fi + +TRASH_DIRECTORY_URL="file://$pwd_path" + +encoded_path=$(printf "%s" "$pwd_path" | + sed -e 's/%/%25/g' -e 's/ /%20/g' -e 's/=/%3D/g' \ + -e 's/;/%3B/g' -e 's/,/%2C/g') + +ENCODED_TRASH_DIRECTORY_URL="file://$encoded_path" + test_expect_success "setup for testing promisor remote advertisement" ' # Create another bare repo called "lop" (for Large Object Promisor) git init --bare lop && @@ -88,7 +113,7 @@ test_expect_success "setup for testing promisor remote advertisement" ' initialize_server 1 "$oid" && # Configure lop as promisor remote for server - git -C server remote add lop "file://$(pwd)/lop" && + git -C server remote add lop "$TRASH_DIRECTORY_URL/lop" && git -C server config remote.lop.promisor true && git -C lop config uploadpack.allowFilter true && @@ -104,7 +129,7 @@ test_expect_success "clone with promisor.advertise set to 'true'" ' # Clone from server to create a client GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=All \ --no-local --filter="blob:limit=5k" server client && @@ -119,7 +144,7 @@ test_expect_success "clone with promisor.advertise set to 'false'" ' # Clone from server to create a client GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=All \ --no-local --filter="blob:limit=5k" server client && @@ -137,7 +162,7 @@ test_expect_success "clone with promisor.acceptfromserver set to 'None'" ' # Clone from server to create a client GIT_NO_LAZY_FETCH=0 git 
clone -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=None \ --no-local --filter="blob:limit=5k" server client && @@ -156,8 +181,8 @@ test_expect_success "init + fetch with promisor.advertise set to 'true'" ' git -C client init && git -C client config remote.lop.promisor true && git -C client config remote.lop.fetch "+refs/heads/*:refs/remotes/lop/*" && - git -C client config remote.lop.url "file://$(pwd)/lop" && - git -C client config remote.server.url "file://$(pwd)/server" && + git -C client config remote.lop.url "$TRASH_DIRECTORY_URL/lop" && + git -C client config remote.server.url "$TRASH_DIRECTORY_URL/server" && git -C client config remote.server.fetch "+refs/heads/*:refs/remotes/server/*" && git -C client config promisor.acceptfromserver All && GIT_NO_LAZY_FETCH=0 git -C client fetch --filter="blob:limit=5k" server && @@ -177,10 +202,10 @@ test_expect_success "clone with two promisors but only one advertised" ' GIT_TRACE="$(pwd)/trace" GIT_NO_LAZY_FETCH=0 git clone \ -c remote.unused_lop.promisor=true \ -c remote.unused_lop.fetch="+refs/heads/*:refs/remotes/unused_lop/*" \ - -c remote.unused_lop.url="file://$(pwd)/unused_lop" \ + -c remote.unused_lop.url="$TRASH_DIRECTORY_URL/unused_lop" \ -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=All \ --no-local --filter="blob:limit=5k" server client && @@ -210,11 +235,11 @@ test_expect_success "init + fetch two promisors but only one advertised" ' git -C client init && git -C client config remote.unused_lop.promisor true && git -C client config remote.unused_lop.fetch "+refs/heads/*:refs/remotes/unused_lop/*" && - git -C client config remote.unused_lop.url "file://$(pwd)/unused_lop" && + git -C client 
config remote.unused_lop.url "$TRASH_DIRECTORY_URL/unused_lop" && git -C client config remote.lop.promisor true && git -C client config remote.lop.fetch "+refs/heads/*:refs/remotes/lop/*" && - git -C client config remote.lop.url "file://$(pwd)/lop" && - git -C client config remote.server.url "file://$(pwd)/server" && + git -C client config remote.lop.url "$TRASH_DIRECTORY_URL/lop" && + git -C client config remote.server.url "$TRASH_DIRECTORY_URL/server" && git -C client config remote.server.fetch "+refs/heads/*:refs/remotes/server/*" && git -C client config promisor.acceptfromserver All && @@ -242,7 +267,7 @@ test_expect_success "clone with promisor.acceptfromserver set to 'KnownName'" ' # Clone from server to create a client GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=KnownName \ --no-local --filter="blob:limit=5k" server client && @@ -257,7 +282,7 @@ test_expect_success "clone with 'KnownName' and different remote names" ' # Clone from server to create a client GIT_NO_LAZY_FETCH=0 git clone -c remote.serverTwo.promisor=true \ -c remote.serverTwo.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.serverTwo.url="file://$(pwd)/lop" \ + -c remote.serverTwo.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=KnownName \ --no-local --filter="blob:limit=5k" server client && @@ -294,7 +319,7 @@ test_expect_success "clone with promisor.acceptfromserver set to 'KnownUrl'" ' # Clone from server to create a client GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=KnownUrl \ --no-local --filter="blob:limit=5k" server client && @@ -311,7 +336,7 @@ test_expect_success "clone with 'KnownUrl' 
and different remote urls" ' # Clone from server to create a client GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/serverTwo" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/serverTwo" \ -c promisor.acceptfromserver=KnownUrl \ --no-local --filter="blob:limit=5k" server client && @@ -326,7 +351,7 @@ test_expect_success "clone with 'KnownUrl' and url not configured on the server" git -C server config promisor.advertise true && test_when_finished "rm -rf client" && - test_when_finished "git -C server config set remote.lop.url \"file://$(pwd)/lop\"" && + test_when_finished "git -C server config set remote.lop.url \"$TRASH_DIRECTORY_URL/lop\"" && git -C server config unset remote.lop.url && # Clone from server to create a client @@ -335,7 +360,7 @@ test_expect_success "clone with 'KnownUrl' and url not configured on the server" # missing, so the remote name will be used instead which will fail. test_must_fail env GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=KnownUrl \ --no-local --filter="blob:limit=5k" server client && @@ -347,7 +372,7 @@ test_expect_success "clone with 'KnownUrl' and empty url, so not advertised" ' git -C server config promisor.advertise true && test_when_finished "rm -rf client" && - test_when_finished "git -C server config set remote.lop.url \"file://$(pwd)/lop\"" && + test_when_finished "git -C server config set remote.lop.url \"$TRASH_DIRECTORY_URL/lop\"" && git -C server config set remote.lop.url "" && # Clone from server to create a client @@ -356,7 +381,7 @@ test_expect_success "clone with 'KnownUrl' and empty url, so not advertised" ' # so the remote name will be used instead which will fail. 
test_must_fail env GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=KnownUrl \ --no-local --filter="blob:limit=5k" server client && @@ -380,13 +405,12 @@ test_expect_success "clone with promisor.sendFields" ' GIT_TRACE_PACKET="$(pwd)/trace" GIT_NO_LAZY_FETCH=0 git clone \ -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=All \ --no-local --filter="blob:limit=5k" server client && # Check that fields are properly transmitted - ENCODED_URL=$(echo "file://$(pwd)/lop" | sed -e "s/ /%20/g") && - PR1="name=lop,url=$ENCODED_URL,partialCloneFilter=blob:none" && + PR1="name=lop,url=$ENCODED_TRASH_DIRECTORY_URL/lop,partialCloneFilter=blob:none" && PR2="name=otherLop,url=https://invalid.invalid,partialCloneFilter=blob:limit=10k,token=fooBar" && test_grep "clone< promisor-remote=$PR1;$PR2" trace && test_grep "clone> promisor-remote=lop;otherLop" trace && @@ -411,15 +435,14 @@ test_expect_success "clone with promisor.checkFields" ' GIT_TRACE_PACKET="$(pwd)/trace" GIT_NO_LAZY_FETCH=0 git clone \ -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c remote.lop.partialCloneFilter="blob:none" \ -c promisor.acceptfromserver=All \ -c promisor.checkFields=partialcloneFilter \ --no-local --filter="blob:limit=5k" server client && # Check that fields are properly transmitted - ENCODED_URL=$(echo "file://$(pwd)/lop" | sed -e "s/ /%20/g") && - PR1="name=lop,url=$ENCODED_URL,partialCloneFilter=blob:none" && + PR1="name=lop,url=$ENCODED_TRASH_DIRECTORY_URL/lop,partialCloneFilter=blob:none" && 
PR2="name=otherLop,url=https://invalid.invalid,partialCloneFilter=blob:limit=10k,token=fooBar" && test_grep "clone< promisor-remote=$PR1;$PR2" trace && test_grep "clone> promisor-remote=lop" trace && @@ -449,7 +472,7 @@ test_expect_success "clone with promisor.storeFields=partialCloneFilter" ' GIT_TRACE_PACKET="$(pwd)/trace" GIT_NO_LAZY_FETCH=0 git clone \ -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c remote.lop.token="fooYYY" \ -c remote.lop.partialCloneFilter="blob:none" \ -c promisor.acceptfromserver=All \ @@ -501,7 +524,7 @@ test_expect_success "clone and fetch with --filter=auto" ' GIT_TRACE_PACKET="$(pwd)/trace" GIT_NO_LAZY_FETCH=0 git clone \ -c remote.lop.promisor=true \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=All \ --no-local --filter=auto server client 2>err && @@ -558,7 +581,7 @@ test_expect_success "clone with promisor.advertise set to 'true' but don't delet # Clone from server to create a client GIT_NO_LAZY_FETCH=0 git clone -c remote.lop.promisor=true \ -c remote.lop.fetch="+refs/heads/*:refs/remotes/lop/*" \ - -c remote.lop.url="file://$(pwd)/lop" \ + -c remote.lop.url="$TRASH_DIRECTORY_URL/lop" \ -c promisor.acceptfromserver=All \ --no-local --filter="blob:limit=5k" server client && From b886f0b5dc71030bc9dcf58376533cf8e1098e9a Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Sat, 4 Apr 2026 19:28:38 +0530 Subject: [PATCH 111/241] refs: add struct repository parameter in get_files_ref_lock_timeout_ms() get_files_ref_lock_timeout_ms() calls repo_config_get_int() using the_repository, as no repository instance is available in its scope. Add a struct repository parameter and use it instead of the_repository. Update all callers accordingly. 
In files-backend.c, lock_raw_ref() can obtain repository instance from the struct ref_transaction via transaction->ref_store->repo and pass it down. For create_reflock(), which is used as a callback, introduce a small wrapper struct to pass both struct lock_file and struct repository through the callback data. This reduces reliance on the_repository global, though the function still uses static variables and is not yet fully repository-scoped. This can be addressed in a follow-up change. Signed-off-by: Shreyansh Paliwal Acked-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 4 ++-- refs/files-backend.c | 19 +++++++++++++------ refs/refs-internal.h | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/refs.c b/refs.c index 5d1d28523d617f..4ab746a3cb555c 100644 --- a/refs.c +++ b/refs.c @@ -989,7 +989,7 @@ enum ref_worktree_type parse_worktree_ref(const char *maybe_worktree_ref, return REF_WORKTREE_SHARED; } -long get_files_ref_lock_timeout_ms(void) +long get_files_ref_lock_timeout_ms(struct repository *repo) { static int configured = 0; @@ -997,7 +997,7 @@ long get_files_ref_lock_timeout_ms(void) static int timeout_ms = 100; if (!configured) { - repo_config_get_int(the_repository, "core.filesreflocktimeout", &timeout_ms); + repo_config_get_int(repo, "core.filesreflocktimeout", &timeout_ms); configured = 1; } diff --git a/refs/files-backend.c b/refs/files-backend.c index 0537a72b2af9e0..10e4388d2ca01a 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -792,7 +792,7 @@ static enum ref_transaction_error lock_raw_ref(struct files_ref_store *refs, if (hold_lock_file_for_update_timeout( &lock->lk, ref_file.buf, LOCK_NO_DEREF, - get_files_ref_lock_timeout_ms()) < 0) { + get_files_ref_lock_timeout_ms(transaction->ref_store->repo)) < 0) { int myerr = errno; errno = 0; if (myerr == ENOENT && --attempts_remaining > 0) { @@ -1190,13 +1190,17 @@ static int remove_empty_directories(struct strbuf *path) return 
remove_dir_recursively(path, REMOVE_DIR_EMPTY_ONLY); } +struct create_reflock_cb { + struct lock_file *lk; + struct repository *repo; +}; + static int create_reflock(const char *path, void *cb) { - struct lock_file *lk = cb; - + struct create_reflock_cb *data = cb; return hold_lock_file_for_update_timeout( - lk, path, LOCK_NO_DEREF, - get_files_ref_lock_timeout_ms()) < 0 ? -1 : 0; + data->lk, path, LOCK_NO_DEREF, + get_files_ref_lock_timeout_ms(data->repo)) < 0 ? -1 : 0; } /* @@ -1208,6 +1212,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, { struct strbuf ref_file = STRBUF_INIT; struct ref_lock *lock; + struct create_reflock_cb cb_data; files_assert_main_repository(refs, "lock_ref_oid_basic"); assert(err); @@ -1229,8 +1234,10 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, lock->ref_name = xstrdup(refname); lock->count = 1; + cb_data.lk = &lock->lk; + cb_data.repo = refs->base.repo; - if (raceproof_create_file(ref_file.buf, create_reflock, &lock->lk)) { + if (raceproof_create_file(ref_file.buf, create_reflock, &cb_data)) { unable_to_lock_message(ref_file.buf, errno, err); goto error_return; } diff --git a/refs/refs-internal.h b/refs/refs-internal.h index d79e35fd269a6c..e4cfd9e19ee74f 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -43,7 +43,7 @@ struct ref_transaction; * Return the length of time to retry acquiring a loose reference lock * before giving up, in milliseconds: */ -long get_files_ref_lock_timeout_ms(void); +long get_files_ref_lock_timeout_ms(struct repository *repo); /* * Return true iff refname is minimally safe. "Safe" here means that From 9a03f165a41d708c672e18e69d43f69689981e7d Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Sat, 4 Apr 2026 19:28:39 +0530 Subject: [PATCH 112/241] refs: remove the_hash_algo global state refs.c uses the_hash_algo in multiple places, relying on global state for the object hash algorithm. 
Replace these uses with the appropriate repository-specific hash_algo. In transaction-related functions (ref_transaction_create, ref_transaction_delete, migrate_one_ref, and transaction_hook_feed_stdin), use transaction->ref_store->repo->hash_algo. In other cases, such as repo_get_submodule_ref_store(), use repo->hash_algo. Signed-off-by: Shreyansh Paliwal Acked-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/refs.c b/refs.c index 4ab746a3cb555c..d13ca9a37c63e3 100644 --- a/refs.c +++ b/refs.c @@ -1472,7 +1472,7 @@ int ref_transaction_create(struct ref_transaction *transaction, return 1; } return ref_transaction_update(transaction, refname, new_oid, - null_oid(the_hash_algo), new_target, NULL, flags, + null_oid(transaction->ref_store->repo->hash_algo), new_target, NULL, flags, msg, err); } @@ -1491,7 +1491,7 @@ int ref_transaction_delete(struct ref_transaction *transaction, if (old_target && !(flags & REF_NO_DEREF)) BUG("delete cannot operate on symrefs with deref mode"); return ref_transaction_update(transaction, refname, - null_oid(the_hash_algo), old_oid, + null_oid(transaction->ref_store->repo->hash_algo), old_oid, NULL, old_target, flags, msg, err); } @@ -2379,7 +2379,7 @@ struct ref_store *repo_get_submodule_ref_store(struct repository *repo, subrepo = xmalloc(sizeof(*subrepo)); if (repo_submodule_init(subrepo, repo, submodule, - null_oid(the_hash_algo))) { + null_oid(repo->hash_algo))) { free(subrepo); goto done; } @@ -2571,14 +2571,14 @@ static int transaction_hook_feed_stdin(int hook_stdin_fd, void *pp_cb, void *pp_ strbuf_reset(buf); if (!(update->flags & REF_HAVE_OLD)) - strbuf_addf(buf, "%s ", oid_to_hex(null_oid(the_hash_algo))); + strbuf_addf(buf, "%s ", oid_to_hex(null_oid(transaction->ref_store->repo->hash_algo))); else if (update->old_target) strbuf_addf(buf, "ref:%s ", update->old_target); else strbuf_addf(buf, "%s ", oid_to_hex(&update->old_oid)); if 
(!(update->flags & REF_HAVE_NEW)) - strbuf_addf(buf, "%s ", oid_to_hex(null_oid(the_hash_algo))); + strbuf_addf(buf, "%s ", oid_to_hex(null_oid(transaction->ref_store->repo->hash_algo))); else if (update->new_target) strbuf_addf(buf, "ref:%s ", update->new_target); else @@ -3146,6 +3146,7 @@ struct migration_data { static int migrate_one_ref(const struct reference *ref, void *cb_data) { struct migration_data *data = cb_data; + const struct git_hash_algo *hash_algo = data->transaction->ref_store->repo->hash_algo; struct strbuf symref_target = STRBUF_INIT; int ret; @@ -3154,7 +3155,7 @@ static int migrate_one_ref(const struct reference *ref, void *cb_data) if (ret < 0) goto done; - ret = ref_transaction_update(data->transaction, ref->name, NULL, null_oid(the_hash_algo), + ret = ref_transaction_update(data->transaction, ref->name, NULL, null_oid(hash_algo), symref_target.buf, NULL, REF_SKIP_CREATE_REFLOG | REF_NO_DEREF, NULL, data->errbuf); if (ret < 0) From 57c590feb96b2298e0966bea3ce88c72fca37bbd Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Sat, 4 Apr 2026 19:28:40 +0530 Subject: [PATCH 113/241] refs/reftable-backend: drop uses of the_repository reftable_be_init() and reftable_be_create_on_disk() use the_repository even though a repository instance is already available, either directly or via struct ref_store. Replace these uses with the appropriate local repository instance (repo or ref_store->repo) to avoid relying on global state. Note that USE_THE_REPOSITORY_VARIABLE cannot be removed yet, as is_bare_repository() is still there in the file. 
Signed-off-by: Shreyansh Paliwal Acked-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index b124404663edf6..7c8a992fcb40b9 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -404,13 +404,13 @@ static struct ref_store *reftable_be_init(struct repository *repo, default: BUG("unknown hash algorithm %d", repo->hash_algo->format_id); } - refs->write_options.default_permissions = calc_shared_perm(the_repository, 0666 & ~mask); + refs->write_options.default_permissions = calc_shared_perm(repo, 0666 & ~mask); refs->write_options.disable_auto_compact = !git_env_bool("GIT_TEST_REFTABLE_AUTOCOMPACTION", 1); refs->write_options.lock_timeout_ms = 100; refs->write_options.fsync = reftable_be_fsync; - repo_config(the_repository, reftable_be_config, &refs->write_options); + repo_config(repo, reftable_be_config, &refs->write_options); /* * It is somewhat unfortunate that we have to mirror the default block @@ -492,7 +492,7 @@ static int reftable_be_create_on_disk(struct ref_store *ref_store, struct strbuf sb = STRBUF_INIT; strbuf_addf(&sb, "%s/reftable", refs->base.gitdir); - safe_create_dir(the_repository, sb.buf, 1); + safe_create_dir(ref_store->repo, sb.buf, 1); strbuf_reset(&sb); strbuf_release(&sb); From da1a90eab0e252b965547383552f85e675f0ddc9 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 8 Apr 2026 20:26:23 +0000 Subject: [PATCH 114/241] xdiff/xdl_cleanup_records: delete local recs pointer Simplify the first 2 for loops by directly indexing the xdfile.recs. recs is unused in the last 2 for loops, remove it. Best viewed with --color-words. 
Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index cd4fc405eb18fe..d6e1901d2d01c9 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -269,7 +269,6 @@ static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) { */ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { long i, nm, mlim; - xrecord_t *recs; xdlclass_t *rcrec; uint8_t *action1 = NULL, *action2 = NULL; bool need_min = !!(cf->flags & XDF_NEED_MINIMAL); @@ -293,16 +292,18 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd */ if ((mlim = xdl_bogosqrt((long)xdf1->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; - for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { - rcrec = cf->rcrecs[recs->minimal_perfect_hash]; + for (i = xdf1->dstart; i <= xdf1->dend; i++) { + size_t mph1 = xdf1->recs[i].minimal_perfect_hash; + rcrec = cf->rcrecs[mph1]; nm = rcrec ? rcrec->len2 : 0; action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; } if ((mlim = xdl_bogosqrt((long)xdf2->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; - for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { - rcrec = cf->rcrecs[recs->minimal_perfect_hash]; + for (i = xdf2->dstart; i <= xdf2->dend; i++) { + size_t mph2 = xdf2->recs[i].minimal_perfect_hash; + rcrec = cf->rcrecs[mph2]; nm = rcrec ? rcrec->len1 : 0; action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; } @@ -312,8 +313,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd * false, or become true. 
*/ xdf1->nreff = 0; - for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; - i <= xdf1->dend; i++, recs++) { + for (i = xdf1->dstart; i <= xdf1->dend; i++) { if (action1[i] == KEEP || (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) { xdf1->reference_index[xdf1->nreff++] = i; @@ -324,8 +324,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd } xdf2->nreff = 0; - for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; - i <= xdf2->dend; i++, recs++) { + for (i = xdf2->dstart; i <= xdf2->dend; i++) { if (action2[i] == KEEP || (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) { xdf2->reference_index[xdf2->nreff++] = i; From e85a4167dde8cf6e1357e063c400a3d5c3a0e897 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 8 Apr 2026 20:26:24 +0000 Subject: [PATCH 115/241] xdiff: use unambiguous types in xdl_bogo_sqrt() There is no real square root for a negative number and size_t may not be large enough for certain applications, replace long with uint64_t. 
Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xdiffi.c | 2 +- xdiff/xprepare.c | 4 ++-- xdiff/xutils.c | 4 ++-- xdiff/xutils.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 4376f943dba539..88708c12a3299a 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -348,7 +348,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, kvdf += xe->xdf2.nreff + 1; kvdb += xe->xdf2.nreff + 1; - xenv.mxcost = xdl_bogosqrt(ndiags); + xenv.mxcost = (long)xdl_bogosqrt((uint64_t)ndiags); if (xenv.mxcost < XDL_MAX_COST_MIN) xenv.mxcost = XDL_MAX_COST_MIN; xenv.snake_cnt = XDL_SNAKE_CNT; diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index d6e1901d2d01c9..48fb5ce6fe6f68 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -290,7 +290,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd /* * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE. */ - if ((mlim = xdl_bogosqrt((long)xdf1->nrec)) > XDL_MAX_EQLIMIT) + if ((mlim = (long)xdl_bogosqrt((uint64_t)xdf1->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf1->dstart; i <= xdf1->dend; i++) { size_t mph1 = xdf1->recs[i].minimal_perfect_hash; @@ -299,7 +299,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? 
INVESTIGATE: KEEP; } - if ((mlim = xdl_bogosqrt((long)xdf2->nrec)) > XDL_MAX_EQLIMIT) + if ((mlim = (long)xdl_bogosqrt((uint64_t)xdf2->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf2->dstart; i <= xdf2->dend; i++) { size_t mph2 = xdf2->recs[i].minimal_perfect_hash; diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 77ee1ad9c86875..9a999acdc079d2 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -23,8 +23,8 @@ #include "xinclude.h" -long xdl_bogosqrt(long n) { - long i; +uint64_t xdl_bogosqrt(uint64_t n) { + uint64_t i; /* * Classical integer square root approximation using shifts. diff --git a/xdiff/xutils.h b/xdiff/xutils.h index 615b4a9d355433..58f9d74cda37a3 100644 --- a/xdiff/xutils.h +++ b/xdiff/xutils.h @@ -25,7 +25,7 @@ -long xdl_bogosqrt(long n); +uint64_t xdl_bogosqrt(uint64_t n); int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize, xdemitcb_t *ecb); int xdl_cha_init(chastore_t *cha, long isize, long icount); From 042cefe77b10d2610c0410adf555274a6152503f Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 8 Apr 2026 20:26:25 +0000 Subject: [PATCH 116/241] xdiff/xdl_cleanup_records: use unambiguous types Change the parameters of xdl_clean_mmatch() and the local variables i, nm, mlim in xdl_cleanup_records() to use unambiguous types. Best viewed with --color-words. 
Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 48fb5ce6fe6f68..386668a92d7358 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -197,8 +197,8 @@ void xdl_free_env(xdfenv_t *xe) { } -static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) { - long r, rdis0, rpdis0, rdis1, rpdis1; +static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, ptrdiff_t e) { + ptrdiff_t r, rdis0, rpdis0, rdis1, rpdis1; /* * Limits the window that is examined during the similar-lines @@ -268,7 +268,7 @@ static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) { * might be potentially discarded if they appear in a run of discardable. */ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { - long i, nm, mlim; + ptrdiff_t i, nm, mlim; xdlclass_t *rcrec; uint8_t *action1 = NULL, *action2 = NULL; bool need_min = !!(cf->flags & XDF_NEED_MINIMAL); From 59cb212e849749f61d3db6e2565c7c63d7a637e6 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 8 Apr 2026 20:26:26 +0000 Subject: [PATCH 117/241] xdiff/xdl_cleanup_records: make limits more clear Make the handling of per-file limits and the minimal-case clearer. * Use explicit per-file limit variables (mlim1, mlim2) and initialize them. * The additional condition `!need_min` is redundant now, remove it. Best viewed with --color-words. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 386668a92d7358..bd8baf214db64e 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -268,7 +268,7 @@ static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, pt * might be potentially discarded if they appear in a run of discardable. 
*/ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { - ptrdiff_t i, nm, mlim; + ptrdiff_t i, nm, mlim1, mlim2; xdlclass_t *rcrec; uint8_t *action1 = NULL, *action2 = NULL; bool need_min = !!(cf->flags & XDF_NEED_MINIMAL); @@ -287,25 +287,30 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd goto cleanup; } + if (need_min) { + /* i.e. infinity */ + mlim1 = PTRDIFF_MAX; + mlim2 = PTRDIFF_MAX; + } else { + mlim1 = XDL_MIN(xdl_bogosqrt(xdf1->nrec), XDL_MAX_EQLIMIT); + mlim2 = XDL_MIN(xdl_bogosqrt(xdf2->nrec), XDL_MAX_EQLIMIT); + } + /* * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE. */ - if ((mlim = (long)xdl_bogosqrt((uint64_t)xdf1->nrec)) > XDL_MAX_EQLIMIT) - mlim = XDL_MAX_EQLIMIT; for (i = xdf1->dstart; i <= xdf1->dend; i++) { size_t mph1 = xdf1->recs[i].minimal_perfect_hash; rcrec = cf->rcrecs[mph1]; nm = rcrec ? rcrec->len2 : 0; - action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; + action1[i] = (nm == 0) ? DISCARD: nm >= mlim1 ? INVESTIGATE: KEEP; } - if ((mlim = (long)xdl_bogosqrt((uint64_t)xdf2->nrec)) > XDL_MAX_EQLIMIT) - mlim = XDL_MAX_EQLIMIT; for (i = xdf2->dstart; i <= xdf2->dend; i++) { size_t mph2 = xdf2->recs[i].minimal_perfect_hash; rcrec = cf->rcrecs[mph2]; nm = rcrec ? rcrec->len1 : 0; - action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; + action2[i] = (nm == 0) ? DISCARD: nm >= mlim2 ? INVESTIGATE: KEEP; } /* From e7e8d804028cc9a9b5a1b832cc6ec3078a81e3e3 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 8 Apr 2026 20:26:27 +0000 Subject: [PATCH 118/241] xdiff/xdl_cleanup_records: make setting action easier to follow Rewrite nested ternaries with a clear if/else ladder for action1/action2 to improve readability while preserving behavior. 
Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index bd8baf214db64e..471d9567c9345b 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -303,14 +303,24 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd size_t mph1 = xdf1->recs[i].minimal_perfect_hash; rcrec = cf->rcrecs[mph1]; nm = rcrec ? rcrec->len2 : 0; - action1[i] = (nm == 0) ? DISCARD: nm >= mlim1 ? INVESTIGATE: KEEP; + if (nm == 0) + action1[i] = DISCARD; + else if (nm < mlim1) + action1[i] = KEEP; + else /* nm >= mlim1 */ + action1[i] = INVESTIGATE; } for (i = xdf2->dstart; i <= xdf2->dend; i++) { size_t mph2 = xdf2->recs[i].minimal_perfect_hash; rcrec = cf->rcrecs[mph2]; nm = rcrec ? rcrec->len1 : 0; - action2[i] = (nm == 0) ? DISCARD: nm >= mlim2 ? INVESTIGATE: KEEP; + if (nm == 0) + action2[i] = DISCARD; + else if (nm < mlim2) + action2[i] = KEEP; + else /* nm >= mlim2 */ + action2[i] = INVESTIGATE; } /* From 0ee3c64b97fcce85c590971c2bfe2b2c91a840ea Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Wed, 8 Apr 2026 20:26:28 +0000 Subject: [PATCH 119/241] xdiff/xdl_cleanup_records: put braces around the else clause Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 471d9567c9345b..18ee7e815c3f5b 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -333,9 +333,10 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) { xdf1->reference_index[xdf1->nreff++] = i; /* changed[i] remains false, i.e. keep */ - } else + } else { xdf1->changed[i] = true; /* i.e. 
discard */ + } } xdf2->nreff = 0; @@ -344,9 +345,10 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) { xdf2->reference_index[xdf2->nreff++] = i; /* changed[i] remains false, i.e. keep */ - } else + } else { xdf2->changed[i] = true; /* i.e. discard */ + } } cleanup: From 9232a7adf89cdb253d0c6669a0dc59155c59b0d3 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 2 Apr 2026 15:57:41 +0100 Subject: [PATCH 120/241] xdiff: reduce size of action arrays When the myers algorithm is selected the input files are pre-processed to remove any common prefix and suffix. Then any lines that appear only in one side of the diff are marked as changed and frequently occurring lines are marked as changed if they are adjacent to a changed line. This step requires a couple of temporary arrays. As the common prefix and suffix have already been removed, the arrays only need to be big enough to hold the lines between them, not the whole file. Reduce the size of the arrays and adjust the loops that use them accordingly while taking care to keep indexing the arrays in xdfile_t with absolute line numbers. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 18ee7e815c3f5b..cbfc039bcf20b9 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -273,16 +273,19 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd uint8_t *action1 = NULL, *action2 = NULL; bool need_min = !!(cf->flags & XDF_NEED_MINIMAL); int ret = 0; + ptrdiff_t off = xdf1->dstart; + ptrdiff_t len1 = xdf1->dend - off + 1; + ptrdiff_t len2 = xdf2->dend - off + 1; /* * Create temporary arrays that will help us decide if * changed[i] should remain false, or become true. 
*/ - if (!XDL_CALLOC_ARRAY(action1, xdf1->nrec + 1)) { + if (!XDL_CALLOC_ARRAY(action1, len1)) { ret = -1; goto cleanup; } - if (!XDL_CALLOC_ARRAY(action2, xdf2->nrec + 1)) { + if (!XDL_CALLOC_ARRAY(action2, len2)) { ret = -1; goto cleanup; } @@ -299,8 +302,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd /* * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE. */ - for (i = xdf1->dstart; i <= xdf1->dend; i++) { - size_t mph1 = xdf1->recs[i].minimal_perfect_hash; + for (i = 0; i < len1; i++) { + size_t mph1 = xdf1->recs[i + off].minimal_perfect_hash; rcrec = cf->rcrecs[mph1]; nm = rcrec ? rcrec->len2 : 0; if (nm == 0) @@ -311,8 +314,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd action1[i] = INVESTIGATE; } - for (i = xdf2->dstart; i <= xdf2->dend; i++) { - size_t mph2 = xdf2->recs[i].minimal_perfect_hash; + for (i = 0; i < len2; i++) { + size_t mph2 = xdf2->recs[i + off].minimal_perfect_hash; rcrec = cf->rcrecs[mph2]; nm = rcrec ? rcrec->len1 : 0; if (nm == 0) @@ -328,25 +331,25 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd * false, or become true. */ xdf1->nreff = 0; - for (i = xdf1->dstart; i <= xdf1->dend; i++) { + for (i = 0; i < len1; i++) { if (action1[i] == KEEP || - (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) { - xdf1->reference_index[xdf1->nreff++] = i; + (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, 0, len1 - 1))) { + xdf1->reference_index[xdf1->nreff++] = i + off; /* changed[i] remains false, i.e. keep */ } else { - xdf1->changed[i] = true; + xdf1->changed[i + off] = true; /* i.e. 
discard */ } } xdf2->nreff = 0; - for (i = xdf2->dstart; i <= xdf2->dend; i++) { + for (i = 0; i < len2; i++) { if (action2[i] == KEEP || - (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) { - xdf2->reference_index[xdf2->nreff++] = i; + (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, 0, len2 - 1))) { + xdf2->reference_index[xdf2->nreff++] = i + off; /* changed[i] remains false, i.e. keep */ } else { - xdf2->changed[i] = true; + xdf2->changed[i + off] = true; /* i.e. discard */ } } From 77c188e4a6f4b401462768029a13100c55dfe312 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 2 Apr 2026 15:57:42 +0100 Subject: [PATCH 121/241] xdiff: cleanup xdl_clean_mmatch() Remove the "s" parameter as, since the last commit, this function is always called with s == 0. Also change parameter "e" to expect a length, rather than the index of the last line to simplify the caller. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index cbfc039bcf20b9..651c65a0122474 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -197,8 +197,9 @@ void xdl_free_env(xdfenv_t *xe) { } -static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, ptrdiff_t e) { +static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t len) { ptrdiff_t r, rdis0, rpdis0, rdis1, rpdis1; + ptrdiff_t s = 0, e = len - 1; /* * Limits the window that is examined during the similar-lines @@ -333,7 +334,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd xdf1->nreff = 0; for (i = 0; i < len1; i++) { if (action1[i] == KEEP || - (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, 0, len1 - 1))) { + (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, len1))) { xdf1->reference_index[xdf1->nreff++] = i + off; /* changed[i] remains false, i.e. 
keep */ } else { @@ -345,7 +346,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd xdf2->nreff = 0; for (i = 0; i < len2; i++) { if (action2[i] == KEEP || - (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, 0, len2 - 1))) { + (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, len2))) { xdf2->reference_index[xdf2->nreff++] = i + off; /* changed[i] remains false, i.e. keep */ } else { From 8c9d203485b2983ef87ab72f7a9b7853da0707ca Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 2 Apr 2026 15:57:43 +0100 Subject: [PATCH 122/241] xprepare: simplify error handling If either of the two allocations fail we want to take the same action so use a single if statement. This saves a few lines and makes it easier for the next commit to add a couple more allocations. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 651c65a0122474..1fd85605cec203 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -282,11 +282,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd * Create temporary arrays that will help us decide if * changed[i] should remain false, or become true. */ - if (!XDL_CALLOC_ARRAY(action1, len1)) { - ret = -1; - goto cleanup; - } - if (!XDL_CALLOC_ARRAY(action2, len2)) { + if (!XDL_CALLOC_ARRAY(action1, len1) || + !XDL_CALLOC_ARRAY(action2, len2)) { ret = -1; goto cleanup; } From 40c92ff457ece00aced93f2fcc6014b916d9fcf8 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Thu, 2 Apr 2026 15:57:44 +0100 Subject: [PATCH 123/241] xdiff: reduce the size of array When the myers algorithm is selected the input files are pre-processed to remove any common prefix and suffix and any lines that appear in only one file. This requires a map to be created between the lines that are processed by the myers algorithm and the lines in the original file. 
That map does not include the common lines at the beginning and end of the files but the array is allocated to be the size of the whole file. Move the allocation into xdl_cleanup_records() where the map is populated and we know how big it needs to be. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 1fd85605cec203..862ce827a506e8 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -171,12 +171,6 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ if (!XDL_CALLOC_ARRAY(xdf->changed, xdf->nrec + 2)) goto abort; - if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) && - (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) { - if (!XDL_ALLOC_ARRAY(xdf->reference_index, xdf->nrec + 1)) - goto abort; - } - xdf->changed += 1; xdf->nreff = 0; xdf->dstart = 0; @@ -283,7 +277,10 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd * changed[i] should remain false, or become true. */ if (!XDL_CALLOC_ARRAY(action1, len1) || - !XDL_CALLOC_ARRAY(action2, len2)) { + !XDL_CALLOC_ARRAY(action2, len2) || + !XDL_ALLOC_ARRAY(xdf1->reference_index, len1) || + !XDL_ALLOC_ARRAY(xdf2->reference_index, len2)) + { ret = -1; goto cleanup; } From 6077dc8a427950392be15a5507908d5e87a721a6 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Thu, 9 Apr 2026 22:44:31 +0000 Subject: [PATCH 124/241] docs: update version with default Rust support We missed the cut-off for Rust by default in 2.53, but we still can enable it by default for 2.54, so update our breaking changes document accordingly. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- Documentation/BreakingChanges.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/BreakingChanges.adoc b/Documentation/BreakingChanges.adoc index f814450d2f65ac..510ed98b65d755 100644 --- a/Documentation/BreakingChanges.adoc +++ b/Documentation/BreakingChanges.adoc @@ -190,7 +190,7 @@ milestones for the introduction of Rust: 1. Initially, with Git 2.52, support for Rust will be auto-detected by Meson and disabled in our Makefile so that the project can sort out the initial infrastructure. -2. In Git 2.53, both build systems will default-enable support for Rust. +2. In Git 2.54, both build systems will default-enable support for Rust. Consequently, builds will break by default if Rust is not available on the build host. The use of Rust can still be explicitly disabled via build flags. From 40c789dfc250486e60b7d7cdba47d8423a754abf Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Thu, 9 Apr 2026 22:44:32 +0000 Subject: [PATCH 125/241] ci: install cargo on Alpine We'll make Rust the default in a future commit, so be sure to install Cargo (which will also install Rust) to prepare for that case. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- ci/install-dependencies.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/install-dependencies.sh b/ci/install-dependencies.sh index c55441d9df91fd..10c3530d1aacdd 100755 --- a/ci/install-dependencies.sh +++ b/ci/install-dependencies.sh @@ -29,7 +29,7 @@ alpine-*) apk add --update shadow sudo meson ninja-build gcc libc-dev curl-dev openssl-dev expat-dev gettext \ zlib-ng-dev pcre2-dev python3 musl-libintl perl-utils ncurses \ apache2 apache2-http2 apache2-proxy apache2-ssl apache2-webdav apr-util-dbd_sqlite3 \ - bash cvs gnupg perl-cgi perl-dbd-sqlite perl-io-tty >/dev/null + bash cvs gnupg perl-cgi perl-dbd-sqlite perl-io-tty cargo >/dev/null ;; fedora-*|almalinux-*) case "$jobname" in From 30e6f7adf626af926a02897294363dbf5f3bbe65 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Thu, 9 Apr 2026 22:44:33 +0000 Subject: [PATCH 126/241] Linux: link against libdl Older versions of Rust on Linux, such as that used in Debian 11 in our CI, require linking against libdl. Were we linking with Cargo, this would be included automatically, but since we're not, explicitly set it in the system-specific config. This library is part of libc, so linking against it if it happens to be unnecessary will add no dependencies to the resulting binary. In addition, it is provided by both glibc and musl, so it should be portable to almost all Linux systems. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- config.mak.uname | 1 + 1 file changed, 1 insertion(+) diff --git a/config.mak.uname b/config.mak.uname index ccb3f718812740..7aab56c590069e 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -63,6 +63,7 @@ ifeq ($(uname_S),Linux) PROCFS_EXECUTABLE_PATH = /proc/self/exe HAVE_PLATFORM_PROCINFO = YesPlease COMPAT_OBJS += compat/linux/procinfo.o + EXTLIBS += -ldl # centos7/rhel7 provides gcc 4.8.5 and zlib 1.2.7. 
ifneq ($(findstring .el7.,$(uname_R)),) BASIC_CFLAGS += -std=c99 From 32d5b905909e781786c4735e6bd71503b23e4fb1 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Thu, 9 Apr 2026 22:44:34 +0000 Subject: [PATCH 127/241] Enable Rust by default Our breaking changes document says that we'll enable Rust by default in Git 2.54. Adjust the Makefile to switch the option from WITH_RUST to NO_RUST to enable it by default and update the help text accordingly. Similarly, for Meson, enable the option by default and do not automatically disable it if Cargo is missing, since the goal is to help users find where they are likely to have problems in the future. Update our CI tests to swap out the single Linux job with Rust to a single job without, both for Makefile and Meson. Similarly, update the Windows Makefile job to not use Rust, while the Meson job (which does not build with ci/lib.sh) will default to having it enabled. Move the check for Cargo in the Meson build because it is no longer needed in the main script. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- Makefile | 10 +++++----- ci/lib.sh | 3 +++ ci/run-build-and-tests.sh | 6 ++++-- meson.build | 3 +-- meson_options.txt | 2 +- src/meson.build | 1 + 6 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index dbf00220541ce1..84b59959dedc5a 100644 --- a/Makefile +++ b/Makefile @@ -498,9 +498,9 @@ include shared.mak # # == Optional Rust support == # -# Define WITH_RUST if you want to include features and subsystems written in -# Rust into Git. For now, Rust is still an optional feature of the build -# process. With Git 3.0 though, Rust will always be enabled. +# Define NO_RUST if you want to disable features and subsystems written in Rust +# from being compiled into Git. For now, Rust is still an optional feature of +# the build process. With Git 3.0 though, Rust will always be enabled. # # Building Rust code requires Cargo. 
# @@ -1351,7 +1351,7 @@ LIB_OBJS += urlmatch.o LIB_OBJS += usage.o LIB_OBJS += userdiff.o LIB_OBJS += utf8.o -ifndef WITH_RUST +ifdef NO_RUST LIB_OBJS += varint.o endif LIB_OBJS += version.o @@ -1590,7 +1590,7 @@ endif ALL_CFLAGS = $(DEVELOPER_CFLAGS) $(CPPFLAGS) $(CFLAGS) $(CFLAGS_APPEND) ALL_LDFLAGS = $(LDFLAGS) $(LDFLAGS_APPEND) -ifdef WITH_RUST +ifndef NO_RUST BASIC_CFLAGS += -DWITH_RUST GITLIBS += $(RUST_LIB) ifeq ($(uname_S),Windows) diff --git a/ci/lib.sh b/ci/lib.sh index 42a2b6a318b874..1cfc8c6efce09e 100755 --- a/ci/lib.sh +++ b/ci/lib.sh @@ -372,6 +372,9 @@ linux-asan-ubsan) osx-meson) MESONFLAGS="$MESONFLAGS -Dcredential_helpers=osxkeychain" ;; +windows-*) + export NO_RUST=UnfortunatelyYes + ;; esac MAKEFLAGS="$MAKEFLAGS CC=${CC:-cc}" diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh index 28cfe730ee5aed..e2d783d90b0181 100755 --- a/ci/run-build-and-tests.sh +++ b/ci/run-build-and-tests.sh @@ -8,11 +8,12 @@ export TEST_CONTRIB_TOO=yes case "$jobname" in +linux-musl-meson) + MESONFLAGS="$MESONFLAGS -Drust=disabled" + ;; fedora-breaking-changes-musl|linux-breaking-changes) export WITH_BREAKING_CHANGES=YesPlease - export WITH_RUST=YesPlease MESONFLAGS="$MESONFLAGS -Dbreaking_changes=true" - MESONFLAGS="$MESONFLAGS -Drust=enabled" ;; linux-TEST-vars) export OPENSSL_SHA1_UNSAFE=YesPlease @@ -30,6 +31,7 @@ linux-TEST-vars) export GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL=1 ;; linux-clang) + export NO_RUST=UnfortunatelyYes export GIT_TEST_DEFAULT_HASH=sha1 ;; linux-sha256) diff --git a/meson.build b/meson.build index 8309942d184847..deff129cf6d33a 100644 --- a/meson.build +++ b/meson.build @@ -1745,8 +1745,7 @@ version_def_h = custom_target( ) libgit_sources += version_def_h -cargo = find_program('cargo', dirs: program_path, native: true, required: get_option('rust')) -rust_option = get_option('rust').disable_auto_if(not cargo.found()) +rust_option = get_option('rust') if rust_option.allowed() subdir('src') libgit_c_args += '-DWITH_RUST' 
diff --git a/meson_options.txt b/meson_options.txt index 659cbb218f46e0..80a8025f20be6e 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -77,7 +77,7 @@ option('zlib_backend', type: 'combo', choices: ['auto', 'zlib', 'zlib-ng'], valu # Build tweaks. option('breaking_changes', type: 'boolean', value: false, description: 'Enable upcoming breaking changes.') -option('rust', type: 'feature', value: 'auto', +option('rust', type: 'feature', value: 'enabled', description: 'Enable building with Rust.') option('macos_use_homebrew_gettext', type: 'boolean', value: true, description: 'Use gettext from Homebrew instead of the slightly-broken system-provided one.') diff --git a/src/meson.build b/src/meson.build index 45739957b451c9..41a4b231e660c4 100644 --- a/src/meson.build +++ b/src/meson.build @@ -29,6 +29,7 @@ libgit_rs = custom_target('git_rs', ) libgit_dependencies += declare_dependency(link_with: libgit_rs) +cargo = find_program('cargo', dirs: program_path, native: true, required: get_option('rust')) if get_option('tests') test('rust', cargo, args: [ From 8d2ffcf4b4a3a55c56c57c8df617516c25d98380 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Fri, 10 Apr 2026 12:05:56 +0300 Subject: [PATCH 128/241] repository: fix repo_init() memleak due to missing _clear() There is an old pre-existing memory leak in repo_init() due to failing to call clear_repository_format() in the error case. It went undetected because a specific bug is required to trigger it: enable a v1 extension in a repository with format v0. Obviously this can only happen in a development environment, so it does not trigger in normal usage, however the memleak is real and needs fixing. Fix it by also calling clear_repository_format() in the error case. 
Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- repository.c | 1 + 1 file changed, 1 insertion(+) diff --git a/repository.c b/repository.c index 9e5537f53961ed..192d6dc9c477fa 100644 --- a/repository.c +++ b/repository.c @@ -323,6 +323,7 @@ int repo_init(struct repository *repo, return 0; error: + clear_repository_format(&format); repo_clear(repo); return -1; } From 1c9e5b3fa235e0da6f62359af36afea8e7617074 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Fri, 10 Apr 2026 12:05:57 +0300 Subject: [PATCH 129/241] config: add a repo_config_get_uint() helper Next commits add a 'hook.jobs' config option of type 'unsigned int', so add a helper to parse it since the API only supports int and ulong. An alternative is to make 'hook.jobs' an 'int' or parse it as an 'int' then cast it to unsigned, however it's better to use proper helpers for the type. Using 'ulong' is another option which already has helpers, but it's a bit excessive in size for just the jobs number. Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- config.c | 28 ++++++++++++++++++++++++++++ config.h | 13 +++++++++++++ parse.c | 9 +++++++++ parse.h | 1 + 4 files changed, 51 insertions(+) diff --git a/config.c b/config.c index 156f2a24fa0027..a1b92fe083cf43 100644 --- a/config.c +++ b/config.c @@ -1212,6 +1212,15 @@ int git_config_int(const char *name, const char *value, return ret; } +unsigned int git_config_uint(const char *name, const char *value, + const struct key_value_info *kvi) +{ + unsigned int ret; + if (!git_parse_uint(value, &ret)) + die_bad_number(name, value, kvi); + return ret; +} + int64_t git_config_int64(const char *name, const char *value, const struct key_value_info *kvi) { @@ -1907,6 +1916,18 @@ int git_configset_get_int(struct config_set *set, const char *key, int *dest) return 1; } +int git_configset_get_uint(struct config_set *set, const char *key, unsigned int *dest) +{ + const char *value; + struct key_value_info kvi; + + if (!git_configset_get_value(set, 
key, &value, &kvi)) { + *dest = git_config_uint(key, value, &kvi); + return 0; + } else + return 1; +} + int git_configset_get_ulong(struct config_set *set, const char *key, unsigned long *dest) { const char *value; @@ -2356,6 +2377,13 @@ int repo_config_get_int(struct repository *repo, return git_configset_get_int(repo->config, key, dest); } +int repo_config_get_uint(struct repository *repo, + const char *key, unsigned int *dest) +{ + git_config_check_init(repo); + return git_configset_get_uint(repo->config, key, dest); +} + int repo_config_get_ulong(struct repository *repo, const char *key, unsigned long *dest) { diff --git a/config.h b/config.h index ba426a960af9f4..bf47fb3afc61bf 100644 --- a/config.h +++ b/config.h @@ -267,6 +267,12 @@ int git_config_int(const char *, const char *, const struct key_value_info *); int64_t git_config_int64(const char *, const char *, const struct key_value_info *); +/** + * Identical to `git_config_int`, but for unsigned ints. + */ +unsigned int git_config_uint(const char *, const char *, + const struct key_value_info *); + /** * Identical to `git_config_int`, but for unsigned longs. */ @@ -560,6 +566,7 @@ int git_configset_get_value(struct config_set *cs, const char *key, int git_configset_get_string(struct config_set *cs, const char *key, char **dest); int git_configset_get_int(struct config_set *cs, const char *key, int *dest); +int git_configset_get_uint(struct config_set *cs, const char *key, unsigned int *dest); int git_configset_get_ulong(struct config_set *cs, const char *key, unsigned long *dest); int git_configset_get_bool(struct config_set *cs, const char *key, int *dest); int git_configset_get_bool_or_int(struct config_set *cs, const char *key, int *is_bool, int *dest); @@ -650,6 +657,12 @@ int repo_config_get_string_tmp(struct repository *r, */ int repo_config_get_int(struct repository *r, const char *key, int *dest); +/** + * Similar to `repo_config_get_int` but for unsigned ints. 
+ */ +int repo_config_get_uint(struct repository *r, + const char *key, unsigned int *dest); + /** * Similar to `repo_config_get_int` but for unsigned longs. */ diff --git a/parse.c b/parse.c index 48313571aab129..d77f28046a0916 100644 --- a/parse.c +++ b/parse.c @@ -107,6 +107,15 @@ int git_parse_int64(const char *value, int64_t *ret) return 1; } +int git_parse_uint(const char *value, unsigned int *ret) +{ + uintmax_t tmp; + if (!git_parse_unsigned(value, &tmp, maximum_unsigned_value_of_type(unsigned int))) + return 0; + *ret = tmp; + return 1; +} + int git_parse_ulong(const char *value, unsigned long *ret) { uintmax_t tmp; diff --git a/parse.h b/parse.h index ea32de9a91fbfb..a6dd37c4cba273 100644 --- a/parse.h +++ b/parse.h @@ -5,6 +5,7 @@ int git_parse_signed(const char *value, intmax_t *ret, intmax_t max); int git_parse_unsigned(const char *value, uintmax_t *ret, uintmax_t max); int git_parse_ssize_t(const char *, ssize_t *); int git_parse_ulong(const char *, unsigned long *); +int git_parse_uint(const char *value, unsigned int *ret); int git_parse_int(const char *value, int *ret); int git_parse_int64(const char *value, int64_t *ret); int git_parse_double(const char *value, double *ret); From b9a4c9ad247a09602e0e6d0eccec6a43857f62da Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Fri, 10 Apr 2026 12:05:58 +0300 Subject: [PATCH 130/241] hook: parse the hook.jobs config The hook.jobs config is a global way to set hook parallelization for all hooks, in the sense that it is not per-event nor per-hook. Finer-grained configs will be added in later commits which can override it, for e.g. via a per-event type job options. Next commits will also add to this item's documentation. Parse hook.jobs config key in hook_config_lookup_all() and store its value in hook_all_config_cb.jobs, then transfer it into r->jobs after the config pass completes. This is mostly plumbing and the cached value is not yet used. 
Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Documentation/config/hook.adoc | 4 ++++ hook.c | 23 +++++++++++++++++++++-- repository.h | 3 +++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Documentation/config/hook.adoc b/Documentation/config/hook.adoc index 9e78f264396ca5..b7847f9338c65f 100644 --- a/Documentation/config/hook.adoc +++ b/Documentation/config/hook.adoc @@ -22,3 +22,7 @@ hook..enabled:: configuration. This is particularly useful when a hook is defined in a system or global config file and needs to be disabled for a specific repository. See linkgit:git-hook[1]. + +hook.jobs:: + Specifies how many hooks can be run simultaneously during parallelized + hook execution. If unspecified, defaults to 1 (serial execution). diff --git a/hook.c b/hook.c index cc23276d27f035..b8cce00e578d3c 100644 --- a/hook.c +++ b/hook.c @@ -123,11 +123,13 @@ struct hook_config_cache_entry { * commands: friendly-name to command map. * event_hooks: event-name to list of friendly-names map. * disabled_hooks: set of friendly-names with hook..enabled = false. + * jobs: value of the global hook.jobs key. Defaults to 0 if unset (stored in r->hook_jobs). */ struct hook_all_config_cb { struct strmap commands; struct strmap event_hooks; struct string_list disabled_hooks; + unsigned int jobs; }; /* repo_config() callback that collects all hook.* configuration in one pass. */ @@ -143,6 +145,20 @@ static int hook_config_lookup_all(const char *key, const char *value, if (parse_config_key(key, "hook", &name, &name_len, &subkey)) return 0; + /* Handle plain hook. entries that have no hook name component. 
*/ + if (!name) { + if (!strcmp(subkey, "jobs") && value) { + unsigned int v; + if (!git_parse_uint(value, &v)) + warning(_("hook.jobs must be a positive integer, ignoring: '%s'"), value); + else if (!v) + warning(_("hook.jobs must be positive, ignoring: 0")); + else + data->jobs = v; + } + return 0; + } + if (!value) return config_error_nonbool(key); @@ -240,7 +256,7 @@ void hook_cache_clear(struct strmap *cache) /* Populate `cache` with the complete hook configuration */ static void build_hook_config_map(struct repository *r, struct strmap *cache) { - struct hook_all_config_cb cb_data; + struct hook_all_config_cb cb_data = { 0 }; struct hashmap_iter iter; struct strmap_entry *e; @@ -248,7 +264,7 @@ static void build_hook_config_map(struct repository *r, struct strmap *cache) strmap_init(&cb_data.event_hooks); string_list_init_dup(&cb_data.disabled_hooks); - /* Parse all configs in one run. */ + /* Parse all configs in one run, capturing hook.* including hook.jobs. */ repo_config(r, hook_config_lookup_all, &cb_data); /* Construct the cache from parsed configs. */ @@ -292,6 +308,9 @@ static void build_hook_config_map(struct repository *r, struct strmap *cache) strmap_put(cache, e->key, hooks); } + if (r) + r->hook_jobs = cb_data.jobs; + strmap_clear(&cb_data.commands, 1); string_list_clear(&cb_data.disabled_hooks, 0); strmap_for_each_entry(&cb_data.event_hooks, &iter, e) { diff --git a/repository.h b/repository.h index 078059a6e02b10..58e46853d089bf 100644 --- a/repository.h +++ b/repository.h @@ -172,6 +172,9 @@ struct repository { */ struct strmap *hook_config_cache; + /* Cached value of hook.jobs config (0 if unset, defaults to serial). */ + unsigned int hook_jobs; + /* Configurations related to promisor remotes. 
*/ char *repository_format_partial_clone; struct promisor_remote_config *promisor_remote_config; From 680e69f60d2b3838bb98938dbd3e8881bdfde7d6 Mon Sep 17 00:00:00 2001 From: Emily Shaffer Date: Fri, 10 Apr 2026 12:05:59 +0300 Subject: [PATCH 131/241] hook: allow parallel hook execution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hooks always run in sequential order due to the hardcoded jobs == 1 passed to run_process_parallel(). Remove that hardcoding to allow users to run hooks in parallel (opt-in). Users need to decide which hooks to run in parallel, by specifying "parallel = true" in the config, because Git cannot know if their specific hooks are safe to run or not in parallel (for e.g. two hooks might write to the same file or call the same program). Some hooks are unsafe to run in parallel by design: these will marked in the next commit using RUN_HOOKS_OPT_INIT_FORCE_SERIAL. The hook.jobs config specifies the default number of jobs applied to all hooks which have parallelism enabled. Signed-off-by: Emily Shaffer Helped-by: Ævar Arnfjörð Bjarmason Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Documentation/config/hook.adoc | 13 +++ hook.c | 79 ++++++++++++++++-- hook.h | 25 ++++++ t/t1800-hook.sh | 142 +++++++++++++++++++++++++++++++++ 4 files changed, 253 insertions(+), 6 deletions(-) diff --git a/Documentation/config/hook.adoc b/Documentation/config/hook.adoc index b7847f9338c65f..21800db648dca5 100644 --- a/Documentation/config/hook.adoc +++ b/Documentation/config/hook.adoc @@ -23,6 +23,19 @@ hook..enabled:: in a system or global config file and needs to be disabled for a specific repository. See linkgit:git-hook[1]. +hook..parallel:: + Whether the hook `hook.` may run in parallel with other hooks + for the same event. Defaults to `false`. Set to `true` only when the + hook script is safe to run concurrently with other hooks for the same + event. 
If any hook for an event does not have this set to `true`, + all hooks for that event run sequentially regardless of `hook.jobs`. + Only configured (named) hooks need to declare this. Traditional hooks + found in the hooks directory do not need to, and run in parallel when + the effective job count is greater than 1. See linkgit:git-hook[1]. + hook.jobs:: Specifies how many hooks can be run simultaneously during parallelized hook execution. If unspecified, defaults to 1 (serial execution). ++ +This setting has no effect unless all configured hooks for the event have +`hook..parallel` set to `true`. diff --git a/hook.c b/hook.c index b8cce00e578d3c..85c0de5e47b426 100644 --- a/hook.c +++ b/hook.c @@ -116,6 +116,7 @@ struct hook_config_cache_entry { char *command; enum config_scope scope; bool disabled; + bool parallel; }; /* @@ -123,12 +124,14 @@ struct hook_config_cache_entry { * commands: friendly-name to command map. * event_hooks: event-name to list of friendly-names map. * disabled_hooks: set of friendly-names with hook..enabled = false. + * parallel_hooks: friendly-name to parallel flag. * jobs: value of the global hook.jobs key. Defaults to 0 if unset (stored in r->hook_jobs). 
*/ struct hook_all_config_cb { struct strmap commands; struct strmap event_hooks; struct string_list disabled_hooks; + struct strmap parallel_hooks; unsigned int jobs; }; @@ -219,6 +222,15 @@ static int hook_config_lookup_all(const char *key, const char *value, default: break; /* ignore unrecognised values */ } + } else if (!strcmp(subkey, "parallel")) { + int v = git_parse_maybe_bool(value); + if (v >= 0) + strmap_put(&data->parallel_hooks, hook_name, + (void *)(uintptr_t)v); + else + warning(_("hook.%s.parallel must be a boolean," + " ignoring: '%s'"), + hook_name, value); } free(hook_name); @@ -263,6 +275,7 @@ static void build_hook_config_map(struct repository *r, struct strmap *cache) strmap_init(&cb_data.commands); strmap_init(&cb_data.event_hooks); string_list_init_dup(&cb_data.disabled_hooks); + strmap_init(&cb_data.parallel_hooks); /* Parse all configs in one run, capturing hook.* including hook.jobs. */ repo_config(r, hook_config_lookup_all, &cb_data); @@ -282,6 +295,7 @@ static void build_hook_config_map(struct repository *r, struct strmap *cache) struct hook_config_cache_entry *entry; char *command; + bool is_par = !!strmap_get(&cb_data.parallel_hooks, hname); bool is_disabled = !!unsorted_string_list_lookup( &cb_data.disabled_hooks, hname); @@ -302,6 +316,7 @@ static void build_hook_config_map(struct repository *r, struct strmap *cache) entry->command = xstrdup_or_null(command); entry->scope = scope; entry->disabled = is_disabled; + entry->parallel = is_par; string_list_append(hooks, hname)->util = entry; } @@ -312,6 +327,7 @@ static void build_hook_config_map(struct repository *r, struct strmap *cache) r->hook_jobs = cb_data.jobs; strmap_clear(&cb_data.commands, 1); + strmap_clear(&cb_data.parallel_hooks, 0); /* values are uintptr_t, not heap ptrs */ string_list_clear(&cb_data.disabled_hooks, 0); strmap_for_each_entry(&cb_data.event_hooks, &iter, e) { string_list_clear(e->value, 0); @@ -389,6 +405,7 @@ static void list_hooks_add_configured(struct 
repository *r, entry->command ? xstrdup(entry->command) : NULL; hook->u.configured.scope = entry->scope; hook->u.configured.disabled = entry->disabled; + hook->parallel = entry->parallel; string_list_append(list, friendly_name)->util = hook; } @@ -538,21 +555,75 @@ static void run_hooks_opt_clear(struct run_hooks_opt *options) strvec_clear(&options->args); } +/* Determine how many jobs to use for hook execution. */ +static unsigned int get_hook_jobs(struct repository *r, + struct run_hooks_opt *options, + struct string_list *hook_list) +{ + /* + * Hooks needing separate output streams must run sequentially. + * Next commit will allow parallelizing these as well. + */ + if (!options->stdout_to_stderr) + return 1; + + /* + * An explicit job count overrides everything else: this covers both + * FORCE_SERIAL callers (for hooks that must never run in parallel) + * and the -j flag from the CLI. The CLI override is intentional: users + * may want to serialize hooks declared parallel or to parallelize more + * aggressively than the default. + */ + if (options->jobs) + return options->jobs; + + /* + * Use hook.jobs from the already-parsed config cache (in-repo), or + * fallback to a direct config lookup (out-of-repo). + * Default to 1 (serial execution) on failure. + */ + options->jobs = 1; + if (r) { + if (r->gitdir && r->hook_config_cache && r->hook_jobs) + options->jobs = r->hook_jobs; + else + repo_config_get_uint(r, "hook.jobs", &options->jobs); + } + + /* + * Cap to serial any configured hook not marked as parallel = true. + * This enforces the parallel = false default, even for "traditional" + * hooks from the hookdir which cannot be marked parallel = true. 
+ */ + for (size_t i = 0; i < hook_list->nr; i++) { + struct hook *h = hook_list->items[i].util; + if (h->kind == HOOK_CONFIGURED && !h->parallel) { + options->jobs = 1; + break; + } + } + + return options->jobs; +} + int run_hooks_opt(struct repository *r, const char *hook_name, struct run_hooks_opt *options) { + struct string_list *hook_list = list_hooks(r, hook_name, options); struct hook_cb_data cb_data = { .rc = 0, .hook_name = hook_name, + .hook_command_list = hook_list, .options = options, }; int ret = 0; + unsigned int jobs = get_hook_jobs(r, options, hook_list); const struct run_process_parallel_opts opts = { .tr2_category = "hook", .tr2_label = hook_name, - .processes = options->jobs, - .ungroup = options->jobs == 1, + .processes = jobs, + .ungroup = jobs == 1, .get_next_task = pick_next_hook, .start_failure = notify_start_failure, @@ -568,9 +639,6 @@ int run_hooks_opt(struct repository *r, const char *hook_name, if (options->path_to_stdin && options->feed_pipe) BUG("options path_to_stdin and feed_pipe are mutually exclusive"); - if (!options->jobs) - BUG("run_hooks_opt must be called with options.jobs >= 1"); - /* * Ensure cb_data copy and free functions are either provided together, * or neither one is provided. @@ -581,7 +649,6 @@ int run_hooks_opt(struct repository *r, const char *hook_name, if (options->invoked_hook) *options->invoked_hook = 0; - cb_data.hook_command_list = list_hooks(r, hook_name, options); if (!cb_data.hook_command_list->nr) { if (options->error_if_missing) ret = error("cannot find a hook named %s", hook_name); diff --git a/hook.h b/hook.h index 5c5628dd1f822c..ba7056f8723b73 100644 --- a/hook.h +++ b/hook.h @@ -35,6 +35,13 @@ struct hook { } configured; } u; + /** + * Whether this hook may run in parallel with other hooks for the same + * event. Only useful for configured (named) hooks. Traditional hooks + * always default to 0 (serial). Set via `hook..parallel = true`. 
+ */ + bool parallel; + /** * Opaque data pointer used to keep internal state across callback calls. * @@ -72,6 +79,8 @@ struct run_hooks_opt { * * If > 1, output will be buffered and de-interleaved (ungroup=0). * If == 1, output will be real-time (ungroup=1). + * If == 0, the 'hook.jobs' config is used or, if the config is unset, + * defaults to 1 (serial execution). */ unsigned int jobs; @@ -152,7 +161,23 @@ struct run_hooks_opt { hook_data_free_fn feed_pipe_cb_data_free; }; +/** + * Default initializer for hooks. Parallelism is opt-in: .jobs = 0 defers to + * the 'hook.jobs' config, falling back to serial (1) if unset. + */ #define RUN_HOOKS_OPT_INIT { \ + .env = STRVEC_INIT, \ + .args = STRVEC_INIT, \ + .stdout_to_stderr = 1, \ + .jobs = 0, \ +} + +/** + * Initializer for hooks that must always run sequentially regardless of + * 'hook.jobs'. Use this when git knows the hook cannot safely be parallelized + * .jobs = 1 is non-overridable. + */ +#define RUN_HOOKS_OPT_INIT_FORCE_SERIAL { \ .env = STRVEC_INIT, \ .args = STRVEC_INIT, \ .stdout_to_stderr = 1, \ diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 33decc66c0ea8d..a3011a01ca2908 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -21,6 +21,57 @@ setup_hookdir () { test_when_finished rm -rf .git/hooks } +# write_sentinel_hook [sentinel] +# +# Writes a hook that marks itself as started, sleeps for a few seconds, then +# marks itself done. The sleep must be long enough that sentinel_detector can +# observe .started before .done appears when both hooks +# run concurrently in parallel mode. +write_sentinel_hook () { + sentinel="${2:-sentinel}" + write_script "$1" <<-EOF + touch ${sentinel}.started && + sleep 2 && + touch ${sentinel}.done + EOF +} + +# sentinel_detector +# +# Returns a shell command string suitable for use as hook..command. +# The detector must be registered after the sentinel: +# 1. In serial mode, the sentinel has completed (and .done exists) +# before the detector starts. +# 2. 
In parallel mode, both run concurrently so <sentinel>.done has not appeared
+#    yet and the detector just sees <sentinel>.started.
+#
+# At start, poll until <sentinel>.started exists to absorb startup jitter, then
+# write to <outfile>:
+# 1. 'serial' if <sentinel>.done exists (sentinel finished before we started),
+# 2. 'parallel' if only <sentinel>.started exists (sentinel still running),
+# 3. 'timeout' if <sentinel>.started never appeared.
+#
+# The command ends with ':' so when git appends "$@" for hooks that receive
+# positional arguments (e.g. pre-push), the result ': "$@"' is valid shell
+# rather than a syntax error 'fi "$@"'.
+sentinel_detector () {
+	cat <<-EOF
+	i=0
+	while ! test -f ${1}.started && test \$i -lt 10; do
+		sleep 1
+		i=\$((i+1))
+	done
+	if test -f ${1}.done; then
+		echo serial >${2}
+	elif test -f ${1}.started; then
+		echo parallel >${2}
+	else
+		echo timeout >${2}
+	fi
+	:
+	EOF
+}
+
 test_expect_success 'git hook usage' '
 	test_expect_code 129 git hook &&
 	test_expect_code 129 git hook run &&
@@ -658,4 +709,95 @@ test_expect_success 'server push-to-checkout hook expects stdout redirected to s
 	check_stdout_merged_to_stderr push-to-checkout
 '
 
+test_expect_success 'hook.jobs=1 config runs hooks in series' '
+	test_when_finished "rm -f sentinel.started sentinel.done hook.order" &&
+
+	# Use two configured hooks so the execution order is deterministic:
+	# hook-1 (sentinel) is listed before hook-2 (detector), so hook-1
+	# always runs first even in serial mode.
+ test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command \ + "touch sentinel.started; sleep 2; touch sentinel.done" && + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + + test_config hook.jobs 1 && + + git hook run --allow-unknown-hook-name test-hook >out 2>err && + echo serial >expect && + test_cmp expect hook.order +' + +test_expect_success 'hook.jobs=2 config runs hooks in parallel' ' + test_when_finished "rm -f sentinel.started sentinel.done hook.order" && + test_when_finished "rm -rf .git/hooks" && + + mkdir -p .git/hooks && + write_sentinel_hook .git/hooks/test-hook && + + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + test_config hook.hook-2.parallel true && + + test_config hook.jobs 2 && + + git hook run --allow-unknown-hook-name test-hook >out 2>err && + echo parallel >expect && + test_cmp expect hook.order +' + +test_expect_success 'hook..parallel=true enables parallel execution' ' + test_when_finished "rm -f sentinel.started sentinel.done hook.order" && + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command \ + "touch sentinel.started; sleep 2; touch sentinel.done" && + test_config hook.hook-1.parallel true && + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + test_config hook.hook-2.parallel true && + + test_config hook.jobs 2 && + + git hook run --allow-unknown-hook-name test-hook >out 2>err && + echo parallel >expect && + test_cmp expect hook.order +' + +test_expect_success 'hook..parallel=false (default) forces serial execution' ' + test_when_finished "rm -f sentinel.started sentinel.done hook.order" && + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command \ + "touch sentinel.started; sleep 2; touch sentinel.done" && + test_config 
hook.hook-2.event test-hook && + test_config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + + test_config hook.jobs 2 && + + git hook run --allow-unknown-hook-name test-hook >out 2>err && + echo serial >expect && + test_cmp expect hook.order +' + +test_expect_success 'one non-parallel hook forces the whole event to run serially' ' + test_when_finished "rm -f sentinel.started sentinel.done hook.order" && + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command \ + "touch sentinel.started; sleep 2; touch sentinel.done" && + test_config hook.hook-1.parallel true && + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + # hook-2 has no parallel=true: should force serial for all + + test_config hook.jobs 2 && + + git hook run --allow-unknown-hook-name test-hook >out 2>err && + echo serial >expect && + test_cmp expect hook.order +' + test_done From f776b77f0032fb342d567156626ef3fe9586443f Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Fri, 10 Apr 2026 12:06:00 +0300 Subject: [PATCH 132/241] hook: allow pre-push parallel execution pre-push is the only hook that keeps stdout and stderr separate (for backwards compatibility with git-lfs and potentially other users). This prevents parallelizing it because run-command needs stdout_to_stderr=1 to buffer and de-interleave parallel outputs. Since we now default to jobs=1, backwards compatibility is maintained without needing any extension or extra config: when no parallelism is requested, pre-push behaves exactly as before. When the user explicitly opts into parallelism via hook.jobs > 1, hook..jobs > 1, or -jN, they accept the changed output behavior. Document this and let get_hook_jobs() set stdout_to_stderr=1 automatically when jobs > 1, removing the need for any extension infrastructure. 
Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Documentation/config/hook.adoc | 4 ++++ hook.c | 24 ++++++++++++++++-------- hook.h | 6 ++++-- t/t1800-hook.sh | 32 ++++++++++++++++++++++++++++++++ transport.c | 6 ++++-- 5 files changed, 60 insertions(+), 12 deletions(-) diff --git a/Documentation/config/hook.adoc b/Documentation/config/hook.adoc index 21800db648dca5..94c7a9808e29ef 100644 --- a/Documentation/config/hook.adoc +++ b/Documentation/config/hook.adoc @@ -39,3 +39,7 @@ hook.jobs:: + This setting has no effect unless all configured hooks for the event have `hook..parallel` set to `true`. ++ +For `pre-push` hooks, which normally keep stdout and stderr separate, +setting this to a value greater than 1 (or passing `-j`) will merge stdout +into stderr to allow correct de-interleaving of parallel output. diff --git a/hook.c b/hook.c index 85c0de5e47b426..25762b6c8d18f9 100644 --- a/hook.c +++ b/hook.c @@ -555,18 +555,24 @@ static void run_hooks_opt_clear(struct run_hooks_opt *options) strvec_clear(&options->args); } +/* + * When running in parallel, stdout must be merged into stderr so + * run-command can buffer and de-interleave outputs correctly. This + * applies even to hooks like pre-push that normally keep stdout and + * stderr separate: the user has opted into parallelism, so the output + * stream behavior changes accordingly. + */ +static void merge_output_if_parallel(struct run_hooks_opt *options) +{ + if (options->jobs > 1) + options->stdout_to_stderr = 1; +} + /* Determine how many jobs to use for hook execution. */ static unsigned int get_hook_jobs(struct repository *r, struct run_hooks_opt *options, struct string_list *hook_list) { - /* - * Hooks needing separate output streams must run sequentially. - * Next commit will allow parallelizing these as well. 
- */ - if (!options->stdout_to_stderr) - return 1; - /* * An explicit job count overrides everything else: this covers both * FORCE_SERIAL callers (for hooks that must never run in parallel) @@ -575,7 +581,7 @@ static unsigned int get_hook_jobs(struct repository *r, * aggressively than the default. */ if (options->jobs) - return options->jobs; + goto cleanup; /* * Use hook.jobs from the already-parsed config cache (in-repo), or @@ -603,6 +609,8 @@ static unsigned int get_hook_jobs(struct repository *r, } } +cleanup: + merge_output_if_parallel(options); return options->jobs; } diff --git a/hook.h b/hook.h index ba7056f8723b73..01db4226a60306 100644 --- a/hook.h +++ b/hook.h @@ -106,8 +106,10 @@ struct run_hooks_opt { * Send the hook's stdout to stderr. * * This is the default behavior for all hooks except pre-push, - * which has separate stdout and stderr streams for backwards - * compatibility reasons. + * which keeps stdout and stderr separate for backwards compatibility. + * When parallel execution is requested (jobs > 1), get_hook_jobs() + * overrides this to 1 for all hooks so run-command can de-interleave + * their outputs correctly. 
*/ unsigned int stdout_to_stderr:1; diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index a3011a01ca2908..4a978aff5e0c1e 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -800,4 +800,36 @@ test_expect_success 'one non-parallel hook forces the whole event to run seriall test_cmp expect hook.order ' +test_expect_success 'client hooks: pre-push parallel execution merges stdout to stderr' ' + test_when_finished "rm -rf remote-par stdout.actual stderr.actual" && + git init --bare remote-par && + git remote add origin-par remote-par && + test_commit par-commit && + mkdir -p .git/hooks && + setup_hooks pre-push && + test_config hook.jobs 2 && + git push origin-par HEAD:main >stdout.actual 2>stderr.actual && + check_stdout_merged_to_stderr pre-push +' + +test_expect_success 'client hooks: pre-push runs in parallel when hook.jobs > 1' ' + test_when_finished "rm -rf repo-parallel remote-parallel" && + git init --bare remote-parallel && + git init repo-parallel && + git -C repo-parallel remote add origin ../remote-parallel && + test_commit -C repo-parallel A && + + write_sentinel_hook repo-parallel/.git/hooks/pre-push && + git -C repo-parallel config hook.hook-2.event pre-push && + git -C repo-parallel config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + git -C repo-parallel config hook.hook-2.parallel true && + + git -C repo-parallel config hook.jobs 2 && + + git -C repo-parallel push origin HEAD >out 2>err && + echo parallel >expect && + test_cmp expect repo-parallel/hook.order +' + test_done diff --git a/transport.c b/transport.c index e53936d87b641f..9406ec4f2d682a 100644 --- a/transport.c +++ b/transport.c @@ -1391,8 +1391,10 @@ static int run_pre_push_hook(struct transport *transport, opt.feed_pipe_cb_data_free = pre_push_hook_data_free; /* - * pre-push hooks expect stdout & stderr to be separate, so don't merge - * them to keep backwards compatibility with existing hooks. 
+ * pre-push hooks keep stdout and stderr separate by default for + * backwards compatibility. When the user opts into parallel execution + * via hook.jobs > 1 or -j, get_hook_jobs() will set stdout_to_stderr=1 + * automatically so run-command can de-interleave the outputs. */ opt.stdout_to_stderr = 0; From ae25764e50f38b6625e11c3a7d7de290a0075b9c Mon Sep 17 00:00:00 2001 From: Emily Shaffer Date: Fri, 10 Apr 2026 12:06:01 +0300 Subject: [PATCH 133/241] hook: mark non-parallelizable hooks Several hooks are known to be inherently non-parallelizable, so initialize them with RUN_HOOKS_OPT_INIT_FORCE_SERIAL. This pins jobs=1 and overrides any hook.jobs or runtime -j flags. These hooks are: applypatch-msg, pre-commit, prepare-commit-msg, commit-msg, post-commit, post-checkout, and push-to-checkout. Signed-off-by: Emily Shaffer Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Documentation/config/hook.adoc | 14 ++++++++++++++ builtin/am.c | 8 +++++--- builtin/checkout.c | 19 +++++++++++++------ builtin/clone.c | 6 ++++-- builtin/receive-pack.c | 3 ++- builtin/worktree.c | 2 +- commit.c | 2 +- t/t1800-hook.sh | 16 ++++++++++++++++ 8 files changed, 56 insertions(+), 14 deletions(-) diff --git a/Documentation/config/hook.adoc b/Documentation/config/hook.adoc index 94c7a9808e29ef..6f60775c28a902 100644 --- a/Documentation/config/hook.adoc +++ b/Documentation/config/hook.adoc @@ -36,6 +36,20 @@ hook..parallel:: hook.jobs:: Specifies how many hooks can be run simultaneously during parallelized hook execution. If unspecified, defaults to 1 (serial execution). + Some hooks always run sequentially regardless of this setting because + they operate on shared data and cannot safely be parallelized: ++ +-- +`applypatch-msg`;; +`prepare-commit-msg`;; +`commit-msg`;; + Receive a commit message file and may rewrite it in place. +`pre-commit`;; +`post-checkout`;; +`push-to-checkout`;; +`post-commit`;; + Access the working tree, index, or repository state. 
+-- + This setting has no effect unless all configured hooks for the event have `hook..parallel` set to `true`. diff --git a/builtin/am.c b/builtin/am.c index fe6e087eee9ff5..e9623b8307793f 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -490,9 +490,11 @@ static int run_applypatch_msg_hook(struct am_state *state) assert(state->msg); - if (!state->no_verify) - ret = run_hooks_l(the_repository, "applypatch-msg", - am_path(state, "final-commit"), NULL); + if (!state->no_verify) { + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_FORCE_SERIAL; + strvec_push(&opt.args, am_path(state, "final-commit")); + ret = run_hooks_opt(the_repository, "applypatch-msg", &opt); + } if (!ret) { FREE_AND_NULL(state->msg); diff --git a/builtin/checkout.c b/builtin/checkout.c index e031e6188613a6..ac0186a33e559a 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -31,6 +31,7 @@ #include "resolve-undo.h" #include "revision.h" #include "setup.h" +#include "strvec.h" #include "submodule.h" #include "symlinks.h" #include "trace2.h" @@ -123,13 +124,19 @@ static void branch_info_release(struct branch_info *info) static int post_checkout_hook(struct commit *old_commit, struct commit *new_commit, int changed) { - return run_hooks_l(the_repository, "post-checkout", - oid_to_hex(old_commit ? &old_commit->object.oid : null_oid(the_hash_algo)), - oid_to_hex(new_commit ? &new_commit->object.oid : null_oid(the_hash_algo)), - changed ? "1" : "0", NULL); - /* "new_commit" can be NULL when checking out from the index before - a commit exists. */ + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_FORCE_SERIAL; + /* + * "new_commit" can be NULL when checking out from the index before + * a commit exists. + */ + strvec_pushl(&opt.args, + oid_to_hex(old_commit ? &old_commit->object.oid : null_oid(the_hash_algo)), + oid_to_hex(new_commit ? &new_commit->object.oid : null_oid(the_hash_algo)), + changed ? 
"1" : "0", + NULL); + + return run_hooks_opt(the_repository, "post-checkout", &opt); } static int update_some(const struct object_id *oid, struct strbuf *base, diff --git a/builtin/clone.c b/builtin/clone.c index fba3c9c508bc06..d23b0cafcfec30 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -647,6 +647,7 @@ static int checkout(int submodule_progress, struct tree *tree; struct tree_desc t; int err = 0; + struct run_hooks_opt hook_opt = RUN_HOOKS_OPT_INIT_FORCE_SERIAL; if (option_no_checkout) return 0; @@ -697,8 +698,9 @@ static int checkout(int submodule_progress, if (write_locked_index(the_repository->index, &lock_file, COMMIT_LOCK)) die(_("unable to write new index file")); - err |= run_hooks_l(the_repository, "post-checkout", oid_to_hex(null_oid(the_hash_algo)), - oid_to_hex(&oid), "1", NULL); + strvec_pushl(&hook_opt.args, oid_to_hex(null_oid(the_hash_algo)), + oid_to_hex(&oid), "1", NULL); + err |= run_hooks_opt(the_repository, "post-checkout", &hook_opt); if (!err && (option_recurse_submodules.nr > 0)) { struct child_process cmd = CHILD_PROCESS_INIT; diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index dada55884a0b06..6da60f640ce3af 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1455,7 +1455,8 @@ static const char *push_to_checkout(unsigned char *hash, struct strvec *env, const char *work_tree) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_FORCE_SERIAL; + opt.invoked_hook = invoked_hook; strvec_pushf(env, "GIT_WORK_TREE=%s", absolute_path(work_tree)); diff --git a/builtin/worktree.c b/builtin/worktree.c index 4fd6f7575f9f76..d21c43fde38b5e 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -609,7 +609,7 @@ static int add_worktree(const char *path, const char *refname, * is_junk is cleared, but do return appropriate code when hook fails. 
*/ if (!ret && opts->checkout && !opts->orphan) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_FORCE_SERIAL; strvec_pushl(&opt.env, "GIT_DIR", "GIT_WORK_TREE", NULL); strvec_pushl(&opt.args, diff --git a/commit.c b/commit.c index 80d8d078757dbc..4385ae4329e921 100644 --- a/commit.c +++ b/commit.c @@ -1970,7 +1970,7 @@ size_t ignored_log_message_bytes(const char *buf, size_t len) int run_commit_hook(int editor_is_used, const char *index_file, int *invoked_hook, const char *name, ...) { - struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT_FORCE_SERIAL; va_list args; const char *arg; diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 4a978aff5e0c1e..63fa25bca23c51 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -832,4 +832,20 @@ test_expect_success 'client hooks: pre-push runs in parallel when hook.jobs > 1' test_cmp expect repo-parallel/hook.order ' +test_expect_success 'hook.jobs=2 is ignored for force-serial hooks (pre-commit)' ' + test_when_finished "rm -f sentinel.started sentinel.done hook.order" && + test_config hook.hook-1.event pre-commit && + test_config hook.hook-1.command \ + "touch sentinel.started; sleep 2; touch sentinel.done" && + test_config hook.hook-1.parallel true && + test_config hook.hook-2.event pre-commit && + test_config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + test_config hook.hook-2.parallel true && + test_config hook.jobs 2 && + git commit --allow-empty -m "test: verify force-serial on pre-commit" && + echo serial >expect && + test_cmp expect hook.order +' + test_done From 091d2dbeb452b2c8223c622b54e96ebd273b5a78 Mon Sep 17 00:00:00 2001 From: Emily Shaffer Date: Fri, 10 Apr 2026 12:06:02 +0300 Subject: [PATCH 134/241] hook: add -j/--jobs option to git hook run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose the parallel job count as a command-line flag so 
callers can request parallelism without relying only on the hook.jobs config. Add tests covering serial/parallel execution and TTY behaviour under -j1 vs -jN. Signed-off-by: Emily Shaffer Helped-by: Ævar Arnfjörð Bjarmason Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Documentation/git-hook.adoc | 23 +++++- builtin/hook.c | 5 +- hook.c | 17 +++++ t/t1800-hook.sh | 135 ++++++++++++++++++++++++++++++++++-- 4 files changed, 170 insertions(+), 10 deletions(-) diff --git a/Documentation/git-hook.adoc b/Documentation/git-hook.adoc index 318c637bd8eba5..46ea52db55f268 100644 --- a/Documentation/git-hook.adoc +++ b/Documentation/git-hook.adoc @@ -8,7 +8,8 @@ git-hook - Run git hooks SYNOPSIS -------- [verse] -'git hook' run [--allow-unknown-hook-name] [--ignore-missing] [--to-stdin=] [-- ] +'git hook' run [--allow-unknown-hook-name] [--ignore-missing] [--to-stdin=] [(-j|--jobs) ] + [-- ] 'git hook' list [--allow-unknown-hook-name] [-z] [--show-scope] DESCRIPTION @@ -147,6 +148,23 @@ OPTIONS mirroring the output style of `git config --show-scope`. Traditional hooks from the hookdir are unaffected. +-j:: +--jobs:: + Only valid for `run`. ++ +Specify how many hooks to run simultaneously. If this flag is not specified, +the value of the `hook.jobs` config is used, see linkgit:git-config[1]. If +neither is specified, defaults to 1 (serial execution). ++ +When greater than 1, it overrides the per-hook `hook..parallel` +setting, allowing all hooks for the event to run concurrently, even if they +are not individually marked as parallel. ++ +Some hooks always run sequentially regardless of this flag or the +`hook.jobs` config, because git knows they cannot safely run in parallel: +`applypatch-msg`, `pre-commit`, `prepare-commit-msg`, `commit-msg`, +`post-commit`, `post-checkout`, and `push-to-checkout`. 
+ WRAPPERS -------- @@ -169,7 +187,8 @@ running: git hook run --allow-unknown-hook-name mywrapper-start-tests \ # providing something to stdin --stdin some-tempfile-123 \ - # execute hooks in serial + # execute multiple hooks in parallel + --jobs 3 \ # plus some arguments of your own... -- \ --testname bar \ diff --git a/builtin/hook.c b/builtin/hook.c index c0585587e5e4fa..bea0668b475931 100644 --- a/builtin/hook.c +++ b/builtin/hook.c @@ -8,7 +8,8 @@ #include "parse-options.h" #define BUILTIN_HOOK_RUN_USAGE \ - N_("git hook run [--allow-unknown-hook-name] [--ignore-missing] [--to-stdin=] [-- ]") + N_("git hook run [--allow-unknown-hook-name] [--ignore-missing] [--to-stdin=] [(-j|--jobs) ]\n" \ + " [-- ]") #define BUILTIN_HOOK_LIST_USAGE \ N_("git hook list [--allow-unknown-hook-name] [-z] [--show-scope] ") @@ -132,6 +133,8 @@ static int run(int argc, const char **argv, const char *prefix, N_("silently ignore missing requested ")), OPT_STRING(0, "to-stdin", &opt.path_to_stdin, N_("path"), N_("file to read into hooks' stdin")), + OPT_UNSIGNED('j', "jobs", &opt.jobs, + N_("run up to hooks simultaneously")), OPT_END(), }; int ret; diff --git a/hook.c b/hook.c index 25762b6c8d18f9..c0b71322cf2ef6 100644 --- a/hook.c +++ b/hook.c @@ -568,6 +568,22 @@ static void merge_output_if_parallel(struct run_hooks_opt *options) options->stdout_to_stderr = 1; } +static void warn_non_parallel_hooks_override(unsigned int jobs, + struct string_list *hook_list) +{ + /* Don't warn for hooks running sequentially. */ + if (jobs == 1) + return; + + for (size_t i = 0; i < hook_list->nr; i++) { + struct hook *h = hook_list->items[i].util; + if (h->kind == HOOK_CONFIGURED && !h->parallel) + warning(_("hook '%s' is not marked as parallel=true, " + "running in parallel anyway due to -j%u"), + h->u.configured.friendly_name, jobs); + } +} + /* Determine how many jobs to use for hook execution. 
*/ static unsigned int get_hook_jobs(struct repository *r, struct run_hooks_opt *options, @@ -611,6 +627,7 @@ static unsigned int get_hook_jobs(struct repository *r, cleanup: merge_output_if_parallel(options); + warn_non_parallel_hooks_override(options->jobs, hook_list); return options->jobs; } diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 63fa25bca23c51..aa37a5181a0e0e 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -268,10 +268,20 @@ test_expect_success 'git -c core.hooksPath= hook run' ' ' test_hook_tty () { - cat >expect <<-\EOF - STDOUT TTY - STDERR TTY - EOF + expect_tty=$1 + shift + + if test "$expect_tty" != "no_tty"; then + cat >expect <<-\EOF + STDOUT TTY + STDERR TTY + EOF + else + cat >expect <<-\EOF + STDOUT NO TTY + STDERR NO TTY + EOF + fi test_when_finished "rm -rf repo" && git init repo && @@ -289,12 +299,21 @@ test_hook_tty () { test_cmp expect repo/actual } -test_expect_success TTY 'git hook run: stdout and stderr are connected to a TTY' ' - test_hook_tty hook run pre-commit +test_expect_success TTY 'git hook run -j1: stdout and stderr are connected to a TTY' ' + # hooks running sequentially (-j1) are always connected to the tty for + # optimum real-time performance. + test_hook_tty tty hook run -j1 pre-commit +' + +test_expect_success TTY 'git hook run -jN: stdout and stderr are not connected to a TTY' ' + # Hooks are not connected to the tty when run in parallel, instead they + # output to a pipe through which run-command collects and de-interlaces + # their outputs, which then gets passed either to the tty or a sideband. 
+ test_hook_tty no_tty hook run -j2 pre-commit ' test_expect_success TTY 'git commit: stdout and stderr are connected to a TTY' ' - test_hook_tty commit -m"B.new" + test_hook_tty tty commit -m"B.new" ' test_expect_success 'git hook list orders by config order' ' @@ -709,6 +728,108 @@ test_expect_success 'server push-to-checkout hook expects stdout redirected to s check_stdout_merged_to_stderr push-to-checkout ' +test_expect_success 'parallel hook output is not interleaved' ' + test_when_finished "rm -rf .git/hooks" && + + write_script .git/hooks/test-hook <<-EOF && + echo "Hook 1 Start" + sleep 1 + echo "Hook 1 End" + EOF + + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command \ + "echo \"Hook 2 Start\"; sleep 2; echo \"Hook 2 End\"" && + test_config hook.hook-2.parallel true && + test_config hook.hook-3.event test-hook && + test_config hook.hook-3.command \ + "echo \"Hook 3 Start\"; sleep 3; echo \"Hook 3 End\"" && + test_config hook.hook-3.parallel true && + + git hook run --allow-unknown-hook-name -j3 test-hook >out 2>err.parallel && + + # Verify Hook 1 output is grouped + sed -n "/Hook 1 Start/,/Hook 1 End/p" err.parallel >hook1_out && + test_line_count = 2 hook1_out && + + # Verify Hook 2 output is grouped + sed -n "/Hook 2 Start/,/Hook 2 End/p" err.parallel >hook2_out && + test_line_count = 2 hook2_out && + + # Verify Hook 3 output is grouped + sed -n "/Hook 3 Start/,/Hook 3 End/p" err.parallel >hook3_out && + test_line_count = 2 hook3_out +' + +test_expect_success 'git hook run -j1 runs hooks in series' ' + test_when_finished "rm -rf .git/hooks" && + + test_config hook.series-1.event "test-hook" && + test_config hook.series-1.command "echo 1" --add && + test_config hook.series-2.event "test-hook" && + test_config hook.series-2.command "echo 2" --add && + + mkdir -p .git/hooks && + write_script .git/hooks/test-hook <<-EOF && + echo 3 + EOF + + cat >expected <<-\EOF && + 1 + 2 + 3 + EOF + + git hook run --allow-unknown-hook-name -j1 
test-hook 2>actual && + test_cmp expected actual +' + +test_expect_success 'git hook run -j2 runs hooks in parallel' ' + test_when_finished "rm -f sentinel.started sentinel.done hook.order" && + test_when_finished "rm -rf .git/hooks" && + + mkdir -p .git/hooks && + write_sentinel_hook .git/hooks/test-hook && + + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + test_config hook.hook-2.parallel true && + + git hook run --allow-unknown-hook-name -j2 test-hook >out 2>err && + echo parallel >expect && + test_cmp expect hook.order +' + +test_expect_success 'git hook run -j2 overrides parallel=false' ' + test_when_finished "rm -f sentinel.started sentinel.done hook.order" && + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command \ + "touch sentinel.started; sleep 2; touch sentinel.done" && + # hook-1 intentionally has no parallel=true + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + # hook-2 also has no parallel=true + + # -j2 overrides parallel=false; hooks run in parallel with a warning. + git hook run --allow-unknown-hook-name -j2 test-hook >out 2>err && + echo parallel >expect && + test_cmp expect hook.order +' + +test_expect_success 'git hook run -j2 warns for hooks not marked parallel=true' ' + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command "true" && + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command "true" && + # neither hook has parallel=true + + git hook run --allow-unknown-hook-name -j2 test-hook >out 2>err && + grep "hook .hook-1. is not marked as parallel=true" err && + grep "hook .hook-2. 
is not marked as parallel=true" err +' + test_expect_success 'hook.jobs=1 config runs hooks in series' ' test_when_finished "rm -f sentinel.started sentinel.done hook.order" && From 084a55b3adf33f70c84091d5957b8bede9b01174 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Fri, 10 Apr 2026 12:06:03 +0300 Subject: [PATCH 135/241] hook: add per-event jobs config Add a hook..jobs count config that allows users to override the global hook.jobs setting for specific hook events. This allows finer-grained control over parallelism on a per-event basis. For example, to run `post-receive` hooks with up to 4 parallel jobs while keeping other events at their global default: [hook] post-receive.jobs = 4 Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Documentation/config/hook.adoc | 19 +++++++++++ hook.c | 46 +++++++++++++++++++++++--- repository.c | 1 + repository.h | 3 ++ t/t1800-hook.sh | 59 ++++++++++++++++++++++++++++++++++ 5 files changed, 123 insertions(+), 5 deletions(-) diff --git a/Documentation/config/hook.adoc b/Documentation/config/hook.adoc index 6f60775c28a902..d4fa29d936d6e2 100644 --- a/Documentation/config/hook.adoc +++ b/Documentation/config/hook.adoc @@ -33,9 +33,28 @@ hook..parallel:: found in the hooks directory do not need to, and run in parallel when the effective job count is greater than 1. See linkgit:git-hook[1]. +hook..jobs:: + Specifies how many hooks can be run simultaneously for the `` + hook event (e.g. `hook.post-receive.jobs = 4`). Overrides `hook.jobs` + for this specific event. The same parallelism restrictions apply: this + setting has no effect unless all configured hooks for the event have + `hook..parallel` set to `true`. Must be a positive int, + zero is rejected with a warning. See linkgit:git-hook[1]. ++ +Note on naming: although this key resembles `hook..*` +(a per-hook setting), `` must be the event name, not a hook +friendly name. 
The key component is stored literally and looked up by +event name at runtime with no translation between the two namespaces. +A key like `hook.my-hook.jobs` is stored under `"my-hook"` but the +lookup at runtime uses the event name (e.g. `"post-receive"`), so +`hook.my-hook.jobs` is silently ignored even when `my-hook` is +registered for that event. Use `hook.post-receive.jobs` or any other +valid event name when setting `hook..jobs`. + hook.jobs:: Specifies how many hooks can be run simultaneously during parallelized hook execution. If unspecified, defaults to 1 (serial execution). + Can be overridden on a per-event basis with `hook..jobs`. Some hooks always run sequentially regardless of this setting because they operate on shared data and cannot safely be parallelized: + diff --git a/hook.c b/hook.c index c0b71322cf2ef6..d98b01156366a4 100644 --- a/hook.c +++ b/hook.c @@ -125,6 +125,7 @@ struct hook_config_cache_entry { * event_hooks: event-name to list of friendly-names map. * disabled_hooks: set of friendly-names with hook..enabled = false. * parallel_hooks: friendly-name to parallel flag. + * event_jobs: event-name to per-event jobs count (stored as uintptr_t, NULL == unset). * jobs: value of the global hook.jobs key. Defaults to 0 if unset (stored in r->hook_jobs). 
*/ struct hook_all_config_cb { @@ -132,6 +133,7 @@ struct hook_all_config_cb { struct strmap event_hooks; struct string_list disabled_hooks; struct strmap parallel_hooks; + struct strmap event_jobs; unsigned int jobs; }; @@ -231,6 +233,18 @@ static int hook_config_lookup_all(const char *key, const char *value, warning(_("hook.%s.parallel must be a boolean," " ignoring: '%s'"), hook_name, value); + } else if (!strcmp(subkey, "jobs")) { + unsigned int v; + if (!git_parse_uint(value, &v)) + warning(_("hook.%s.jobs must be a positive integer," + " ignoring: '%s'"), + hook_name, value); + else if (!v) + warning(_("hook.%s.jobs must be positive," + " ignoring: 0"), hook_name); + else + strmap_put(&data->event_jobs, hook_name, + (void *)(uintptr_t)v); } free(hook_name); @@ -276,6 +290,7 @@ static void build_hook_config_map(struct repository *r, struct strmap *cache) strmap_init(&cb_data.event_hooks); string_list_init_dup(&cb_data.disabled_hooks); strmap_init(&cb_data.parallel_hooks); + strmap_init(&cb_data.event_jobs); /* Parse all configs in one run, capturing hook.* including hook.jobs. */ repo_config(r, hook_config_lookup_all, &cb_data); @@ -323,8 +338,10 @@ static void build_hook_config_map(struct repository *r, struct strmap *cache) strmap_put(cache, e->key, hooks); } - if (r) + if (r) { r->hook_jobs = cb_data.jobs; + r->event_jobs = cb_data.event_jobs; + } strmap_clear(&cb_data.commands, 1); strmap_clear(&cb_data.parallel_hooks, 0); /* values are uintptr_t, not heap ptrs */ @@ -587,6 +604,7 @@ static void warn_non_parallel_hooks_override(unsigned int jobs, /* Determine how many jobs to use for hook execution. 
*/ static unsigned int get_hook_jobs(struct repository *r, struct run_hooks_opt *options, + const char *hook_name, struct string_list *hook_list) { /* @@ -606,16 +624,34 @@ static unsigned int get_hook_jobs(struct repository *r, */ options->jobs = 1; if (r) { - if (r->gitdir && r->hook_config_cache && r->hook_jobs) - options->jobs = r->hook_jobs; - else + if (r->gitdir && r->hook_config_cache) { + void *event_jobs; + + if (r->hook_jobs) + options->jobs = r->hook_jobs; + + event_jobs = strmap_get(&r->event_jobs, hook_name); + if (event_jobs) + options->jobs = (unsigned int)(uintptr_t)event_jobs; + } else { + unsigned int event_jobs; + char *key; + repo_config_get_uint(r, "hook.jobs", &options->jobs); + + key = xstrfmt("hook.%s.jobs", hook_name); + if (!repo_config_get_uint(r, key, &event_jobs) && event_jobs) + options->jobs = event_jobs; + free(key); + } } /* * Cap to serial any configured hook not marked as parallel = true. * This enforces the parallel = false default, even for "traditional" * hooks from the hookdir which cannot be marked parallel = true. + * The same restriction applies whether jobs came from hook.jobs or + * hook..jobs. 
*/ for (size_t i = 0; i < hook_list->nr; i++) { struct hook *h = hook_list->items[i].util; @@ -642,7 +678,7 @@ int run_hooks_opt(struct repository *r, const char *hook_name, .options = options, }; int ret = 0; - unsigned int jobs = get_hook_jobs(r, options, hook_list); + unsigned int jobs = get_hook_jobs(r, options, hook_name, hook_list); const struct run_process_parallel_opts opts = { .tr2_category = "hook", .tr2_label = hook_name, diff --git a/repository.c b/repository.c index 192d6dc9c477fa..4030db4460714d 100644 --- a/repository.c +++ b/repository.c @@ -426,6 +426,7 @@ void repo_clear(struct repository *repo) hook_cache_clear(repo->hook_config_cache); FREE_AND_NULL(repo->hook_config_cache); } + strmap_clear(&repo->event_jobs, 0); /* values are uintptr_t, not heap ptrs */ if (repo->promisor_remote_config) { promisor_remote_clear(repo->promisor_remote_config); diff --git a/repository.h b/repository.h index 58e46853d089bf..6b67ec02e2984c 100644 --- a/repository.h +++ b/repository.h @@ -175,6 +175,9 @@ struct repository { /* Cached value of hook.jobs config (0 if unset, defaults to serial). */ unsigned int hook_jobs; + /* Cached map of event-name -> jobs count (as uintptr_t) from hook..jobs. */ + struct strmap event_jobs; + /* Configurations related to promisor remotes. 
*/ char *repository_format_partial_clone; struct promisor_remote_config *promisor_remote_config; diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index aa37a5181a0e0e..24a3c92b6deb80 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -969,4 +969,63 @@ test_expect_success 'hook.jobs=2 is ignored for force-serial hooks (pre-commit)' test_cmp expect hook.order ' +test_expect_success 'hook..jobs overrides hook.jobs for that event' ' + test_when_finished "rm -f sentinel.started sentinel.done hook.order" && + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command \ + "touch sentinel.started; sleep 2; touch sentinel.done" && + test_config hook.hook-1.parallel true && + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + test_config hook.hook-2.parallel true && + + # Global hook.jobs=1 (serial), but per-event override allows parallel. + test_config hook.jobs 1 && + test_config hook.test-hook.jobs 2 && + + git hook run --allow-unknown-hook-name test-hook >out 2>err && + echo parallel >expect && + test_cmp expect hook.order +' + +test_expect_success 'hook..jobs=1 forces serial even when hook.jobs>1' ' + test_when_finished "rm -f sentinel.started sentinel.done hook.order" && + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command \ + "touch sentinel.started; sleep 2; touch sentinel.done" && + test_config hook.hook-1.parallel true && + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + test_config hook.hook-2.parallel true && + + # Global hook.jobs=4 allows parallel, but per-event override forces serial. 
+ test_config hook.jobs 4 && + test_config hook.test-hook.jobs 1 && + + git hook run --allow-unknown-hook-name test-hook >out 2>err && + echo serial >expect && + test_cmp expect hook.order +' + +test_expect_success 'hook..jobs still requires hook..parallel=true' ' + test_when_finished "rm -f sentinel.started sentinel.done hook.order" && + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command \ + "touch sentinel.started; sleep 2; touch sentinel.done" && + # hook-1 intentionally has no parallel=true + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command \ + "$(sentinel_detector sentinel hook.order)" && + # hook-2 also has no parallel=true + + # Per-event jobs=2 but no hook has parallel=true: must still run serially. + test_config hook.test-hook.jobs 2 && + + git hook run --allow-unknown-hook-name test-hook >out 2>err && + echo serial >expect && + test_cmp expect hook.order +' + test_done From 5e57b209ff21bf1087dd8539c458737c89b03150 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Fri, 10 Apr 2026 12:06:04 +0300 Subject: [PATCH 136/241] hook: warn when hook..jobs is set Issue a warning when the user confuses the hook process and event namespaces by setting hook..jobs. Detect this by checking whether the name carrying .jobs also has .command, .event, or .parallel configured. Extract is_friendly_name() as a helper for this check, to be reused by future per-event config handling. Suggested-by: Junio C Hamano Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- hook.c | 40 ++++++++++++++++++++++++++++++++++++++++ t/t1800-hook.sh | 30 ++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/hook.c b/hook.c index d98b01156366a4..0493993bbe6738 100644 --- a/hook.c +++ b/hook.c @@ -279,6 +279,44 @@ void hook_cache_clear(struct strmap *cache) strmap_clear(cache, 0); } +/* + * Return true if `name` is a hook friendly-name, i.e. it has at least one of + * .command, .event, or .parallel configured. 
These are the reliable clues + * that distinguish a friendly-name from an event name. Note: .enabled is + * deliberately excluded because it can appear under both namespaces. + */ +static int is_friendly_name(struct hook_all_config_cb *cb, const char *name) +{ + struct hashmap_iter iter; + struct strmap_entry *e; + + if (strmap_get(&cb->commands, name) || strmap_get(&cb->parallel_hooks, name)) + return 1; + + strmap_for_each_entry(&cb->event_hooks, &iter, e) { + if (unsorted_string_list_lookup(e->value, name)) + return 1; + } + + return 0; +} + +/* Warn if any name in event_jobs is also a hook friendly-name. */ +static void warn_jobs_on_friendly_names(struct hook_all_config_cb *cb_data) +{ + struct hashmap_iter iter; + struct strmap_entry *e; + + strmap_for_each_entry(&cb_data->event_jobs, &iter, e) { + if (is_friendly_name(cb_data, e->key)) + warning(_("hook.%s.jobs is set but '%s' looks like a " + "hook friendly-name, not an event name; " + "hook.<event>.jobs uses the event name " + "(e.g. hook.post-receive.jobs), so this " + "setting will be ignored"), e->key, e->key); + } +} + /* Populate `cache` with the complete hook configuration */ static void build_hook_config_map(struct repository *r, struct strmap *cache) { @@ -295,6 +333,8 @@ static void build_hook_config_map(struct repository *r, struct strmap *cache) /* Parse all configs in one run, capturing hook.* including hook.jobs. */ repo_config(r, hook_config_lookup_all, &cb_data); + warn_jobs_on_friendly_names(&cb_data); + /* Construct the cache from parsed configs. 
*/ strmap_for_each_entry(&cb_data.event_hooks, &iter, e) { struct string_list *hook_names = e->value; diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 24a3c92b6deb80..89fedc48ff497f 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -1028,4 +1028,34 @@ test_expect_success 'hook..jobs still requires hook..parallel=true' test_cmp expect hook.order ' +test_expect_success 'hook..jobs warns when name has .command' ' + test_config hook.my-hook.command "true" && + test_config hook.my-hook.jobs 2 && + git hook run --allow-unknown-hook-name --ignore-missing test-hook >out 2>err && + test_grep "hook.my-hook.jobs.*friendly-name" err +' + +test_expect_success 'hook..jobs warns when name has .event' ' + test_config hook.my-hook.event test-hook && + test_config hook.my-hook.command "true" && + test_config hook.my-hook.jobs 2 && + git hook run --allow-unknown-hook-name --ignore-missing test-hook >out 2>err && + test_grep "hook.my-hook.jobs.*friendly-name" err +' + +test_expect_success 'hook..jobs warns when name has .parallel' ' + test_config hook.my-hook.event test-hook && + test_config hook.my-hook.command "true" && + test_config hook.my-hook.parallel true && + test_config hook.my-hook.jobs 2 && + git hook run --allow-unknown-hook-name --ignore-missing test-hook >out 2>err && + test_grep "hook.my-hook.jobs.*friendly-name" err +' + +test_expect_success 'hook..jobs does not warn for a real event name' ' + test_config hook.test-hook.jobs 2 && + git hook run --allow-unknown-hook-name --ignore-missing test-hook >out 2>err && + test_grep ! "friendly-name" err +' + test_done From 2eb541e8f2a9b0dd923279421c741d0a0c00420d Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Fri, 10 Apr 2026 12:06:05 +0300 Subject: [PATCH 137/241] hook: move is_known_hook() to hook.c for wider use Move is_known_hook() from builtin/hook.c (static) into hook.c and export it via hook.h so it can be reused. Make it return bool and the iterator `h` for clarity (iterate hooks). 
Both meson.build and the Makefile are updated to reflect that the header is now used by libgit, not the builtin sources. The next commit will use this to reject hook friendly-names that collide with known event names. Co-authored-by: Patrick Steinhardt Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Makefile | 2 +- builtin/hook.c | 10 ---------- hook.c | 10 ++++++++++ hook.h | 6 ++++++ meson.build | 24 ++++++++++++------------ 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 5d22394c2ec1a6..c4e83823e4a547 100644 --- a/Makefile +++ b/Makefile @@ -2675,7 +2675,7 @@ git$X: git.o GIT-LDFLAGS $(BUILTIN_OBJS) $(GITLIBS) help.sp help.s help.o: command-list.h builtin/bugreport.sp builtin/bugreport.s builtin/bugreport.o: hook-list.h -builtin/hook.sp builtin/hook.s builtin/hook.o: hook-list.h +hook.sp hook.s hook.o: hook-list.h builtin/help.sp builtin/help.s builtin/help.o: config-list.h GIT-PREFIX builtin/help.sp builtin/help.s builtin/help.o: EXTRA_CPPFLAGS = \ diff --git a/builtin/hook.c b/builtin/hook.c index bea0668b475931..1839412dca3edc 100644 --- a/builtin/hook.c +++ b/builtin/hook.c @@ -4,7 +4,6 @@ #include "environment.h" #include "gettext.h" #include "hook.h" -#include "hook-list.h" #include "parse-options.h" #define BUILTIN_HOOK_RUN_USAGE \ @@ -13,15 +12,6 @@ #define BUILTIN_HOOK_LIST_USAGE \ N_("git hook list [--allow-unknown-hook-name] [-z] [--show-scope] ") -static int is_known_hook(const char *name) -{ - const char **p; - for (p = hook_name_list; *p; p++) - if (!strcmp(*p, name)) - return 1; - return 0; -} - static const char * const builtin_hook_usage[] = { BUILTIN_HOOK_RUN_USAGE, BUILTIN_HOOK_LIST_USAGE, diff --git a/hook.c b/hook.c index 0493993bbe6738..19076f8f2baba6 100644 --- a/hook.c +++ b/hook.c @@ -5,6 +5,7 @@ #include "environment.h" #include "gettext.h" #include "hook.h" +#include "hook-list.h" #include "parse.h" #include "path.h" #include "run-command.h" @@ -12,6 +13,15 @@ #include "strbuf.h" 
#include "strmap.h" +bool is_known_hook(const char *name) +{ + const char **h; + for (h = hook_name_list; *h; h++) + if (!strcmp(*h, name)) + return true; + return false; +} + const char *find_hook(struct repository *r, const char *name) { static struct strbuf path = STRBUF_INIT; diff --git a/hook.h b/hook.h index 01db4226a60306..5a93f56618e123 100644 --- a/hook.h +++ b/hook.h @@ -234,6 +234,12 @@ void hook_free(void *p, const char *str); */ void hook_cache_clear(struct strmap *cache); +/** + * Returns true if `name` is a recognized hook event name + * (e.g. "pre-commit", "post-receive"). + */ +bool is_known_hook(const char *name); + /** * Returns the path to the hook file, or NULL if the hook is missing * or disabled. Note that this points to static storage that will be diff --git a/meson.build b/meson.build index 8309942d184847..f438d5545dafb7 100644 --- a/meson.build +++ b/meson.build @@ -563,6 +563,18 @@ libgit_sources += custom_target( env: script_environment, ) +libgit_sources += custom_target( + input: 'Documentation/githooks.adoc', + output: 'hook-list.h', + command: [ + shell, + meson.current_source_dir() + '/tools/generate-hooklist.sh', + meson.current_source_dir(), + '@OUTPUT@', + ], + env: script_environment, +) + builtin_sources = [ 'builtin/add.c', 'builtin/am.c', @@ -739,18 +751,6 @@ builtin_sources += custom_target( env: script_environment, ) -builtin_sources += custom_target( - input: 'Documentation/githooks.adoc', - output: 'hook-list.h', - command: [ - shell, - meson.current_source_dir() + '/tools/generate-hooklist.sh', - meson.current_source_dir(), - '@OUTPUT@', - ], - env: script_environment, -) - # This contains the variables for GIT-BUILD-OPTIONS, which we use to propagate # build options to our tests. 
build_options_config = configuration_data() From dcfb5af67e7d7156c4d1ede66de18088c990356c Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Fri, 10 Apr 2026 12:06:06 +0300 Subject: [PATCH 138/241] hook: add hook..enabled switch Add a hook..enabled config key that disables all hooks for a given event, when set to false, acting as a high-level switch above the existing per-hook hook..enabled. Event-disabled hooks are shown in "git hook list" with an "event-disabled" tab-separated prefix before the name: $ git hook list test-hook event-disabled hook-1 event-disabled hook-2 With --show-scope: $ git hook list --show-scope test-hook local event-disabled hook-1 When a hook is both per-hook disabled and event-disabled, only "event-disabled" is shown: the event-level switch is the more relevant piece of information, and the per-hook "disabled" status will surface once the event is re-enabled. Using an event name as a friendly-name (e.g. hook..enabled) can cause ambiguity, so a fatal error is issued when using a known event name and a warning is issued for unknown event name, since a collision cannot be detected with certainty for unknown events. Suggested-by: Patrick Steinhardt Suggested-by: Junio C Hamano Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Documentation/config/hook.adoc | 20 ++++++++ builtin/hook.c | 20 +++++--- hook.c | 47 +++++++++++++++++-- hook.h | 1 + repository.c | 1 + repository.h | 4 ++ t/t1800-hook.sh | 83 ++++++++++++++++++++++++++++++++++ 7 files changed, 165 insertions(+), 11 deletions(-) diff --git a/Documentation/config/hook.adoc b/Documentation/config/hook.adoc index d4fa29d936d6e2..e0db3afa194080 100644 --- a/Documentation/config/hook.adoc +++ b/Documentation/config/hook.adoc @@ -15,6 +15,12 @@ hook..event:: events, specify the key more than once. An empty value resets the list of events, clearing any previously defined events for `hook.`. See linkgit:git-hook[1]. ++ +The `` must not be the same as a known hook event name +(e.g. 
do not use `hook.pre-commit.event`). Using a known event name as +a friendly-name is a fatal error because it creates an ambiguity with +`hook..enabled` and `hook..jobs`. For unknown event names, +a warning is issued when `` matches the event value. hook..enabled:: Whether the hook `hook.` is enabled. Defaults to `true`. @@ -33,6 +39,20 @@ hook..parallel:: found in the hooks directory do not need to, and run in parallel when the effective job count is greater than 1. See linkgit:git-hook[1]. +hook..enabled:: + Switch to enable or disable all hooks for the `` hook event. + When set to `false`, no hooks fire for that event, regardless of any + per-hook `hook..enabled` settings. Defaults to `true`. + See linkgit:git-hook[1]. ++ +Note on naming: `` must be the event name (e.g. `pre-commit`), +not a hook friendly-name. Since using a known event name as a +friendly-name is disallowed (see `hook..event` above), +there is no ambiguity between event-level and per-hook `.enabled` +settings for known events. For unknown events, if a friendly-name +matches the event name despite the warning, `.enabled` is treated +as per-hook only. + hook..jobs:: Specifies how many hooks can be run simultaneously for the `` hook event (e.g. `hook.post-receive.jobs = 4`). Overrides `hook.jobs` diff --git a/builtin/hook.c b/builtin/hook.c index 1839412dca3edc..8e47e22e2a1e5f 100644 --- a/builtin/hook.c +++ b/builtin/hook.c @@ -87,14 +87,22 @@ static int list(int argc, const char **argv, const char *prefix, const char *name = h->u.configured.friendly_name; const char *scope = show_scope ? config_scope_name(h->u.configured.scope) : NULL; + /* + * Show the most relevant disable reason. Event-level + * takes precedence: if the whole event is off, that + * is what the user needs to know. The per-hook + * "disabled" surfaces once the event is re-enabled. + */ + const char *disability = + h->u.configured.event_disabled ? "event-disabled\t" : + h->u.configured.disabled ? 
"disabled\t" : + ""; if (scope) - printf("%s\t%s%s%c", scope, - h->u.configured.disabled ? "disabled\t" : "", - name, line_terminator); + printf("%s\t%s%s%c", scope, disability, name, + line_terminator); else - printf("%s%s%c", - h->u.configured.disabled ? "disabled\t" : "", - name, line_terminator); + printf("%s%s%c", disability, name, + line_terminator); break; } default: diff --git a/hook.c b/hook.c index 19076f8f2baba6..bc990d4ed4d754 100644 --- a/hook.c +++ b/hook.c @@ -133,7 +133,9 @@ struct hook_config_cache_entry { * Callback struct to collect all hook.* keys in a single config pass. * commands: friendly-name to command map. * event_hooks: event-name to list of friendly-names map. - * disabled_hooks: set of friendly-names with hook..enabled = false. + * disabled_hooks: set of all names with hook..enabled = false; after + * parsing, names that are not friendly-names become event-level + * disables stored in r->disabled_events. This collects all. * parallel_hooks: friendly-name to parallel flag. * event_jobs: event-name to per-event jobs count (stored as uintptr_t, NULL == unset). * jobs: value of the global hook.jobs key. Defaults to 0 if unset (stored in r->hook_jobs). 
@@ -189,8 +191,21 @@ static int hook_config_lookup_all(const char *key, const char *value, strmap_for_each_entry(&data->event_hooks, &iter, e) unsorted_string_list_remove(e->value, hook_name, 0); } else { - struct string_list *hooks = - strmap_get(&data->event_hooks, value); + struct string_list *hooks; + + if (is_known_hook(hook_name)) + die(_("hook friendly-name '%s' collides with " + "a known event name; please choose a " + "different friendly-name"), + hook_name); + + if (!strcmp(hook_name, value)) + warning(_("hook friendly-name '%s' is the " + "same as its event; this may cause " + "ambiguity with hook.%s.enabled"), + hook_name, hook_name); + + hooks = strmap_get(&data->event_hooks, value); if (!hooks) { CALLOC_ARRAY(hooks, 1); @@ -345,6 +360,22 @@ static void build_hook_config_map(struct repository *r, struct strmap *cache) warn_jobs_on_friendly_names(&cb_data); + /* + * Populate disabled_events: names in disabled_hooks that are not + * friendly-names are event-level switches (hook..enabled = false). + * Names that are friendly-names are already handled per-hook via the + * hook_config_cache_entry.disabled flag below. + */ + if (r) { + string_list_clear(&r->disabled_events, 0); + string_list_init_dup(&r->disabled_events); + for (size_t i = 0; i < cb_data.disabled_hooks.nr; i++) { + const char *n = cb_data.disabled_hooks.items[i].string; + if (!is_friendly_name(&cb_data, n)) + string_list_append(&r->disabled_events, n); + } + } + /* Construct the cache from parsed configs. */ strmap_for_each_entry(&cb_data.event_hooks, &iter, e) { struct string_list *hook_names = e->value; @@ -446,6 +477,8 @@ static void list_hooks_add_configured(struct repository *r, { struct strmap *cache = get_hook_config_cache(r); struct string_list *configured_hooks = strmap_get(cache, hookname); + bool event_is_disabled = r ? 
!!unsorted_string_list_lookup(&r->disabled_events, + hookname) : 0; /* Iterate through configured hooks and initialize internal states */ for (size_t i = 0; configured_hooks && i < configured_hooks->nr; i++) { @@ -472,6 +505,7 @@ static void list_hooks_add_configured(struct repository *r, entry->command ? xstrdup(entry->command) : NULL; hook->u.configured.scope = entry->scope; hook->u.configured.disabled = entry->disabled; + hook->u.configured.event_disabled = event_is_disabled; hook->parallel = entry->parallel; string_list_append(list, friendly_name)->util = hook; @@ -484,6 +518,8 @@ static void list_hooks_add_configured(struct repository *r, if (!r || !r->gitdir) { hook_cache_clear(cache); free(cache); + if (r) + string_list_clear(&r->disabled_events, 0); } } @@ -515,7 +551,7 @@ int hook_exists(struct repository *r, const char *name) for (size_t i = 0; i < hooks->nr; i++) { struct hook *h = hooks->items[i].util; if (h->kind == HOOK_TRADITIONAL || - !h->u.configured.disabled) { + (!h->u.configured.disabled && !h->u.configured.event_disabled)) { exists = 1; break; } @@ -538,7 +574,8 @@ static int pick_next_hook(struct child_process *cp, if (hook_cb->hook_to_run_index >= hook_list->nr) return 0; h = hook_list->items[hook_cb->hook_to_run_index++].util; - } while (h->kind == HOOK_CONFIGURED && h->u.configured.disabled); + } while (h->kind == HOOK_CONFIGURED && + (h->u.configured.disabled || h->u.configured.event_disabled)); cp->no_stdin = 1; strvec_pushv(&cp->env, hook_cb->options->env.v); diff --git a/hook.h b/hook.h index 5a93f56618e123..b4372b636ff4de 100644 --- a/hook.h +++ b/hook.h @@ -32,6 +32,7 @@ struct hook { const char *command; enum config_scope scope; bool disabled; + bool event_disabled; } configured; } u; diff --git a/repository.c b/repository.c index 4030db4460714d..db57b8308b94e7 100644 --- a/repository.c +++ b/repository.c @@ -427,6 +427,7 @@ void repo_clear(struct repository *repo) FREE_AND_NULL(repo->hook_config_cache); } 
strmap_clear(&repo->event_jobs, 0); /* values are uintptr_t, not heap ptrs */ + string_list_clear(&repo->disabled_events, 0); if (repo->promisor_remote_config) { promisor_remote_clear(repo->promisor_remote_config); diff --git a/repository.h b/repository.h index 6b67ec02e2984c..4969d8b8ebed60 100644 --- a/repository.h +++ b/repository.h @@ -2,6 +2,7 @@ #define REPOSITORY_H #include "strmap.h" +#include "string-list.h" #include "repo-settings.h" #include "environment.h" @@ -178,6 +179,9 @@ struct repository { /* Cached map of event-name -> jobs count (as uintptr_t) from hook..jobs. */ struct strmap event_jobs; + /* Cached list of event names with hook..enabled = false. */ + struct string_list disabled_events; + /* Configurations related to promisor remotes. */ char *repository_format_partial_clone; struct promisor_remote_config *promisor_remote_config; diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 89fedc48ff497f..c4ff25f6b088ea 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -1058,4 +1058,87 @@ test_expect_success 'hook..jobs does not warn for a real event name' ' test_grep ! 
"friendly-name" err ' +test_expect_success 'hook..enabled=false skips all hooks for event' ' + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command "echo ran" && + test_config hook.test-hook.enabled false && + git hook run --allow-unknown-hook-name test-hook >out 2>err && + test_must_be_empty out +' + +test_expect_success 'hook..enabled=true does not suppress hooks' ' + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command "echo ran" && + test_config hook.test-hook.enabled true && + git hook run --allow-unknown-hook-name test-hook >out 2>err && + test_grep "ran" err +' + +test_expect_success 'hook..enabled=false does not affect other events' ' + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command "echo ran" && + test_config hook.other-event.enabled false && + git hook run --allow-unknown-hook-name test-hook >out 2>err && + test_grep "ran" err +' + +test_expect_success 'hook..enabled=false still disables that hook' ' + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command "echo hook-1" && + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command "echo hook-2" && + test_config hook.hook-1.enabled false && + git hook run --allow-unknown-hook-name test-hook >out 2>err && + test_grep ! 
"hook-1" err && + test_grep "hook-2" err +' + +test_expect_success 'git hook list shows event-disabled hooks as event-disabled' ' + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command "echo ran" && + test_config hook.hook-2.event test-hook && + test_config hook.hook-2.command "echo ran" && + test_config hook.test-hook.enabled false && + git hook list --allow-unknown-hook-name test-hook >actual && + test_grep "^event-disabled hook-1$" actual && + test_grep "^event-disabled hook-2$" actual +' + +test_expect_success 'git hook list shows scope with event-disabled' ' + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command "echo ran" && + test_config hook.test-hook.enabled false && + git hook list --allow-unknown-hook-name --show-scope test-hook >actual && + test_grep "^local event-disabled hook-1$" actual +' + +test_expect_success 'git hook list still shows hooks when event is disabled' ' + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command "echo ran" && + test_config hook.test-hook.enabled false && + git hook list --allow-unknown-hook-name test-hook >actual && + test_grep "event-disabled" actual +' + +test_expect_success 'friendly-name matching known event name is rejected' ' + test_config hook.pre-commit.event pre-commit && + test_config hook.pre-commit.command "echo oops" && + test_must_fail git hook run pre-commit 2>err && + test_grep "collides with a known event name" err +' + +test_expect_success 'friendly-name matching known event name is rejected even for different event' ' + test_config hook.pre-commit.event post-commit && + test_config hook.pre-commit.command "echo oops" && + test_must_fail git hook run post-commit 2>err && + test_grep "collides with a known event name" err +' + +test_expect_success 'friendly-name matching unknown event warns' ' + test_config hook.test-hook.event test-hook && + test_config hook.test-hook.command "echo ran" && + git hook run --allow-unknown-hook-name 
test-hook >out 2>err && + test_grep "same as its event" err +' + test_done From 495b7d54dc006556548e2fd3ca15c4f533917329 Mon Sep 17 00:00:00 2001 From: Adrian Ratiu Date: Fri, 10 Apr 2026 12:06:07 +0300 Subject: [PATCH 139/241] hook: allow hook.jobs=-1 to use all available CPU cores Allow -1 as a value for hook.jobs, hook..jobs, and the -j CLI flag to mean "use as many jobs as there are CPU cores", matching the convention used by fetch.parallel and other Git subsystems. The value is resolved to online_cpus() at parse time so the rest of the code always works with a positive resolved count. Other non-positive values (0, -2, etc) are rejected with a warning (config) or die (CLI). Suggested-by: Patrick Steinhardt Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- Documentation/config/hook.adoc | 4 ++- builtin/hook.c | 15 +++++++-- hook.c | 60 ++++++++++++++++++++++++---------- t/t1800-hook.sh | 49 +++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 20 deletions(-) diff --git a/Documentation/config/hook.adoc b/Documentation/config/hook.adoc index e0db3afa194080..a9dc0063c12102 100644 --- a/Documentation/config/hook.adoc +++ b/Documentation/config/hook.adoc @@ -58,7 +58,8 @@ hook..jobs:: hook event (e.g. `hook.post-receive.jobs = 4`). Overrides `hook.jobs` for this specific event. The same parallelism restrictions apply: this setting has no effect unless all configured hooks for the event have - `hook..parallel` set to `true`. Must be a positive int, + `hook..parallel` set to `true`. Set to `-1` to use the + number of available CPU cores. Must be a positive integer or `-1`; zero is rejected with a warning. See linkgit:git-hook[1]. + Note on naming: although this key resembles `hook..*` @@ -74,6 +75,7 @@ valid event name when setting `hook..jobs`. hook.jobs:: Specifies how many hooks can be run simultaneously during parallelized hook execution. If unspecified, defaults to 1 (serial execution). + Set to `-1` to use the number of available CPU cores. 
Can be overridden on a per-event basis with `hook..jobs`. Some hooks always run sequentially regardless of this setting because they operate on shared data and cannot safely be parallelized: diff --git a/builtin/hook.c b/builtin/hook.c index 8e47e22e2a1e5f..cceeb3586e5daf 100644 --- a/builtin/hook.c +++ b/builtin/hook.c @@ -5,6 +5,7 @@ #include "gettext.h" #include "hook.h" #include "parse-options.h" +#include "thread-utils.h" #define BUILTIN_HOOK_RUN_USAGE \ N_("git hook run [--allow-unknown-hook-name] [--ignore-missing] [--to-stdin=] [(-j|--jobs) ]\n" \ @@ -123,6 +124,7 @@ static int run(int argc, const char **argv, const char *prefix, struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; int ignore_missing = 0; int allow_unknown = 0; + int jobs = 0; const char *hook_name; struct option run_options[] = { OPT_BOOL(0, "allow-unknown-hook-name", &allow_unknown, @@ -131,8 +133,8 @@ static int run(int argc, const char **argv, const char *prefix, N_("silently ignore missing requested ")), OPT_STRING(0, "to-stdin", &opt.path_to_stdin, N_("path"), N_("file to read into hooks' stdin")), - OPT_UNSIGNED('j', "jobs", &opt.jobs, - N_("run up to hooks simultaneously")), + OPT_INTEGER('j', "jobs", &jobs, + N_("run up to hooks simultaneously (-1 for CPU count)")), OPT_END(), }; int ret; @@ -141,6 +143,15 @@ static int run(int argc, const char **argv, const char *prefix, builtin_hook_run_usage, PARSE_OPT_KEEP_DASHDASH); + if (jobs == -1) + opt.jobs = online_cpus(); + else if (jobs < 0) + die(_("invalid value for -j: %d" + " (use -1 for CPU count or a" + " positive integer)"), jobs); + else + opt.jobs = jobs; + if (!argc) goto usage; diff --git a/hook.c b/hook.c index bc990d4ed4d754..d10eef4763c679 100644 --- a/hook.c +++ b/hook.c @@ -12,6 +12,7 @@ #include "setup.h" #include "strbuf.h" #include "strmap.h" +#include "thread-utils.h" bool is_known_hook(const char *name) { @@ -165,13 +166,17 @@ static int hook_config_lookup_all(const char *key, const char *value, /* Handle plain hook. 
entries that have no hook name component. */ if (!name) { if (!strcmp(subkey, "jobs") && value) { - unsigned int v; - if (!git_parse_uint(value, &v)) - warning(_("hook.jobs must be a positive integer, ignoring: '%s'"), value); - else if (!v) - warning(_("hook.jobs must be positive, ignoring: 0")); - else + int v; + if (!git_parse_int(value, &v)) + warning(_("hook.jobs must be an integer, ignoring: '%s'"), value); + else if (v == -1) + data->jobs = online_cpus(); + else if (v > 0) data->jobs = v; + else + warning(_("hook.jobs must be a positive integer" + " or -1, ignoring: '%s'"), + value); } return 0; } @@ -259,17 +264,21 @@ static int hook_config_lookup_all(const char *key, const char *value, " ignoring: '%s'"), hook_name, value); } else if (!strcmp(subkey, "jobs")) { - unsigned int v; - if (!git_parse_uint(value, &v)) - warning(_("hook.%s.jobs must be a positive integer," + int v; + if (!git_parse_int(value, &v)) + warning(_("hook.%s.jobs must be an integer," " ignoring: '%s'"), hook_name, value); - else if (!v) - warning(_("hook.%s.jobs must be positive," - " ignoring: 0"), hook_name); - else + else if (v == -1) + strmap_put(&data->event_jobs, hook_name, + (void *)(uintptr_t)online_cpus()); + else if (v > 0) strmap_put(&data->event_jobs, hook_name, (void *)(uintptr_t)v); + else + warning(_("hook.%s.jobs must be a positive" + " integer or -1, ignoring: '%s'"), + hook_name, value); } free(hook_name); @@ -688,6 +697,25 @@ static void warn_non_parallel_hooks_override(unsigned int jobs, } } +/* Resolve a hook.jobs config key, handling -1 as online_cpus(). */ +static void resolve_hook_config_jobs(struct repository *r, + const char *key, + unsigned int *jobs) +{ + int v; + + if (repo_config_get_int(r, key, &v)) + return; + + if (v == -1) + *jobs = online_cpus(); + else if (v > 0) + *jobs = v; + else + warning(_("%s must be a positive integer or -1," + " ignoring: %d"), key, v); +} + /* Determine how many jobs to use for hook execution. 
*/ static unsigned int get_hook_jobs(struct repository *r, struct run_hooks_opt *options, @@ -721,14 +749,12 @@ static unsigned int get_hook_jobs(struct repository *r, if (event_jobs) options->jobs = (unsigned int)(uintptr_t)event_jobs; } else { - unsigned int event_jobs; char *key; - repo_config_get_uint(r, "hook.jobs", &options->jobs); + resolve_hook_config_jobs(r, "hook.jobs", &options->jobs); key = xstrfmt("hook.%s.jobs", hook_name); - if (!repo_config_get_uint(r, key, &event_jobs) && event_jobs) - options->jobs = event_jobs; + resolve_hook_config_jobs(r, key, &options->jobs); free(key); } } diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index c4ff25f6b088ea..41b2b2c7460066 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -1058,6 +1058,55 @@ test_expect_success 'hook..jobs does not warn for a real event name' ' test_grep ! "friendly-name" err ' +test_expect_success 'hook.jobs=-1 resolves to online_cpus()' ' + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command "true" && + test_config hook.hook-1.parallel true && + + test_config hook.jobs -1 && + + cpus=$(test-tool online-cpus) && + GIT_TRACE2_EVENT="$(pwd)/trace.txt" \ + git hook run --allow-unknown-hook-name test-hook >out 2>err && + grep "\"region_enter\".*\"hook\".*\"test-hook\".*\"max:$cpus\"" trace.txt +' + +test_expect_success 'hook..jobs=-1 resolves to online_cpus()' ' + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command "true" && + test_config hook.hook-1.parallel true && + + test_config hook.test-hook.jobs -1 && + + cpus=$(test-tool online-cpus) && + GIT_TRACE2_EVENT="$(pwd)/trace.txt" \ + git hook run --allow-unknown-hook-name test-hook >out 2>err && + grep "\"region_enter\".*\"hook\".*\"test-hook\".*\"max:$cpus\"" trace.txt +' + +test_expect_success 'git hook run -j-1 resolves to online_cpus()' ' + test_config hook.hook-1.event test-hook && + test_config hook.hook-1.command "true" && + test_config hook.hook-1.parallel true && + + 
cpus=$(test-tool online-cpus) && + GIT_TRACE2_EVENT="$(pwd)/trace.txt" \ + git hook run --allow-unknown-hook-name -j-1 test-hook >out 2>err && + grep "\"region_enter\".*\"hook\".*\"test-hook\".*\"max:$cpus\"" trace.txt +' + +test_expect_success 'hook.jobs rejects values less than -1' ' + test_config hook.jobs -2 && + git hook run --allow-unknown-hook-name --ignore-missing test-hook >out 2>err && + test_grep "hook.jobs must be a positive integer or -1" err +' + +test_expect_success 'hook..jobs rejects values less than -1' ' + test_config hook.test-hook.jobs -5 && + git hook run --allow-unknown-hook-name --ignore-missing test-hook >out 2>err && + test_grep "hook.test-hook.jobs must be a positive integer or -1" err +' + test_expect_success 'hook..enabled=false skips all hooks for event' ' test_config hook.hook-1.event test-hook && test_config hook.hook-1.command "echo ran" && From 75b7cb5e14f03965cf87a976356bcbdcfb4edbad Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 10 Apr 2026 12:06:08 +0300 Subject: [PATCH 140/241] t1800: test SIGPIPE with parallel hooks We recently fixed a bug in commit 2226ffaacd (run_processes_parallel(): fix order of sigpipe handling, 2026-04-08) where a hook that caused us to get SIGPIPE would accidentally trigger the run_processes_parallel() cleanup handler killing the child processes. For a single hook, this meant killing the already-exited hook. This case was triggered by our tests, but was only a problem on some platforms. But if you have multiple hooks running in parallel, this causes a problem everywhere, since one hook failing to read its input would take down all hooks. Now that we have parallel hook support, we can add a test for this case. It should pass already, due to the existing fix. 
Signed-off-by: Jeff King Signed-off-by: Adrian Ratiu Signed-off-by: Junio C Hamano --- t/t1800-hook.sh | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 41b2b2c7460066..0132e772e472e2 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -1190,4 +1190,42 @@ test_expect_success 'friendly-name matching unknown event warns' ' test_grep "same as its event" err ' +test_expect_success 'hooks in parallel that do not read input' ' + # Add this to our $PATH to avoid having to write the whole trash + # directory into our config options, which would require quoting. + mkdir bin && + PATH=$PWD/bin:$PATH && + + write_script bin/hook-fast <<-\EOF && + # This hook does not read its input, so the parent process + # may see SIGPIPE if it is not ignored. It should happen + # relatively quickly. + exit 0 + EOF + + write_script bin/hook-slow <<-\EOF && + # This hook is slow, so we expect it to still be running + # when the other hook has exited (and the parent has a pipe error + # writing to it). + # + # So we want to be slow enough that we expect this to happen, but not + # so slow that the test takes forever. 1 second is probably enough + # in practice (and if it is occasionally not on a loaded system, we + # will err on the side of having the test pass). 
+ sleep 1 + exit 0 + EOF + + git init --bare parallel.git && + git -C parallel.git config hook.fast.command "hook-fast" && + git -C parallel.git config hook.fast.event pre-receive && + git -C parallel.git config hook.fast.parallel true && + git -C parallel.git config hook.slow.command "hook-slow" && + git -C parallel.git config hook.slow.event pre-receive && + git -C parallel.git config hook.slow.parallel true && + git -C parallel.git config hook.jobs 2 && + + git push ./parallel.git "+refs/heads/*:refs/heads/*" +' + test_done From 839d977b0c0fefc35e40affb38ab787b9129d421 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:31 +0200 Subject: [PATCH 141/241] odb: introduce "in-memory" source Next to our typical object database sources, each object database also has an implicit source of "cached" objects. These cached objects only exist in memory and serve some use cases: - They contain evergreen objects that we expect to always exist, like for example the empty tree. - They can be used to store temporary objects that we don't want to persist to disk, which is used by git-blame(1) to create a fake worktree commit. Overall, their use is somewhat restricted though. For example, we don't provide the ability to use it as a temporary object database source that allows the user to write objects, but discard them after Git exits. So while these cached objects behave almost like a source, they aren't used as one. This is about to change over the following commits, where we will turn cached objects into a new "in-memory" source. This will allow us to use it exactly the same as any other source by providing the same common interface as the "files" source. For now, the in-memory source only hosts the cached objects and doesn't provide any logic yet. This will change with subsequent commits, where we move respective functionality into the source.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 1 + meson.build | 1 + odb.c | 21 +++++++++++++-------- odb.h | 4 ++-- odb/source-inmemory.c | 12 ++++++++++++ odb/source-inmemory.h | 35 +++++++++++++++++++++++++++++++++++ odb/source.h | 3 +++ 7 files changed, 67 insertions(+), 10 deletions(-) create mode 100644 odb/source-inmemory.c create mode 100644 odb/source-inmemory.h diff --git a/Makefile b/Makefile index 22a8993482b7bd..3cda12c4556a6f 100644 --- a/Makefile +++ b/Makefile @@ -1218,6 +1218,7 @@ LIB_OBJS += object.o LIB_OBJS += odb.o LIB_OBJS += odb/source.o LIB_OBJS += odb/source-files.o +LIB_OBJS += odb/source-inmemory.o LIB_OBJS += odb/streaming.o LIB_OBJS += odb/transaction.o LIB_OBJS += oid-array.o diff --git a/meson.build b/meson.build index 6dc23b3af2f387..ffa73ce7ce811c 100644 --- a/meson.build +++ b/meson.build @@ -404,6 +404,7 @@ libgit_sources = [ 'odb.c', 'odb/source.c', 'odb/source-files.c', + 'odb/source-inmemory.c', 'odb/streaming.c', 'odb/transaction.c', 'oid-array.c', diff --git a/odb.c b/odb.c index 40a5e9c4e0ae53..60e1eead25602a 100644 --- a/odb.c +++ b/odb.c @@ -14,6 +14,7 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/source-inmemory.h" #include "packfile.h" #include "path.h" #include "promisor-remote.h" @@ -53,9 +54,9 @@ static const struct cached_object *find_cached_object(struct object_database *ob .type = OBJ_TREE, .buf = "", }; - const struct cached_object_entry *co = object_store->cached_objects; + const struct cached_object_entry *co = object_store->inmemory_objects->objects; - for (size_t i = 0; i < object_store->cached_object_nr; i++, co++) + for (size_t i = 0; i < object_store->inmemory_objects->objects_nr; i++, co++) if (oideq(&co->oid, oid)) return &co->value; @@ -792,9 +793,10 @@ int odb_pretend_object(struct object_database *odb, find_cached_object(odb, oid)) return 0; - ALLOC_GROW(odb->cached_objects, - odb->cached_object_nr + 1, odb->cached_object_alloc); - 
co = &odb->cached_objects[odb->cached_object_nr++]; + ALLOC_GROW(odb->inmemory_objects->objects, + odb->inmemory_objects->objects_nr + 1, + odb->inmemory_objects->objects_alloc); + co = &odb->inmemory_objects->objects[odb->inmemory_objects->objects_nr++]; co->value.size = len; co->value.type = type; co_buf = xmalloc(len); @@ -1083,6 +1085,7 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); + o->inmemory_objects = odb_source_inmemory_new(o); free(to_free); @@ -1123,9 +1126,11 @@ void odb_free(struct object_database *o) odb_close(o); odb_free_sources(o); - for (size_t i = 0; i < o->cached_object_nr; i++) - free((char *) o->cached_objects[i].value.buf); - free(o->cached_objects); + for (size_t i = 0; i < o->inmemory_objects->objects_nr; i++) + free((char *) o->inmemory_objects->objects[i].value.buf); + free(o->inmemory_objects->objects); + free(o->inmemory_objects->base.path); + free(o->inmemory_objects); string_list_clear(&o->submodule_source_paths, 0); diff --git a/odb.h b/odb.h index 9eb8355aca540b..c3a7edf9c848dd 100644 --- a/odb.h +++ b/odb.h @@ -8,6 +8,7 @@ #include "thread-utils.h" struct cached_object_entry; +struct odb_source_inmemory; struct packed_git; struct repository; struct strbuf; @@ -80,8 +81,7 @@ struct object_database { * to write them into the object store (e.g. a browse-only * application). */ - struct cached_object_entry *cached_objects; - size_t cached_object_nr, cached_object_alloc; + struct odb_source_inmemory *inmemory_objects; /* * A fast, rough count of the number of objects in the repository. 
diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c new file mode 100644 index 00000000000000..c7ac5c24f08e44 --- /dev/null +++ b/odb/source-inmemory.c @@ -0,0 +1,12 @@ +#include "git-compat-util.h" +#include "odb/source-inmemory.h" + +struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) +{ + struct odb_source_inmemory *source; + + CALLOC_ARRAY(source, 1); + odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); + + return source; +} diff --git a/odb/source-inmemory.h b/odb/source-inmemory.h new file mode 100644 index 00000000000000..15db068ef70bb2 --- /dev/null +++ b/odb/source-inmemory.h @@ -0,0 +1,35 @@ +#ifndef ODB_SOURCE_INMEMORY_H +#define ODB_SOURCE_INMEMORY_H + +#include "odb/source.h" + +struct cached_object_entry; + +/* + * An in-memory source that you can write objects to that shall be made + * available for reading, but that shouldn't ever be persisted to disk. Note + * that any objects written to this source will be stored in memory, so the + * number of objects you can store is limited by available system memory. + */ +struct odb_source_inmemory { + struct odb_source base; + + struct cached_object_entry *objects; + size_t objects_nr, objects_alloc; +}; + +/* Create a new in-memory object database source. */ +struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb); + +/* + * Cast the given object database source to the in-memory backend. This will + * cause a BUG in case the source doesn't use this backend. 
+ */ +static inline struct odb_source_inmemory *odb_source_inmemory_downcast(struct odb_source *source) +{ + if (source->type != ODB_SOURCE_INMEMORY) + BUG("trying to downcast source of type '%d' to in-memory", source->type); + return container_of(source, struct odb_source_inmemory, base); +} + +#endif diff --git a/odb/source.h b/odb/source.h index f706e0608a4855..0a440884e4f0ab 100644 --- a/odb/source.h +++ b/odb/source.h @@ -13,6 +13,9 @@ enum odb_source_type { /* The "files" backend that uses loose objects and packfiles. */ ODB_SOURCE_FILES, + + /* The "in-memory" backend that stores objects in memory. */ + ODB_SOURCE_INMEMORY, }; struct object_id; From 594adb38ec543989045450b9bf15674631f68fe2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:32 +0200 Subject: [PATCH 142/241] odb/source-inmemory: implement `free()` callback Implement the `free()` callback function for the "in-memory" source. Note that this requires us to define `struct cached_object_entry` in "odb/source-inmemory.h", as it is accessed in both "odb.c" and "odb/source-inmemory.c" now. This will be fixed in subsequent commits though. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 25 ++++--------------------- odb/source-inmemory.c | 12 ++++++++++++ odb/source-inmemory.h | 9 ++++++++- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/odb.c b/odb.c index 60e1eead25602a..1d65825ed3978a 100644 --- a/odb.c +++ b/odb.c @@ -32,21 +32,6 @@ KHASH_INIT(odb_path_map, const char * /* key: odb_path */, struct odb_source *, 1, fspathhash, fspatheq) -/* - * This is meant to hold a *small* number of objects that you would - * want odb_read_object() to be able to return, but yet you do not want - * to write them into the object store (e.g. a browse-only - * application). 
- */ -struct cached_object_entry { - struct object_id oid; - struct cached_object { - enum object_type type; - const void *buf; - unsigned long size; - } value; -}; - static const struct cached_object *find_cached_object(struct object_database *object_store, const struct object_id *oid) { @@ -1109,6 +1094,10 @@ static void odb_free_sources(struct object_database *o) odb_source_free(o->sources); o->sources = next; } + + odb_source_free(&o->inmemory_objects->base); + o->inmemory_objects = NULL; + kh_destroy_odb_path_map(o->source_by_path); o->source_by_path = NULL; } @@ -1126,12 +1115,6 @@ void odb_free(struct object_database *o) odb_close(o); odb_free_sources(o); - for (size_t i = 0; i < o->inmemory_objects->objects_nr; i++) - free((char *) o->inmemory_objects->objects[i].value.buf); - free(o->inmemory_objects->objects); - free(o->inmemory_objects->base.path); - free(o->inmemory_objects); - string_list_clear(&o->submodule_source_paths, 0); free(o); diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index c7ac5c24f08e44..ccbb622eaef031 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -1,6 +1,16 @@ #include "git-compat-util.h" #include "odb/source-inmemory.h" +static void odb_source_inmemory_free(struct odb_source *source) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + for (size_t i = 0; i < inmemory->objects_nr; i++) + free((char *) inmemory->objects[i].value.buf); + free(inmemory->objects); + free(inmemory->base.path); + free(inmemory); +} + struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) { struct odb_source_inmemory *source; @@ -8,5 +18,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) CALLOC_ARRAY(source, 1); odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); + source->base.free = odb_source_inmemory_free; + return source; } diff --git a/odb/source-inmemory.h b/odb/source-inmemory.h index 
15db068ef70bb2..d1b05a3996468a 100644 --- a/odb/source-inmemory.h +++ b/odb/source-inmemory.h @@ -3,7 +3,14 @@ #include "odb/source.h" -struct cached_object_entry; +struct cached_object_entry { + struct object_id oid; + struct cached_object { + enum object_type type; + const void *buf; + unsigned long size; + } value; +}; /* * An in-memory source that you can write objects to that shall be made From f03e44c3383e0652e2e037f588244dcd09c68e5e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:33 +0200 Subject: [PATCH 143/241] odb: fix unnecessary call to `find_cached_object()` The function `odb_pretend_object()` writes an object into the in-memory object database source. The effect of this is that the object will now become readable, but it won't ever be persisted to disk. Before storing the object, we first verify whether the object already exists. This is done by calling `odb_has_object()` to check all sources, followed by `find_cached_object()` to check whether we have already stored the object in our in-memory source. This is unnecessary though, as `odb_has_object()` already checks the in-memory source transitively via: - `odb_has_object()` - `odb_read_object_info_extended()` - `do_oid_object_info_extended()` - `find_cached_object()` Drop the explicit call to `find_cached_object()`. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/odb.c b/odb.c index 1d65825ed3978a..ea3fcf5e118d72 100644 --- a/odb.c +++ b/odb.c @@ -774,8 +774,7 @@ int odb_pretend_object(struct object_database *odb, char *co_buf; hash_object_file(odb->repo->hash_algo, buf, len, type, oid); - if (odb_has_object(odb, oid, 0) || - find_cached_object(odb, oid)) + if (odb_has_object(odb, oid, 0)) return 0; ALLOC_GROW(odb->inmemory_objects->objects, From 72e71dbf282e56b86698c9dbd5fdb5efa7b04696 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:34 +0200 Subject: [PATCH 144/241] odb/source-inmemory: implement `read_object_info()` callback Implement the `read_object_info()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 39 +------------------------------ odb/source-inmemory.c | 53 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 38 deletions(-) diff --git a/odb.c b/odb.c index ea3fcf5e118d72..6a3912adac2f0e 100644 --- a/odb.c +++ b/odb.c @@ -32,25 +32,6 @@ KHASH_INIT(odb_path_map, const char * /* key: odb_path */, struct odb_source *, 1, fspathhash, fspatheq) -static const struct cached_object *find_cached_object(struct object_database *object_store, - const struct object_id *oid) -{ - static const struct cached_object empty_tree = { - .type = OBJ_TREE, - .buf = "", - }; - const struct cached_object_entry *co = object_store->inmemory_objects->objects; - - for (size_t i = 0; i < object_store->inmemory_objects->objects_nr; i++, co++) - if (oideq(&co->oid, oid)) - return &co->value; - - if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree)) - return &empty_tree; - - return NULL; -} - int odb_mkstemp(struct object_database *odb, struct strbuf *temp_filename, const char *pattern) { @@ -570,7 +551,6 @@ static int do_oid_object_info_extended(struct object_database 
*odb, const struct object_id *oid, struct object_info *oi, unsigned flags) { - const struct cached_object *co; const struct object_id *real = oid; int already_retried = 0; @@ -580,25 +560,8 @@ static int do_oid_object_info_extended(struct object_database *odb, if (is_null_oid(real)) return -1; - co = find_cached_object(odb, real); - if (co) { - if (oi) { - if (oi->typep) - *(oi->typep) = co->type; - if (oi->sizep) - *(oi->sizep) = co->size; - if (oi->disk_sizep) - *(oi->disk_sizep) = 0; - if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, odb->repo->hash_algo); - if (oi->contentp) - *oi->contentp = xmemdupz(co->buf, co->size); - if (oi->mtimep) - *oi->mtimep = 0; - oi->whence = OI_CACHED; - } + if (!odb_source_read_object_info(&odb->inmemory_objects->base, oid, oi, flags)) return 0; - } odb_prepare_alternates(odb); diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index ccbb622eaef031..12c80f9b34a58a 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -1,5 +1,57 @@ #include "git-compat-util.h" +#include "odb.h" #include "odb/source-inmemory.h" +#include "repository.h" + +static const struct cached_object *find_cached_object(struct odb_source_inmemory *source, + const struct object_id *oid) +{ + static const struct cached_object empty_tree = { + .type = OBJ_TREE, + .buf = "", + }; + const struct cached_object_entry *co = source->objects; + + for (size_t i = 0; i < source->objects_nr; i++, co++) + if (oideq(&co->oid, oid)) + return &co->value; + + if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree)) + return &empty_tree; + + return NULL; +} + +static int odb_source_inmemory_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags UNUSED) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + const struct cached_object *object; + + object = find_cached_object(inmemory, oid); + if (!object) + return -1; + + if (oi) { + if 
(oi->typep) + *(oi->typep) = object->type; + if (oi->sizep) + *(oi->sizep) = object->size; + if (oi->disk_sizep) + *(oi->disk_sizep) = 0; + if (oi->delta_base_oid) + oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); + if (oi->contentp) + *oi->contentp = xmemdupz(object->buf, object->size); + if (oi->mtimep) + *oi->mtimep = 0; + oi->whence = OI_CACHED; + } + + return 0; +} static void odb_source_inmemory_free(struct odb_source *source) { @@ -19,6 +71,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); source->base.free = odb_source_inmemory_free; + source->base.read_object_info = odb_source_inmemory_read_object_info; return source; } From 70a84fb6a8377bc22aea68dcb4eee539c8033adb Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:35 +0200 Subject: [PATCH 145/241] odb/source-inmemory: implement `read_object_stream()` callback Implement the `read_object_stream()` callback function for the in-memory source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 12c80f9b34a58a..39f0e799c74519 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -1,6 +1,7 @@ #include "git-compat-util.h" #include "odb.h" #include "odb/source-inmemory.h" +#include "odb/streaming.h" #include "repository.h" static const struct cached_object *find_cached_object(struct odb_source_inmemory *source, @@ -53,6 +54,56 @@ static int odb_source_inmemory_read_object_info(struct odb_source *source, return 0; } +struct odb_read_stream_inmemory { + struct odb_read_stream base; + const unsigned char *buf; + size_t offset; +}; + +static ssize_t odb_read_stream_inmemory_read(struct odb_read_stream *stream, + char *buf, size_t buf_len) +{ + struct odb_read_stream_inmemory *inmemory = + container_of(stream, struct odb_read_stream_inmemory, base); + size_t bytes = buf_len; + + if (buf_len > inmemory->base.size - inmemory->offset) + bytes = inmemory->base.size - inmemory->offset; + + memcpy(buf, inmemory->buf + inmemory->offset, bytes); + inmemory->offset += bytes; + + return bytes; +} + +static int odb_read_stream_inmemory_close(struct odb_read_stream *stream UNUSED) +{ + return 0; +} + +static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct odb_read_stream_inmemory *stream; + const struct cached_object *object; + + object = find_cached_object(inmemory, oid); + if (!object) + return -1; + + CALLOC_ARRAY(stream, 1); + stream->base.read = odb_read_stream_inmemory_read; + stream->base.close = odb_read_stream_inmemory_close; + stream->base.size = object->size; + stream->base.type = object->type; + stream->buf = object->buf; + + *out = &stream->base; 
+ return 0; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); @@ -72,6 +123,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.free = odb_source_inmemory_free; source->base.read_object_info = odb_source_inmemory_read_object_info; + source->base.read_object_stream = odb_source_inmemory_read_object_stream; return source; } From 3541e25ce8a838067c0358fef09c30e0ff93609a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:36 +0200 Subject: [PATCH 146/241] odb/source-inmemory: implement `write_object()` callback Implement the `write_object()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 16 ++-------------- odb/source-inmemory.c | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index 6a3912adac2f0e..24e929f03cbccf 100644 --- a/odb.c +++ b/odb.c @@ -733,24 +733,12 @@ int odb_pretend_object(struct object_database *odb, void *buf, unsigned long len, enum object_type type, struct object_id *oid) { - struct cached_object_entry *co; - char *co_buf; - hash_object_file(odb->repo->hash_algo, buf, len, type, oid); if (odb_has_object(odb, oid, 0)) return 0; - ALLOC_GROW(odb->inmemory_objects->objects, - odb->inmemory_objects->objects_nr + 1, - odb->inmemory_objects->objects_alloc); - co = &odb->inmemory_objects->objects[odb->inmemory_objects->objects_nr++]; - co->value.size = len; - co->value.type = type; - co_buf = xmalloc(len); - memcpy(co_buf, buf, len); - co->value.buf = co_buf; - oidcpy(&co->oid, oid); - return 0; + return odb_source_write_object(&odb->inmemory_objects->base, + buf, len, type, oid, NULL, 0); } void *odb_read_object(struct object_database *odb, diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 39f0e799c74519..4848011df5189c 100644 --- 
a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "object-file.h" #include "odb.h" #include "odb/source-inmemory.h" #include "odb/streaming.h" @@ -104,6 +105,29 @@ static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, return 0; } +static int odb_source_inmemory_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid UNUSED, + enum odb_write_object_flags flags UNUSED) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct cached_object_entry *object; + + hash_object_file(source->odb->repo->hash_algo, buf, len, type, oid); + + ALLOC_GROW(inmemory->objects, inmemory->objects_nr + 1, + inmemory->objects_alloc); + object = &inmemory->objects[inmemory->objects_nr++]; + object->value.size = len; + object->value.type = type; + object->value.buf = xmemdupz(buf, len); + oidcpy(&object->oid, oid); + + return 0; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); @@ -124,6 +148,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.free = odb_source_inmemory_free; source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; + source->base.write_object = odb_source_inmemory_write_object; return source; } From ab12a57fcec732687ee3cbc796614cfc5376659c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:37 +0200 Subject: [PATCH 147/241] odb/source-inmemory: implement `write_object_stream()` callback Implement the `write_object_stream()` callback function for the in-memory source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 4848011df5189c..d05a13df45ea5f 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -128,6 +128,45 @@ static int odb_source_inmemory_write_object(struct odb_source *source, return 0; } +static int odb_source_inmemory_write_object_stream(struct odb_source *source, + struct odb_write_stream *stream, + size_t len, + struct object_id *oid) +{ + char buf[16384]; + size_t total_read = 0; + char *data; + int ret; + + CALLOC_ARRAY(data, len); + while (!stream->is_finished) { + ssize_t bytes_read; + + bytes_read = odb_write_stream_read(stream, buf, sizeof(buf)); + if (total_read + bytes_read > len) { + ret = error("object stream yielded more bytes than expected"); + goto out; + } + + memcpy(data + total_read, buf, bytes_read); + total_read += bytes_read; + } + + if (total_read != len) { + ret = error("object stream yielded less bytes than expected"); + goto out; + } + + ret = odb_source_inmemory_write_object(source, data, len, OBJ_BLOB, oid, + NULL, 0); + if (ret < 0) + goto out; + +out: + free(data); + return ret; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); @@ -149,6 +188,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.write_object = odb_source_inmemory_write_object; + source->base.write_object_stream = odb_source_inmemory_write_object_stream; return source; } From 4439f83061ae52711819c88e74d14e68a56ba20a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:38 +0200 Subject: [PATCH 148/241] cbtree: allow using 
arbitrary wrapper structures for nodes The cbtree subsystem allows the user to store arbitrary data in a prefix-free set of strings. This is used by us to store object IDs in a way that we can easily iterate through them in lexicographic order, and so that we can easily perform lookups with shortened object IDs. In its current form, it is not easily possible to store arbitrary data with the tree nodes. There are a couple of approaches such a caller could try to use, but none of them really work: - One may embed the `struct cb_node` in a custom structure. This does not work though as `struct cb_node` contains a flex array, and embedding such a struct in another struct is forbidden. - One may use a `union` over `struct cb_node` and one's own data type, which _is_ allowed even if the struct contains a flex array. This does not work though, as the compiler may align members of the struct so that the node key would not immediately start where the flex array starts. - One may allocate `struct cb_node` such that it has room for both its key and the custom data. This has the downside though that if the custom data is itself a pointer to allocated memory, then the leak checker will not consider the pointer to be alive anymore. Refactor the cbtree to drop the flex array and instead take in an explicit offset for where to find the key, which allows the caller to embed `struct cb_node` in a wrapper struct. Note that this change has the downside that we now have a bit of padding in our structure, which grows the size from 60 to 64 bytes on a 64 bit system. On the other hand though, it allows us to get rid of the memory copies that we previously had to do to ensure proper alignment. This seems like a reasonable tradeoff.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- cbtree.c | 25 ++++++++++++++++++------- cbtree.h | 17 +++++++++-------- oidtree.c | 33 ++++++++++++++------------------- 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/cbtree.c b/cbtree.c index 4ab794bddce0c6..8f5edbb80ace51 100644 --- a/cbtree.c +++ b/cbtree.c @@ -7,6 +7,11 @@ #include "git-compat-util.h" #include "cbtree.h" +static inline uint8_t *cb_node_key(struct cb_tree *t, struct cb_node *node) +{ + return (uint8_t *) node + t->key_offset; +} + static struct cb_node *cb_node_of(const void *p) { return (struct cb_node *)((uintptr_t)p - 1); @@ -33,6 +38,7 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen) uint8_t c; int newdirection; struct cb_node **wherep, *p; + uint8_t *node_key, *p_key; assert(!((uintptr_t)node & 1)); /* allocations must be aligned */ @@ -41,23 +47,26 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen) return NULL; /* success */ } + node_key = cb_node_key(t, node); + /* see if a node already exists */ - p = cb_internal_best_match(t->root, node->k, klen); + p = cb_internal_best_match(t->root, node_key, klen); + p_key = cb_node_key(t, p); /* find first differing byte */ for (newbyte = 0; newbyte < klen; newbyte++) { - if (p->k[newbyte] != node->k[newbyte]) + if (p_key[newbyte] != node_key[newbyte]) goto different_byte_found; } return p; /* element exists, let user deal with it */ different_byte_found: - newotherbits = p->k[newbyte] ^ node->k[newbyte]; + newotherbits = p_key[newbyte] ^ node_key[newbyte]; newotherbits |= newotherbits >> 1; newotherbits |= newotherbits >> 2; newotherbits |= newotherbits >> 4; newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255; - c = p->k[newbyte]; + c = p_key[newbyte]; newdirection = (1 + (newotherbits | c)) >> 8; node->byte = newbyte; @@ -78,7 +87,7 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen) break; if (q->byte == 
newbyte && q->otherbits > newotherbits) break; - c = q->byte < klen ? node->k[q->byte] : 0; + c = q->byte < klen ? node_key[q->byte] : 0; direction = (1 + (q->otherbits | c)) >> 8; wherep = q->child + direction; } @@ -93,7 +102,7 @@ struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen) { struct cb_node *p = cb_internal_best_match(t->root, k, klen); - return p && !memcmp(p->k, k, klen) ? p : NULL; + return p && !memcmp(cb_node_key(t, p), k, klen) ? p : NULL; } static int cb_descend(struct cb_node *p, cb_iter fn, void *arg) @@ -115,6 +124,7 @@ int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, struct cb_node *p = t->root; struct cb_node *top = p; size_t i = 0; + uint8_t *p_key; if (!p) return 0; /* empty tree */ @@ -130,8 +140,9 @@ int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, top = p; } + p_key = cb_node_key(t, p); for (i = 0; i < klen; i++) { - if (p->k[i] != kpfx[i]) + if (p_key[i] != kpfx[i]) return 0; /* "best" match failed */ } diff --git a/cbtree.h b/cbtree.h index c374b1b3db9d82..4647d4a32f87c6 100644 --- a/cbtree.h +++ b/cbtree.h @@ -6,9 +6,9 @@ * * This is adapted to store arbitrary data (not just NUL-terminated C strings * and allocates no memory internally. The user needs to allocate - * "struct cb_node" and fill cb_node.k[] with arbitrary match data - * for memcmp. - * If "klen" is variable, then it should be embedded into "c_node.k[]" + * "struct cb_node" and provide `key_offset` to indicate where the key can be + * found relative to the `struct cb_node` for memcmp. + * If "klen" is variable, then it should be embedded into the key. * Recursion is bound by the maximum value of "klen" used. 
*/ #ifndef CBTREE_H @@ -23,18 +23,19 @@ struct cb_node { */ uint32_t byte; uint8_t otherbits; - uint8_t k[FLEX_ARRAY]; /* arbitrary data, unaligned */ }; struct cb_tree { struct cb_node *root; + ptrdiff_t key_offset; }; -#define CBTREE_INIT { 0 } - -static inline void cb_init(struct cb_tree *t) +static inline void cb_init(struct cb_tree *t, + ptrdiff_t key_offset) { - struct cb_tree blank = CBTREE_INIT; + struct cb_tree blank = { + .key_offset = key_offset, + }; memcpy(t, &blank, sizeof(*t)); } diff --git a/oidtree.c b/oidtree.c index ab9fe7ec7aecce..117649753fbc1f 100644 --- a/oidtree.c +++ b/oidtree.c @@ -6,9 +6,14 @@ #include "oidtree.h" #include "hash.h" +struct oidtree_node { + struct cb_node base; + struct object_id key; +}; + void oidtree_init(struct oidtree *ot) { - cb_init(&ot->tree); + cb_init(&ot->tree, offsetof(struct oidtree_node, key)); mem_pool_init(&ot->mem_pool, 0); } @@ -22,20 +27,13 @@ void oidtree_clear(struct oidtree *ot) void oidtree_insert(struct oidtree *ot, const struct object_id *oid) { - struct cb_node *on; - struct object_id k; + struct oidtree_node *on; if (!oid->algo) BUG("oidtree_insert requires oid->algo"); - on = mem_pool_alloc(&ot->mem_pool, sizeof(*on) + sizeof(*oid)); - - /* - * Clear the padding and copy the result in separate steps to - * respect the 4-byte alignment needed by struct object_id. - */ - oidcpy(&k, oid); - memcpy(on->k, &k, sizeof(k)); + on = mem_pool_alloc(&ot->mem_pool, sizeof(*on)); + oidcpy(&on->key, oid); /* * n.b. Current callers won't get us duplicates, here. If a @@ -43,7 +41,7 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) * that won't be freed until oidtree_clear. 
Currently it's not * worth maintaining a free list */ - cb_insert(&ot->tree, on, sizeof(*oid)); + cb_insert(&ot->tree, &on->base, sizeof(*oid)); } bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) @@ -73,21 +71,18 @@ struct oidtree_each_data { static int iter(struct cb_node *n, void *cb_data) { + struct oidtree_node *node = container_of(n, struct oidtree_node, base); struct oidtree_each_data *data = cb_data; - struct object_id k; - - /* Copy to provide 4-byte alignment needed by struct object_id. */ - memcpy(&k, n->k, sizeof(k)); - if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo) + if (data->algo != GIT_HASH_UNKNOWN && data->algo != node->key.algo) return 0; if (data->last_nibble_at) { - if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) + if ((node->key.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) return 0; } - return data->cb(&k, data->cb_data); + return data->cb(&node->key, data->cb_data); } int oidtree_each(struct oidtree *ot, const struct object_id *prefix, From d960bd0f21b7c34cafb05745e2440c5e03a944bf Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:39 +0200 Subject: [PATCH 149/241] oidtree: add ability to store data The oidtree data structure is currently only used to store object IDs, without any associated data. Consequently, it can only really be used to track which object IDs exist, and we can use the tree structure to efficiently operate on OID prefixes. But there are valid use cases where we want to both: - Store object IDs in a sorted order. - Associate arbitrary data with them. Refactor the oidtree interface so that it allows us to store arbitrary payloads within the respective nodes. This will be used in the next commit. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- loose.c | 2 +- object-file.c | 3 ++- oidtree.c | 37 ++++++++++++++++++++++++++++++++----- oidtree.h | 12 ++++++++++-- t/unit-tests/u-oidtree.c | 26 +++++++++++++++++++++++--- 5 files changed, 68 insertions(+), 12 deletions(-) diff --git a/loose.c b/loose.c index 07333be6969fcc..f7a3dd1a72f0fc 100644 --- a/loose.c +++ b/loose.c @@ -57,7 +57,7 @@ static int insert_loose_map(struct odb_source *source, inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(files->loose->cache, compat_oid); + oidtree_insert(files->loose->cache, compat_oid, NULL); return inserted; } diff --git a/object-file.c b/object-file.c index 3e70e5d668692f..d04ab572530c92 100644 --- a/object-file.c +++ b/object-file.c @@ -1857,6 +1857,7 @@ static int for_each_object_wrapper_cb(const struct object_id *oid, } static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, + void *node_data UNUSED, void *cb_data) { struct for_each_object_wrapper_data *data = cb_data; @@ -2002,7 +2003,7 @@ static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) { - oidtree_insert(data, oid); + oidtree_insert(data, oid, NULL); return 0; } diff --git a/oidtree.c b/oidtree.c index 117649753fbc1f..e43f18026e1041 100644 --- a/oidtree.c +++ b/oidtree.c @@ -9,6 +9,7 @@ struct oidtree_node { struct cb_node base; struct object_id key; + void *data; }; void oidtree_init(struct oidtree *ot) @@ -25,15 +26,22 @@ void oidtree_clear(struct oidtree *ot) } } -void oidtree_insert(struct oidtree *ot, const struct object_id *oid) +struct oidtree_data { + struct object_id oid; +}; + +void oidtree_insert(struct oidtree *ot, const struct object_id *oid, + void *data) { struct oidtree_node *on; + struct cb_node *node; if (!oid->algo) BUG("oidtree_insert requires oid->algo"); on = mem_pool_alloc(&ot->mem_pool, sizeof(*on)); 
oidcpy(&on->key, oid); + on->data = data; /* * n.b. Current callers won't get us duplicates, here. If a @@ -41,13 +49,19 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) * that won't be freed until oidtree_clear. Currently it's not * worth maintaining a free list */ - cb_insert(&ot->tree, &on->base, sizeof(*oid)); + node = cb_insert(&ot->tree, &on->base, sizeof(*oid)); + if (node) { + struct oidtree_node *preexisting = container_of(node, struct oidtree_node, base); + preexisting->data = data; + } } -bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) +static struct oidtree_node *oidtree_lookup(struct oidtree *ot, + const struct object_id *oid) { struct object_id k; size_t klen = sizeof(k); + struct cb_node *node; oidcpy(&k, oid); @@ -58,7 +72,20 @@ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) < offsetof(struct object_id, algo)); - return !!cb_lookup(&ot->tree, (const uint8_t *)&k, klen); + node = cb_lookup(&ot->tree, (const uint8_t *)&k, klen); + return node ? container_of(node, struct oidtree_node, base) : NULL; +} + +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) +{ + struct oidtree_node *node = oidtree_lookup(ot, oid); + return node ? 1 : 0; +} + +void *oidtree_get(struct oidtree *ot, const struct object_id *oid) +{ + struct oidtree_node *node = oidtree_lookup(ot, oid); + return node ? 
node->data : NULL; } struct oidtree_each_data { @@ -82,7 +109,7 @@ static int iter(struct cb_node *n, void *cb_data) return 0; } - return data->cb(&node->key, data->cb_data); + return data->cb(&node->key, node->data, data->cb_data); } int oidtree_each(struct oidtree *ot, const struct object_id *prefix, diff --git a/oidtree.h b/oidtree.h index 2b7bad2e60a51d..baa5a436eadaf8 100644 --- a/oidtree.h +++ b/oidtree.h @@ -29,18 +29,26 @@ void oidtree_init(struct oidtree *ot); */ void oidtree_clear(struct oidtree *ot); -/* Insert the object ID into the tree. */ -void oidtree_insert(struct oidtree *ot, const struct object_id *oid); +/* + * Insert the object ID into the tree and store the given pointer alongside + * it. The data pointer of any preexisting entry will be overwritten. + */ +void oidtree_insert(struct oidtree *ot, const struct object_id *oid, + void *data); /* Check whether the tree contains the given object ID. */ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid); +/* Get the payload stored with the given object ID. */ +void *oidtree_get(struct oidtree *ot, const struct object_id *oid); + /* * Callback function used for `oidtree_each()`. Returning a non-zero exit code * will cause iteration to stop. The exit code will be propagated to the caller * of `oidtree_each()`. 
*/ typedef int (*oidtree_each_cb)(const struct object_id *oid, + void *node_data, void *cb_data); /* diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c index d4d05c7dc3e4f7..f0d5ebb733bfce 100644 --- a/t/unit-tests/u-oidtree.c +++ b/t/unit-tests/u-oidtree.c @@ -19,7 +19,7 @@ static int fill_tree_loc(struct oidtree *ot, const char *hexes[], size_t n) for (size_t i = 0; i < n; i++) { struct object_id oid; cl_parse_any_oid(hexes[i], &oid); - oidtree_insert(ot, &oid); + oidtree_insert(ot, &oid, NULL); } return 0; } @@ -38,9 +38,9 @@ struct expected_hex_iter { const char *query; }; -static int check_each_cb(const struct object_id *oid, void *data) +static int check_each_cb(const struct object_id *oid, void *node_data UNUSED, void *cb_data) { - struct expected_hex_iter *hex_iter = data; + struct expected_hex_iter *hex_iter = cb_data; struct object_id expected; cl_assert(hex_iter->i < hex_iter->expected_hexes.nr); @@ -105,3 +105,23 @@ void test_oidtree__each(void) check_each(&ot, "32100", "321", NULL); check_each(&ot, "32", "320", "321", NULL); } + +void test_oidtree__insert_overwrites_data(void) +{ + struct object_id oid; + struct oidtree ot; + int a, b; + + cl_parse_any_oid("1", &oid); + + oidtree_init(&ot); + + oidtree_insert(&ot, &oid, NULL); + cl_assert_equal_p(oidtree_get(&ot, &oid), NULL); + oidtree_insert(&ot, &oid, &a); + cl_assert_equal_p(oidtree_get(&ot, &oid), &a); + oidtree_insert(&ot, &oid, &b); + cl_assert_equal_p(oidtree_get(&ot, &oid), &b); + + oidtree_clear(&ot); +} From 3611f693146e215d9bcbfaba6359fb35009eb45e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:40 +0200 Subject: [PATCH 150/241] odb/source-inmemory: convert to use oidtree The in-memory source stores its objects in a simple array that we grow as needed. This has a couple of downsides: - The object lookup is O(n). This doesn't matter in practice because we only store a small number of objects. 
- We don't have an easy way to iterate over all objects in lexicographic order. - We don't have an easy way to compute unique object ID prefixes. Refactor the code to use an oidtree instead. This is the same data structure used by our loose object source, and thus it means we get a bunch of functionality for free. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 72 +++++++++++++++++++++++++++++++------------ odb/source-inmemory.h | 13 ++------ 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index d05a13df45ea5f..3b51cc7fefd86b 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -3,20 +3,29 @@ #include "odb.h" #include "odb/source-inmemory.h" #include "odb/streaming.h" +#include "oidtree.h" #include "repository.h" -static const struct cached_object *find_cached_object(struct odb_source_inmemory *source, - const struct object_id *oid) +struct inmemory_object { + enum object_type type; + const void *buf; + unsigned long size; +}; + +static const struct inmemory_object *find_cached_object(struct odb_source_inmemory *source, + const struct object_id *oid) { - static const struct cached_object empty_tree = { + static const struct inmemory_object empty_tree = { .type = OBJ_TREE, .buf = "", }; - const struct cached_object_entry *co = source->objects; + const struct inmemory_object *object; - for (size_t i = 0; i < source->objects_nr; i++, co++) - if (oideq(&co->oid, oid)) - return &co->value; + if (source->objects) { + object = oidtree_get(source->objects, oid); + if (object) + return object; + } if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree)) return &empty_tree; @@ -30,7 +39,7 @@ static int odb_source_inmemory_read_object_info(struct odb_source *source, enum object_info_flags flags UNUSED) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); - const struct cached_object *object; + const struct inmemory_object 
*object; object = find_cached_object(inmemory, oid); if (!object) @@ -88,7 +97,7 @@ static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); struct odb_read_stream_inmemory *stream; - const struct cached_object *object; + const struct inmemory_object *object; object = find_cached_object(inmemory, oid); if (!object) @@ -113,17 +122,23 @@ static int odb_source_inmemory_write_object(struct odb_source *source, enum odb_write_object_flags flags UNUSED) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); - struct cached_object_entry *object; + struct inmemory_object *object; hash_object_file(source->odb->repo->hash_algo, buf, len, type, oid); - ALLOC_GROW(inmemory->objects, inmemory->objects_nr + 1, - inmemory->objects_alloc); - object = &inmemory->objects[inmemory->objects_nr++]; - object->value.size = len; - object->value.type = type; - object->value.buf = xmemdupz(buf, len); - oidcpy(&object->oid, oid); + if (!inmemory->objects) { + CALLOC_ARRAY(inmemory->objects, 1); + oidtree_init(inmemory->objects); + } else if (oidtree_contains(inmemory->objects, oid)) { + return 0; + } + + CALLOC_ARRAY(object, 1); + object->size = len; + object->type = type; + object->buf = xmemdupz(buf, len); + + oidtree_insert(inmemory->objects, oid, object); return 0; } @@ -167,12 +182,29 @@ static int odb_source_inmemory_write_object_stream(struct odb_source *source, return ret; } +static int inmemory_object_free(const struct object_id *oid UNUSED, + void *node_data, + void *cb_data UNUSED) +{ + struct inmemory_object *object = node_data; + free((void *) object->buf); + free(object); + return 0; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); - for (size_t i = 0; i < inmemory->objects_nr; i++) - free((char *) inmemory->objects[i].value.buf); - free(inmemory->objects); 
+ + if (inmemory->objects) { + struct object_id null_oid = { 0 }; + + oidtree_each(inmemory->objects, &null_oid, 0, + inmemory_object_free, NULL); + oidtree_clear(inmemory->objects); + free(inmemory->objects); + } + free(inmemory->base.path); free(inmemory); } diff --git a/odb/source-inmemory.h b/odb/source-inmemory.h index d1b05a3996468a..a88fc2e320ed5c 100644 --- a/odb/source-inmemory.h +++ b/odb/source-inmemory.h @@ -3,14 +3,7 @@ #include "odb/source.h" -struct cached_object_entry { - struct object_id oid; - struct cached_object { - enum object_type type; - const void *buf; - unsigned long size; - } value; -}; +struct oidtree; /* * An in-memory source that you can write objects to that shall be made @@ -20,9 +13,7 @@ struct cached_object_entry { */ struct odb_source_inmemory { struct odb_source base; - - struct cached_object_entry *objects; - size_t objects_nr, objects_alloc; + struct oidtree *objects; }; /* Create a new in-memory object database source. */ From b0419fb07662f85ffb8ecd8515c358dc6a2f9dcb Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:41 +0200 Subject: [PATCH 151/241] odb/source-inmemory: implement `for_each_object()` callback Implement the `for_each_object()` callback function for the in-memory source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 88 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 72 insertions(+), 16 deletions(-) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 3b51cc7fefd86b..f60eecbdbbdfff 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -33,6 +33,28 @@ static const struct inmemory_object *find_cached_object(struct odb_source_inmemo return NULL; } +static void populate_object_info(struct odb_source_inmemory *source, + struct object_info *oi, + const struct inmemory_object *object) +{ + if (!oi) + return; + + if (oi->typep) + *(oi->typep) = object->type; + if (oi->sizep) + *(oi->sizep) = object->size; + if (oi->disk_sizep) + *(oi->disk_sizep) = 0; + if (oi->delta_base_oid) + oidclr(oi->delta_base_oid, source->base.odb->repo->hash_algo); + if (oi->contentp) + *oi->contentp = xmemdupz(object->buf, object->size); + if (oi->mtimep) + *oi->mtimep = 0; + oi->whence = OI_CACHED; +} + static int odb_source_inmemory_read_object_info(struct odb_source *source, const struct object_id *oid, struct object_info *oi, @@ -45,22 +67,7 @@ static int odb_source_inmemory_read_object_info(struct odb_source *source, if (!object) return -1; - if (oi) { - if (oi->typep) - *(oi->typep) = object->type; - if (oi->sizep) - *(oi->sizep) = object->size; - if (oi->disk_sizep) - *(oi->disk_sizep) = 0; - if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); - if (oi->contentp) - *oi->contentp = xmemdupz(object->buf, object->size); - if (oi->mtimep) - *oi->mtimep = 0; - oi->whence = OI_CACHED; - } - + populate_object_info(inmemory, oi, object); return 0; } @@ -114,6 +121,54 @@ static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, return 0; } +struct odb_source_inmemory_for_each_object_data { + struct odb_source_inmemory *inmemory; + const struct object_info *request; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int 
odb_source_inmemory_for_each_object_cb(const struct object_id *oid, + void *node_data, void *cb_data) +{ + struct odb_source_inmemory_for_each_object_data *data = cb_data; + struct inmemory_object *object = node_data; + + if (data->request) { + struct object_info oi = *data->request; + populate_object_info(data->inmemory, &oi, object); + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +static int odb_source_inmemory_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct odb_source_inmemory_for_each_object_data payload = { + .inmemory = inmemory, + .request = request, + .cb = cb, + .cb_data = cb_data, + }; + struct object_id null_oid = { 0 }; + + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) || + (opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)) + return 0; + if (!inmemory->objects) + return 0; + + return oidtree_each(inmemory->objects, + opts->prefix ? 
opts->prefix : &null_oid, opts->prefix_hex_len, + odb_source_inmemory_for_each_object_cb, &payload); +} + static int odb_source_inmemory_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, @@ -219,6 +274,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.free = odb_source_inmemory_free; source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; + source->base.for_each_object = odb_source_inmemory_for_each_object; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; From bb59813e252fac1a78c9d178ba004772124198b1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:42 +0200 Subject: [PATCH 152/241] odb/source-inmemory: implement `find_abbrev_len()` callback Implement the `find_abbrev_len()` callback function for the in-memory source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index f60eecbdbbdfff..44d9bbedeca6f2 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -169,6 +169,44 @@ static int odb_source_inmemory_for_each_object(struct odb_source *source, odb_source_inmemory_for_each_object_cb, &payload); } +struct find_abbrev_len_data { + const struct object_id *oid; + unsigned len; +}; + +static int find_abbrev_len_cb(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) +{ + struct find_abbrev_len_data *data = cb_data; + unsigned len = oid_common_prefix_hexlen(oid, data->oid); + if (len != hash_algos[oid->algo].hexsz && len >= data->len) + data->len = len + 1; + return 0; +} + +static int odb_source_inmemory_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_for_each_object_options opts = { + .prefix = oid, + .prefix_hex_len = min_len, + }; + struct find_abbrev_len_data data = { + .oid = oid, + .len = min_len, + }; + int ret; + + ret = odb_source_inmemory_for_each_object(source, NULL, find_abbrev_len_cb, + &data, &opts); + *out = data.len; + + return ret; +} + static int odb_source_inmemory_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, @@ -275,6 +313,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.for_each_object = odb_source_inmemory_for_each_object; + source->base.find_abbrev_len = odb_source_inmemory_find_abbrev_len; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; 
From aaa0a8585d55421c5ba13368cb51f938ab89b325 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:43 +0200 Subject: [PATCH 153/241] odb/source-inmemory: implement `count_objects()` callback Implement the `count_objects()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 44d9bbedeca6f2..674dbcad3001a6 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -207,6 +207,25 @@ static int odb_source_inmemory_find_abbrev_len(struct odb_source *source, return ret; } +static int count_objects_cb(const struct object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *cb_data) +{ + unsigned long *counter = cb_data; + (*counter)++; + return 0; +} + +static int odb_source_inmemory_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags UNUSED, + unsigned long *out) +{ + struct odb_for_each_object_options opts = { 0 }; + *out = 0; + return odb_source_inmemory_for_each_object(source, NULL, count_objects_cb, + out, &opts); +} + static int odb_source_inmemory_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, @@ -314,6 +333,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.for_each_object = odb_source_inmemory_for_each_object; source->base.find_abbrev_len = odb_source_inmemory_find_abbrev_len; + source->base.count_objects = odb_source_inmemory_count_objects; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; From 770c0329787c1539f907f582ac8353f645b6af85 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:44 +0200 Subject: [PATCH 
154/241] odb/source-inmemory: implement `freshen_object()` callback Implement the `freshen_object()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 674dbcad3001a6..8934e0f54785de 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -294,6 +294,15 @@ static int odb_source_inmemory_write_object_stream(struct odb_source *source, return ret; } +static int odb_source_inmemory_freshen_object(struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + if (find_cached_object(inmemory, oid)) + return 1; + return 0; +} + static int inmemory_object_free(const struct object_id *oid UNUSED, void *node_data, void *cb_data UNUSED) @@ -336,6 +345,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.count_objects = odb_source_inmemory_count_objects; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; + source->base.freshen_object = odb_source_inmemory_freshen_object; return source; } From dcd7f1c97f856f891da489dd305e0712ac40fa36 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:45 +0200 Subject: [PATCH 155/241] odb/source-inmemory: stub out remaining functions Stub out remaining functions that we either don't need or that are basically no-ops. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 8934e0f54785de..e004566d768b01 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -303,6 +303,32 @@ static int odb_source_inmemory_freshen_object(struct odb_source *source, return 0; } +static int odb_source_inmemory_begin_transaction(struct odb_source *source UNUSED, + struct odb_transaction **out UNUSED) +{ + return error("in-memory source does not support transactions"); +} + +static int odb_source_inmemory_read_alternates(struct odb_source *source UNUSED, + struct strvec *out UNUSED) +{ + return 0; +} + +static int odb_source_inmemory_write_alternate(struct odb_source *source UNUSED, + const char *alternate UNUSED) +{ + return error("in-memory source does not support alternates"); +} + +static void odb_source_inmemory_close(struct odb_source *source UNUSED) +{ +} + +static void odb_source_inmemory_reprepare(struct odb_source *source UNUSED) +{ +} + static int inmemory_object_free(const struct object_id *oid UNUSED, void *node_data, void *cb_data UNUSED) @@ -338,6 +364,8 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); source->base.free = odb_source_inmemory_free; + source->base.close = odb_source_inmemory_close; + source->base.reprepare = odb_source_inmemory_reprepare; source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.for_each_object = odb_source_inmemory_for_each_object; @@ -346,6 +374,9 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; 
source->base.freshen_object = odb_source_inmemory_freshen_object; + source->base.begin_transaction = odb_source_inmemory_begin_transaction; + source->base.read_alternates = odb_source_inmemory_read_alternates; + source->base.write_alternate = odb_source_inmemory_write_alternate; return source; } From 98a194d93525bed456921142f71fb6ae44257ef5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:46 +0200 Subject: [PATCH 156/241] odb: generic in-memory source Make the in-memory source generic. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 8 ++++---- odb.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/odb.c b/odb.c index 24e929f03cbccf..965ef68e4eca22 100644 --- a/odb.c +++ b/odb.c @@ -560,7 +560,7 @@ static int do_oid_object_info_extended(struct object_database *odb, if (is_null_oid(real)) return -1; - if (!odb_source_read_object_info(&odb->inmemory_objects->base, oid, oi, flags)) + if (!odb_source_read_object_info(odb->inmemory_objects, oid, oi, flags)) return 0; odb_prepare_alternates(odb); @@ -737,7 +737,7 @@ int odb_pretend_object(struct object_database *odb, if (odb_has_object(odb, oid, 0)) return 0; - return odb_source_write_object(&odb->inmemory_objects->base, + return odb_source_write_object(odb->inmemory_objects, buf, len, type, oid, NULL, 0); } @@ -1020,7 +1020,7 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); - o->inmemory_objects = odb_source_inmemory_new(o); + o->inmemory_objects = &odb_source_inmemory_new(o)->base; free(to_free); @@ -1045,7 +1045,7 @@ static void odb_free_sources(struct object_database *o) o->sources = next; } - odb_source_free(&o->inmemory_objects->base); + odb_source_free(o->inmemory_objects); o->inmemory_objects = NULL; kh_destroy_odb_path_map(o->source_by_path); diff --git a/odb.h b/odb.h index 
c3a7edf9c848dd..73553ed5a7b1ea 100644 --- a/odb.h +++ b/odb.h @@ -81,7 +81,7 @@ struct object_database { * to write them into the object store (e.g. a browse-only * application). */ - struct odb_source_inmemory *inmemory_objects; + struct odb_source *inmemory_objects; /* * A fast, rough count of the number of objects in the repository. From fe633bd199547a581191c4c30755d39aa7c29280 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:47 +0200 Subject: [PATCH 157/241] t/unit-tests: add tests for the in-memory object source While the in-memory object source is a full-fledged source, our code base only exercises parts of its functionality because we only use it in git-blame(1). Implement unit tests to verify that the yet-unused functionality of the backend works as expected. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 1 + t/meson.build | 1 + t/unit-tests/u-odb-inmemory.c | 313 ++++++++++++++++++++++++++++++++++ 3 files changed, 315 insertions(+) create mode 100644 t/unit-tests/u-odb-inmemory.c diff --git a/Makefile b/Makefile index 3cda12c4556a6f..68b4daa1ad275f 100644 --- a/Makefile +++ b/Makefile @@ -1529,6 +1529,7 @@ CLAR_TEST_SUITES += u-hash CLAR_TEST_SUITES += u-hashmap CLAR_TEST_SUITES += u-list-objects-filter-options CLAR_TEST_SUITES += u-mem-pool +CLAR_TEST_SUITES += u-odb-inmemory CLAR_TEST_SUITES += u-oid-array CLAR_TEST_SUITES += u-oidmap CLAR_TEST_SUITES += u-oidtree diff --git a/t/meson.build b/t/meson.build index 7528e5cda5fef0..db5e01c49b9b2b 100644 --- a/t/meson.build +++ b/t/meson.build @@ -6,6 +6,7 @@ clar_test_suites = [ 'unit-tests/u-hashmap.c', 'unit-tests/u-list-objects-filter-options.c', 'unit-tests/u-mem-pool.c', + 'unit-tests/u-odb-inmemory.c', 'unit-tests/u-oid-array.c', 'unit-tests/u-oidmap.c', 'unit-tests/u-oidtree.c', diff --git a/t/unit-tests/u-odb-inmemory.c b/t/unit-tests/u-odb-inmemory.c new file mode 100644 index 00000000000000..482502ef4b1e11 --- /dev/null +++ 
b/t/unit-tests/u-odb-inmemory.c @@ -0,0 +1,313 @@ +#include "unit-test.h" +#include "hex.h" +#include "odb/source-inmemory.h" +#include "odb/streaming.h" +#include "oidset.h" +#include "repository.h" +#include "strbuf.h" + +#define RANDOM_OID "da39a3ee5e6b4b0d3255bfef95601890afd80709" +#define FOOBAR_OID "f6ea0495187600e7b2288c8ac19c5886383a4632" + +static struct repository repo = { + .hash_algo = &hash_algos[GIT_HASH_SHA1], +}; +static struct object_database *odb; + +static void cl_assert_object_info(struct odb_source_inmemory *source, + const struct object_id *oid, + enum object_type expected_type, + const char *expected_content) +{ + enum object_type actual_type; + unsigned long actual_size; + void *actual_content; + struct object_info oi = { + .typep = &actual_type, + .sizep = &actual_size, + .contentp = &actual_content, + }; + + cl_must_pass(odb_source_read_object_info(&source->base, oid, &oi, 0)); + cl_assert_equal_u(actual_size, strlen(expected_content)); + cl_assert_equal_u(actual_type, expected_type); + cl_assert_equal_s((char *) actual_content, expected_content); + + free(actual_content); +} + +void test_odb_inmemory__initialize(void) +{ + odb = odb_new(&repo, "", ""); +} + +void test_odb_inmemory__cleanup(void) +{ + odb_free(odb); +} + +void test_odb_inmemory__new(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + cl_assert_equal_i(source->base.type, ODB_SOURCE_INMEMORY); + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_missing_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id oid; + const char *end; + + cl_must_pass(parse_oid_hex_algop(RANDOM_OID, &oid, &end, repo.hash_algo)); + cl_must_fail(odb_source_read_object_info(&source->base, &oid, NULL, 0)); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_empty_tree(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + cl_assert_object_info(source, 
repo.hash_algo->empty_tree, OBJ_TREE, ""); + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_written_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + const char data[] = "foobar"; + struct object_id written_oid; + + cl_must_pass(odb_source_write_object(&source->base, data, strlen(data), + OBJ_BLOB, &written_oid, NULL, 0)); + cl_assert_equal_s(oid_to_hex(&written_oid), FOOBAR_OID); + cl_assert_object_info(source, &written_oid, OBJ_BLOB, "foobar"); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_stream_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_read_stream *stream; + struct object_id written_oid; + const char data[] = "foobar"; + char buf[3] = { 0 }; + + cl_must_pass(odb_source_write_object(&source->base, data, strlen(data), + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_must_pass(odb_source_read_object_stream(&stream, &source->base, + &written_oid)); + cl_assert_equal_i(stream->type, OBJ_BLOB); + cl_assert_equal_u(stream->size, 6); + + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "fo"); + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "ob"); + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "ar"); + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 0); + + odb_read_stream_close(stream); + odb_source_free(&source->base); +} + +static int add_one_object(const struct object_id *oid, + struct object_info *oi UNUSED, + void *payload) +{ + struct oidset *actual_oids = payload; + cl_must_pass(oidset_insert(actual_oids, oid)); + return 0; +} + +void test_odb_inmemory__for_each_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_for_each_object_options opts = { 0 }; + struct oidset expected_oids = OIDSET_INIT; + struct oidset actual_oids = OIDSET_INIT; + struct strbuf buf = 
STRBUF_INIT; + + cl_must_pass(odb_source_for_each_object(&source->base, NULL, + add_one_object, &actual_oids, &opts)); + cl_assert_equal_u(oidset_size(&actual_oids), 0); + + for (int i = 0; i < 10; i++) { + struct object_id written_oid; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%d", i); + + cl_must_pass(odb_source_write_object(&source->base, buf.buf, buf.len, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(oidset_insert(&expected_oids, &written_oid)); + } + + cl_must_pass(odb_source_for_each_object(&source->base, NULL, + add_one_object, &actual_oids, &opts)); + cl_assert_equal_b(oidset_equal(&expected_oids, &actual_oids), true); + + odb_source_free(&source->base); + oidset_clear(&expected_oids); + oidset_clear(&actual_oids); + strbuf_release(&buf); +} + +static int abort_after_two_objects(const struct object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *payload) +{ + unsigned *counter = payload; + (*counter)++; + if (*counter == 2) + return 123; + return 0; +} + +void test_odb_inmemory__for_each_object_can_abort_iteration(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_for_each_object_options opts = { 0 }; + struct object_id written_oid; + unsigned counter = 0; + + cl_must_pass(odb_source_write_object(&source->base, "1", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "2", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "3", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_assert_equal_i(odb_source_for_each_object(&source->base, NULL, + abort_after_two_objects, + &counter, &opts), + 123); + cl_assert_equal_u(counter, 2); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__count_objects(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id written_oid; + unsigned long count; + + cl_must_pass(odb_source_count_objects(&source->base, 0, &count)); + 
cl_assert_equal_u(count, 0); + + cl_must_pass(odb_source_write_object(&source->base, "1", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "2", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "3", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_must_pass(odb_source_count_objects(&source->base, 0, &count)); + cl_assert_equal_u(count, 3); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__find_abbrev_len(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id oid1, oid2; + unsigned abbrev_len; + + /* + * The two blobs we're about to write share the first 10 hex characters + * of their object IDs ("a09f43dc45"), so at least 11 characters are + * needed to tell them apart: + * + * "368317" -> a09f43dc4562d45115583f5094640ae237df55f7 + * "514796" -> a09f43dc45fef837235eb7e6b1a6ca5e169a3981 + * + * With only one blob written we expect a length of 4. + */ + cl_must_pass(odb_source_write_object(&source->base, "368317", strlen("368317"), + OBJ_BLOB, &oid1, NULL, 0)); + cl_must_pass(odb_source_find_abbrev_len(&source->base, &oid1, 4, + &abbrev_len)); + cl_assert_equal_u(abbrev_len, 4); + + /* + * With both objects present, the shared 10-character prefix means we + * need at least 11 characters to uniquely identify either object. 
+ */ + cl_must_pass(odb_source_write_object(&source->base, "514796", strlen("514796"), + OBJ_BLOB, &oid2, NULL, 0)); + cl_must_pass(odb_source_find_abbrev_len(&source->base, &oid1, 4, + &abbrev_len)); + cl_assert_equal_u(abbrev_len, 11); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__freshen_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id written_oid; + struct object_id oid; + const char *end; + + cl_must_pass(parse_oid_hex_algop(RANDOM_OID, &oid, &end, repo.hash_algo)); + cl_assert_equal_i(odb_source_freshen_object(&source->base, &oid), 0); + + cl_must_pass(odb_source_write_object(&source->base, "foobar", + strlen("foobar"), OBJ_BLOB, + &written_oid, NULL, 0)); + cl_assert_equal_i(odb_source_freshen_object(&source->base, + &written_oid), 1); + + odb_source_free(&source->base); +} + +struct membuf_write_stream { + struct odb_write_stream base; + const char *buf; + size_t offset; + size_t size; +}; + +static ssize_t membuf_write_stream_read(struct odb_write_stream *stream, + unsigned char *buf, size_t len) +{ + struct membuf_write_stream *s = container_of(stream, struct membuf_write_stream, base); + size_t chunk_size = 2; + + if (chunk_size > len) + chunk_size = len; + if (chunk_size > s->size - s->offset) + chunk_size = s->size - s->offset; + + memcpy(buf, s->buf + s->offset, chunk_size); + + s->offset += chunk_size; + if (s->offset == s->size) + s->base.is_finished = 1; + + return chunk_size; +} + +void test_odb_inmemory__write_object_stream(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + const char data[] = "foobar"; + struct membuf_write_stream stream = { + .base.read = membuf_write_stream_read, + .buf = data, + .size = strlen(data), + }; + struct object_id written_oid; + + cl_must_pass(odb_source_write_object_stream(&source->base, &stream.base, + strlen(data), &written_oid)); + cl_assert_equal_s(oid_to_hex(&written_oid), FOOBAR_OID); + 
cl_assert_object_info(source, &written_oid, OBJ_BLOB, "foobar"); + + odb_source_free(&source->base); +} From 880636fbe5a0df1d6904c89d1697ac13ea568368 Mon Sep 17 00:00:00 2001 From: LorenzoPegorari Date: Sat, 11 Apr 2026 00:54:59 +0200 Subject: [PATCH 158/241] pack-write: add explanation to promisor file content In the entire codebase there is no explanation as to why the ".promisor" files may contain the ref names (and their associated hashes) that were fetched at the time the corresponding packfile was downloaded. As explained in the log message of commit 5374a290 (fetch-pack: write fetched refs to .promisor, 2019-10-14), where this loop originally came from, these ref names (and associated hashes) are not used for anything in the production, but are solely there to help debugging. Explain this in a new comment. Signed-off-by: LorenzoPegorari Signed-off-by: Junio C Hamano --- pack-write.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pack-write.c b/pack-write.c index 83eaf88541eefb..b8ab9510fff098 100644 --- a/pack-write.c +++ b/pack-write.c @@ -603,6 +603,15 @@ void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_ int i, err; FILE *output = xfopen(promisor_name, "w"); + /* + * Write in the .promisor file the ref names and associated hashes, + * obtained by fetch-pack, at the point of generation of the + * corresponding packfile. These pieces of info are only used to make + * it easier to debug issues with partial clones, as we can identify + * what refs (and their associated hashes) were fetched at the time + * the packfile was downloaded, and if necessary, compare those hashes + * against what the promisor remote reports now. 
+ */ for (i = 0; i < nr_sought; i++) fprintf(output, "%s %s\n", oid_to_hex(&sought[i]->old_oid), sought[i]->name); From 1be880725457180fb629bd4bcef5f3d5efb1f803 Mon Sep 17 00:00:00 2001 From: LorenzoPegorari Date: Sat, 11 Apr 2026 00:55:11 +0200 Subject: [PATCH 159/241] repack-promisor add helper to fill promisor file after repack A ".promisor" file may contain ref names (and their associated hashes) that were fetched at the time the corresponding packfile was downloaded. This information is used for debugging reasons. This information is stored as lines structured like this: " ". Create a `copy_promisor_content()` helper function that allows this debugging info to not be lost after a `repack`, by copying it inside a new ".promisor" file. The function logic is the following: * Take all ".promisor" files contained inside the given `repo`. * Ignore those whose name is contained inside the given `strset not_repacked_names`, which basically acts as a "promisor ignorelist" (intended to be used for packfiles that have not been repacked). * Read each line of the remaining ".promisor" files, which can be: * " " if the ".promisor" file was never repacked. If so, add the time (in Unix time) at which the ".promisor" file was last modified " is the filename for a specific packfile (e.g., "pack-abc123.pack"), and "/path/to/commits.list" is a list of commit OIDs which will receive bitmaps. The helper respects `bitmapPseudoMerge.*` configuration for creating pseudo-merge bitmaps alongside the regular commit bitmaps. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/helper/test-bitmap.c | 110 +++++++++++++++++++++++++++++++++++++++- t/t5310-pack-bitmaps.sh | 24 +++++++++ 2 files changed, 133 insertions(+), 1 deletion(-) diff --git a/t/helper/test-bitmap.c b/t/helper/test-bitmap.c index 16a01669e4149a..96c0000c787094 100644 --- a/t/helper/test-bitmap.c +++ b/t/helper/test-bitmap.c @@ -2,7 +2,10 @@ #include "test-tool.h" #include "git-compat-util.h" +#include "hex.h" +#include "odb.h" #include "pack-bitmap.h" +#include "pseudo-merge.h" #include "setup.h" static int bitmap_list_commits(void) @@ -35,6 +38,108 @@ static int bitmap_dump_pseudo_merge_objects(uint32_t n) return test_bitmap_pseudo_merge_objects(the_repository, n); } +struct bitmap_writer_data { + struct packing_data packed; + struct pack_idx_entry **index; + uint32_t nr; +}; + +static int add_packed_object(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *_data) +{ + struct bitmap_writer_data *data = _data; + struct object_entry *entry; + struct object_info oi = OBJECT_INFO_INIT; + enum object_type type; + + oi.typep = &type; + + entry = packlist_alloc(&data->packed, oid); + entry->idx.offset = nth_packed_object_offset(pack, pos); + if (packed_object_info(pack, entry->idx.offset, &oi) < 0) + die("could not get type of object %s", + oid_to_hex(oid)); + oe_set_type(entry, type); + oe_set_in_pack(&data->packed, entry, pack); + data->index[data->nr++] = &entry->idx; + + return 0; +} + +static int idx_oid_cmp(const void *va, const void *vb) +{ + const struct pack_idx_entry *a = *(const struct pack_idx_entry **)va; + const struct pack_idx_entry *b = *(const struct pack_idx_entry **)vb; + + return oidcmp(&a->oid, &b->oid); +} + +static int bitmap_write(const char *basename) +{ + struct packed_git *p = NULL; + struct bitmap_writer_data data = { 0 }; + struct bitmap_writer writer; + struct strbuf buf = STRBUF_INIT; + + prepare_repo_settings(the_repository); + 
repo_for_each_pack(the_repository, p) { + if (!strcmp(pack_basename(p), basename)) + break; + } + + if (!p) + die("could not find pack '%s'", basename); + + if (open_pack_index(p)) + die("cannot open pack index for '%s'", p->pack_name); + + prepare_packing_data(the_repository, &data.packed); + ALLOC_ARRAY(data.index, p->num_objects); + + for_each_object_in_pack(p, add_packed_object, &data, + ODB_FOR_EACH_OBJECT_PACK_ORDER); + + bitmap_writer_init(&writer, the_repository, &data.packed, NULL); + bitmap_writer_build_type_index(&writer, data.index); + + while (strbuf_getline_lf(&buf, stdin) != EOF) { + struct object_id oid; + struct commit *c; + + if (get_oid_hex(buf.buf, &oid)) + die("invalid OID: %s", buf.buf); + + c = lookup_commit(the_repository, &oid); + if (!c || repo_parse_commit(the_repository, c)) + die("could not parse commit %s", buf.buf); + + bitmap_writer_push_commit(&writer, c, false); + } + + select_pseudo_merges(&writer); + if (bitmap_writer_build(&writer) < 0) + die("failed to build bitmaps"); + + bitmap_writer_set_checksum(&writer, p->hash); + + QSORT(data.index, p->num_objects, idx_oid_cmp); + + strbuf_reset(&buf); + strbuf_addstr(&buf, p->pack_name); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".bitmap"); + bitmap_writer_finish(&writer, data.index, buf.buf, 0); + + bitmap_writer_free(&writer); + strbuf_release(&buf); + free(data.index); + clear_packing_data(&data.packed); + + return 0; +} + int cmd__bitmap(int argc, const char **argv) { setup_git_directory(); @@ -51,13 +156,16 @@ int cmd__bitmap(int argc, const char **argv) return bitmap_dump_pseudo_merge_commits(atoi(argv[2])); if (argc == 3 && !strcmp(argv[1], "dump-pseudo-merge-objects")) return bitmap_dump_pseudo_merge_objects(atoi(argv[2])); + if (argc == 3 && !strcmp(argv[1], "write")) + return bitmap_write(argv[2]); usage("\ttest-tool bitmap list-commits\n" "\ttest-tool bitmap list-commits-with-offset\n" "\ttest-tool bitmap dump-hashes\n" "\ttest-tool bitmap 
dump-pseudo-merges\n" "\ttest-tool bitmap dump-pseudo-merge-commits \n" - "\ttest-tool bitmap dump-pseudo-merge-objects "); + "\ttest-tool bitmap dump-pseudo-merge-objects \n" + "\ttest-tool bitmap write < "); return -1; } diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index f693cb56691988..9489e59fa55c61 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -648,4 +648,28 @@ test_expect_success 'truncated bitmap fails gracefully (lookup table)' ' test_grep corrupted.bitmap.index stderr ' +test_expect_success 'test-tool bitmap write' ' + git init bitmap-write-helper && + test_when_finished "rm -fr bitmap-write-helper" && + ( + cd bitmap-write-helper && + + test_commit_bulk 64 && + git repack -ad && + + pack="$(ls .git/objects/pack/pack-*.pack)" && + + git rev-parse HEAD >commits && + test-tool bitmap write "$(basename $pack)" actual && + sort commits >expect && + test_cmp expect actual && + + git rev-list --count --objects --use-bitmap-index HEAD >actual && + git rev-list --count --objects HEAD >expect && + test_cmp expect actual + ) +' + test_done From 856b377390ff987a4b42e3fb9e49a39afb84d761 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2026 19:56:43 -0400 Subject: [PATCH 178/241] t5333: demonstrate various pseudo-merge bugs Using the test helper introduced via the previous commit, add various failing tests demonstrating bugs in the pseudo-merge implementation. These are all marked as failing with one exception. The "sampleRate=0" test describes a latent bug, which is only reachable through a code path that is itself masked by a separate bug. A future commit will fix that bug, and, in turn, cause the aforementioned test to fail. Accordingly, that commit will mark the test as failing, and it will be re-marked as passing in a separate commit which fixes the once-latent bug. For the rest: the following commits will explain and fix the underlying bugs in detail. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t5333-pseudo-merge-bitmaps.sh | 198 ++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 1f7a5d82ee4d44..20e77ab4390e85 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -462,4 +462,202 @@ test_expect_success 'use pseudo-merge in boundary traversal' ' ) ' +test_expect_failure 'apply pseudo-merges during fill-in traversal' ' + git init pseudo-merge-fill-in-traversal && + test_when_finished "rm -fr pseudo-merge-fill-in-traversal" && + ( + cd pseudo-merge-fill-in-traversal && + + git config bitmapPseudoMerge.test.pattern refs/tags/ && + git config bitmapPseudoMerge.test.maxMerges 1 && + git config bitmapPseudoMerge.test.stableThreshold never && + + test_commit_bulk 64 && + tag_everything && + git repack -ad && + + pack=$(ls .git/objects/pack/pack-*.pack) && + git rev-parse HEAD~63 >in && + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 1 merges && + + test_commit stale && + + git rev-list --count --objects HEAD >expect && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --objects --use-bitmap-index HEAD >actual && + test_pseudo_merges_satisfied 1 in && + while read oid + do + echo "create refs/group-$side/$oid $oid" || return 1 + done in && + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 2 merges && + + test_commit stale && + + git rev-list --count --objects HEAD >expect && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --objects --use-bitmap-index HEAD >actual && + test_pseudo_merges_satisfied 2 in && + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 2 merges && + + test_commit stale && + + git rev-list --count --objects HEAD >expect && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --objects 
--use-bitmap-index HEAD >actual && + test_pseudo_merges_satisfied 2 in && + GIT_TEST_DATE_NOW=$test_tick \ + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 1 merges + ) +' + +test_expect_success 'sampleRate=0 does not cause division by zero' ' + git init pseudo-merge-sample-rate-zero && + test_when_finished "rm -fr pseudo-merge-sample-rate-zero" && + ( + cd pseudo-merge-sample-rate-zero && + + test_commit_bulk 64 && + tag_everything && + git repack -ad && + + pack="$(ls .git/objects/pack/pack-*.pack)" && + + git config bitmapPseudoMerge.test.pattern "refs/tags/" && + git config bitmapPseudoMerge.test.maxMerges 1 && + git config bitmapPseudoMerge.test.sampleRate 0 && + git config bitmapPseudoMerge.test.threshold now && + git config bitmapPseudoMerge.test.stableThreshold never && + + git rev-parse HEAD~63 >in && + test-tool bitmap write "$(basename $pack)" Date: Mon, 13 Apr 2026 19:56:46 -0400 Subject: [PATCH 179/241] pack-bitmap-write: sort pseudo-merge commit lookup table in pack order The pseudo-merge commit lookup table stores each commit's position in the pack- or pseudo-pack order, and is used to perform a binary search in order to determine which pseudo-merge(s) a given commit belongs to. However, the table was previously sorted in lexical order (via `oid_array_sort()`), causing the binary search to fail. While this causes pseudo-merge bitmaps to be de-facto broken for fill-in traversal, there are a couple of important points to keep in mind: * Pseudo-merge application during the initial phases of a bitmap-based traversal are applied via `cascade_pseudo_merges_1()`. This function enumerates the known pseudo-merges and determines if its parents are a subset of the traversal roots. This is a different path than the fill-in traversal, where we are looking for any pseudo-merges which may be satisfied after visiting some commit along an object walk, which involves the aforementioned (broken) binary search. 
As a consequence, any pseudo-merges we apply at this stage are done so correctly. * While this bug makes applying pseudo-merges during fill-in traversal effectively broken, it does not produce wrong results. Instead of applying the *wrong* pseudo-merge, we will simply fail to find satisfied pseudo-merges, leaving the traversal to use the existing fill-in routines. Fix this by sorting the table by bit position before writing, matching the order that the reader's binary search expects. This does produce a change the on-disk format insofar as the actual code now complies with the documented format (for more details, refer to: Documentation/technical/bitmap-format.adoc). Given that this never worked in the first place, such a change should be OK to perform. If an out-of-tree implementation of pseudo-merges happened to generate bitmaps that comply with the documented format, they will continue to be read and interpreted as normal. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 21 ++++++++++++++++++++- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 8338d7217ef48f..86ed6a5d78cd04 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -819,6 +819,20 @@ static void write_selected_commits_v1(struct bitmap_writer *writer, } } +static int pseudo_merge_commit_pos_cmp(const void *_va, const void *_vb, + void *_data) +{ + struct bitmap_writer *writer = _data; + uint32_t pos_a = find_object_pos(writer, _va, NULL); + uint32_t pos_b = find_object_pos(writer, _vb, NULL); + + if (pos_a < pos_b) + return -1; + if (pos_a > pos_b) + return 1; + return 0; +} + static void write_pseudo_merges(struct bitmap_writer *writer, struct hashfile *f) { @@ -876,7 +890,12 @@ static void write_pseudo_merges(struct bitmap_writer *writer, oid_array_append(&commits, &kh_key(writer->pseudo_merge_commits, i)); } - oid_array_sort(&commits); + /* + * Sort the 
commits by their bit position so that the lookup + * table can be binary searched by the reader (see + * find_pseudo_merge()). + */ + QSORT_S(commits.oid, commits.nr, pseudo_merge_commit_pos_cmp, writer); /* write lookup table (non-extended) */ for (i = 0; i < commits.nr; i++) { diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 20e77ab4390e85..dce43ed8dc6596 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -462,7 +462,7 @@ test_expect_success 'use pseudo-merge in boundary traversal' ' ) ' -test_expect_failure 'apply pseudo-merges during fill-in traversal' ' +test_expect_success 'apply pseudo-merges during fill-in traversal' ' git init pseudo-merge-fill-in-traversal && test_when_finished "rm -fr pseudo-merge-fill-in-traversal" && ( From 9ca8305510f20653f334c61aef31b458fd3c27d3 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2026 19:56:49 -0400 Subject: [PATCH 180/241] pack-bitmap: fix inverted binary search in `pseudo_merge_at()` The binary search in `pseudo_merge_at()` has its "lo" and "hi" updates swapped: when the midpoint's offset is less than the target, it sets `hi = mi` (searching left) instead of `lo = mi + 1` (searching right), and vice versa. This means that lookups for pseudo-merges whose offset is not near the midpoint of the pseudo-merge table are likely to fail. In practice, with a single pseudo-merge group this is masked because the lone entry is always at the midpoint. With multiple groups, the inverted comparisons cause lookups to search in the wrong direction, potentially missing entries. Swap the "lo" and "hi" assignments to search in the correct direction, making it possible to apply pseudo-merges during fill-in when more than one pseudo-merge exists in a group. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pseudo-merge.c | 4 ++-- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pseudo-merge.c b/pseudo-merge.c index ff18b6c364245e..fb71c7617924a7 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -559,9 +559,9 @@ static struct pseudo_merge *pseudo_merge_at(const struct pseudo_merge_map *pm, if (got == want) return use_pseudo_merge(pm, &pm->v[mi]); else if (got < want) - hi = mi; - else lo = mi + 1; + else + hi = mi; } warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX), diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index dce43ed8dc6596..5bfb510312451c 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -496,7 +496,7 @@ test_expect_success 'apply pseudo-merges during fill-in traversal' ' ) ' -test_expect_failure 'apply pseudo-merges from multiple groups during fill-in' ' +test_expect_success 'apply pseudo-merges from multiple groups during fill-in' ' git init pseudo-merge-fill-in-multi && test_when_finished "rm -fr pseudo-merge-fill-in-multi" && ( From 4ca18785553325b564e91a1c4b7455e07721cb00 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2026 19:56:52 -0400 Subject: [PATCH 181/241] pack-bitmap: fix pseudo-merge lookup for shared commits When a commit appears in more than one pseudo-merge group, its entry in the commit lookup table has the high bit set in its offset field, indicating that the offset points to an "extended" table containing the set of pseudo-merges for that commit. There are three bugs in this path: * The `next_ext` offset in `write_pseudo_merges()` undercounts the per-entry size of the lookup table (8 vs. 12 bytes). * `nth_pseudo_merge_ext()` calls `read_pseudo_merge_commit_at()` on a pseudo-merge bitmap offset, misinterpreting it as a 12-byte commit table entry. 
* The error check after `pseudo_merge_ext_at()` in `apply_pseudo_merges_for_commit()` tests `< -1` instead of `< 0`, silently swallowing errors from `error()`. The first bug is on the write side: each commit lookup entry contains a 4- and 8-byte unsigned value for a total of 12 bytes, but the calculation assumes that the entry only contains 8 bytes of data. This makes `next_ext` too small, so the extended-table offsets that get written point into the middle of the non-extended lookup table rather than past it. The reader then interprets non-extended lookup data as extended entries, producing garbage. The second bug is on the read side and is independently fatal: even with a correctly positioned extended table, `nth_pseudo_merge_ext()` feeds the offset it reads (which points at pseudo-merge bitmap data) to `read_pseudo_merge_commit_at()`. That function tries to parse 12 bytes as a `pseudo_merge_commit` struct, clobbering `merge->pseudo_merge_ofs` with whatever happens to be at that location. The caller only needs `pseudo_merge_ofs`, so the fix is to store the offset directly rather than re-parsing a commit table entry. The `commit_pos` field is left untouched, retaining the value that `find_pseudo_merge()` set earlier. The third bug is latent. With the first two fixes applied, the extended table is correctly written and read, so `pseudo_merge_ext_at()` does not fail during normal operation. The `< -1` vs `< 0` distinction only matters when the bitmap file is corrupt or truncated, in which case the error would be silently ignored and the code would proceed with uninitialized data. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 2 +- pseudo-merge.c | 4 ++-- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 86ed6a5d78cd04..1c8070f99c03ca 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -877,7 +877,7 @@ static void write_pseudo_merges(struct bitmap_writer *writer, next_ext = st_add(hashfile_total(f), st_mult(kh_size(writer->pseudo_merge_commits), - sizeof(uint64_t))); + sizeof(uint32_t) + sizeof(uint64_t))); table_start = hashfile_total(f); diff --git a/pseudo-merge.c b/pseudo-merge.c index fb71c7617924a7..34e1da00b4e200 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -600,7 +600,7 @@ static int nth_pseudo_merge_ext(const struct pseudo_merge_map *pm, return error(_("out-of-bounds read: (%"PRIuMAX" >= %"PRIuMAX")"), (uintmax_t)ofs, (uintmax_t)pm->map_size); - read_pseudo_merge_commit_at(merge, pm->map + ofs); + merge->pseudo_merge_ofs = ofs; return 0; } @@ -671,7 +671,7 @@ int apply_pseudo_merges_for_commit(const struct pseudo_merge_map *pm, off_t ofs = merge_commit.pseudo_merge_ofs & ~((uint64_t)1<<63); uint32_t i; - if (pseudo_merge_ext_at(pm, &ext, ofs) < -1) { + if (pseudo_merge_ext_at(pm, &ext, ofs) < 0) { warning(_("could not read extended pseudo-merge table " "for commit %s"), oid_to_hex(&commit->object.oid)); diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 5bfb510312451c..8844a3bced9728 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -549,7 +549,7 @@ test_expect_success 'apply pseudo-merges from multiple groups during fill-in' ' ) ' -test_expect_failure 'apply pseudo-merges with overlapping groups during fill-in' ' +test_expect_success 'apply pseudo-merges with overlapping groups during fill-in' ' test_when_finished "rm -fr pseudo-merge-fill-in-overlap" && git init pseudo-merge-fill-in-overlap && ( From 
beb821741ac8df4808dbbbc141c7b2b48cfcea4c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2026 19:56:55 -0400 Subject: [PATCH 182/241] pack-bitmap: parse commits in `find_pseudo_merge_group_for_ref()` `find_pseudo_merge_group_for_ref()` uses the commit's date to classify it as either "stable" (older than the stable threshold) or "unstable" (otherwise). However, to find the relevant commit from a given OID, the function `find_pseudo_merge_group_for_ref()` uses `lookup_commit()` which does not parse commits. Because an unparsed commit has its "date" set to zero, every candidate is placed in the "stable" bucket regardless of its actual committer timestamp. This means the `bitmapPseudoMerge.*.threshold` and `stableThreshold` configuration options have no effect: the stable/unstable split is always determined by comparing against zero rather than the real commit date. The net result is that pseudo-merge groups are partitioned by `stableSize` instead of the intended decay-based sizing, and the `sampleRate` knob (which only applies to the unstable path) is never exercised. Fix this by calling `repo_parse_commit()` after `lookup_commit()`, bailing out of the callback if parsing fails. The corresponding test configures two pseudo-merge groups that both match all tags. The "stable" group uses `threshold=1.month.ago`, and the "all" group uses `threshold=now`. The test use our custom "GIT_TEST_DATE_NOW" environment variable by setting it to the value of "$test_tick" to align Git's notion of "now" (and therefore "1.month.ago") with the `test_tick` timestamps, so the commits appear to be younger than one month: only the "all" group matches them, producing exactly one pseudo-merge. Without the fix every commit has `date == 0`, which satisfies `date <= threshold` for both groups (since 0 is older than one month ago), and the "stable" group erroneously matches as well. 
Now that commits are correctly classified as "unstable", the bug described in the test exercising the "sampleRate=0" test is reachable, and the test is marked as failing. It will be fixed in a following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pseudo-merge.c | 2 ++ t/t5333-pseudo-merge-bitmaps.sh | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pseudo-merge.c b/pseudo-merge.c index 34e1da00b4e200..d79e5fb649a8b5 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -236,6 +236,8 @@ static int find_pseudo_merge_group_for_ref(const struct reference *ref, void *_d c = lookup_commit(the_repository, maybe_peeled); if (!c) return 0; + if (repo_parse_commit(the_repository, c)) + return 0; if (!packlist_find(writer->to_pack, maybe_peeled)) return 0; diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 8844a3bced9728..63d2f64361d0b6 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -592,32 +592,34 @@ test_expect_success 'apply pseudo-merges with overlapping groups during fill-in' ) ' -test_expect_failure 'pseudo-merge commits are correctly classified by date' ' +test_expect_success 'pseudo-merge commits are correctly classified by date' ' git init pseudo-merge-date-classification && test_when_finished "rm -fr pseudo-merge-date-classification" && ( cd pseudo-merge-date-classification && test_commit_bulk 64 && + tag_everything && git repack -ad && pack="$(ls .git/objects/pack/pack-*.pack)" && # Configure two pseudo-merge groups: one that only - # matches "stable" refs (older than one month), and one - # that matches all refs. With 64 freshly-created tags - # (all younger than one month) the stable group should - # have zero pseudo-merges and the catch-all group should - # have one. + # matches "stable" refs (older than one month), and + # one that matches all refs. 
With 64 tags whose + # commits are all younger than one month, the + # "stable" group should have zero pseudo-merges and + # the "all" group should have one. # # Use GIT_TEST_DATE_NOW to align "now" (and therefore # "1.month.ago") with the test_tick timestamps so that # the commits are within the last month. # - # This exercises the date-based classification in - # find_pseudo_merge_group_for_ref(), which requires - # that commits are parsed before inspecting their date. + # Without parsing the commit, its date field would + # be zero, causing it to satisfy date <= threshold + # for the "stable" group as well, and both groups + # would produce pseudo-merges. git config bitmapPseudoMerge.stable.pattern "refs/tags/" && git config bitmapPseudoMerge.stable.maxMerges 64 && git config bitmapPseudoMerge.stable.stableThreshold never && @@ -637,7 +639,7 @@ test_expect_failure 'pseudo-merge commits are correctly classified by date' ' ) ' -test_expect_success 'sampleRate=0 does not cause division by zero' ' +test_expect_failure 'sampleRate=0 does not cause division by zero' ' git init pseudo-merge-sample-rate-zero && test_when_finished "rm -fr pseudo-merge-sample-rate-zero" && ( From 2ad7d58dc3cd3f3600151003cbc4c9ff3df166ba Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2026 19:56:58 -0400 Subject: [PATCH 183/241] pack-bitmap: reject pseudo-merge "sampleRate" of 0 The "bitmapPseudoMerge.*.sampleRate" configuration controls what fraction of unstable commits are included in each pseudo-merge group. The config validation accepts values in the range `[0, 1]`, but a value of exactly 0 causes a division by zero in `select_pseudo_merges_1()`: if (j % (uint32_t)(1.0 / group->sample_rate)) When `sample_rate` is 0, `1.0 / 0.0` produces `+inf`, and casting infinity to `uint32_t` is undefined behavior in C. On most platforms this yields 0, making the subsequent modulo operation (`j % 0`) a fatal arithmetic trap. 
This path was not previously reachable because an earlier bug caused all pseudo-merge candidates to be classified as "stable" (where the sampling rate is not used), regardless of their actual commit date. Now that the date classification is fixed, the unstable path is exercised and the division by zero can fire. Fix this by changing the validation to require a strict lower bound and thus reject 0. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pseudo-merge.c | 4 ++-- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pseudo-merge.c b/pseudo-merge.c index d79e5fb649a8b5..75bed043602744 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -169,8 +169,8 @@ static int pseudo_merge_config(const char *var, const char *value, } } else if (!strcmp(key, "samplerate")) { group->sample_rate = git_config_double(var, value, ctx->kvi); - if (!(0 <= group->sample_rate && group->sample_rate <= 1)) { - warning(_("%s must be between 0 and 1, using default"), var); + if (!(0 < group->sample_rate && group->sample_rate <= 1)) { + warning(_("%s must be between 0 (exclusive) and 1, using default"), var); group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE; } } else if (!strcmp(key, "threshold")) { diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 63d2f64361d0b6..46e8e6a8ea17c9 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -639,7 +639,7 @@ test_expect_success 'pseudo-merge commits are correctly classified by date' ' ) ' -test_expect_failure 'sampleRate=0 does not cause division by zero' ' +test_expect_success 'sampleRate=0 does not cause division by zero' ' git init pseudo-merge-sample-rate-zero && test_when_finished "rm -fr pseudo-merge-sample-rate-zero" && ( From 0cc702d23225f1c6153b7c459c60a6bfb1117e6a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2026 19:57:01 -0400 Subject: [PATCH 184/241] pack-bitmap: prevent pattern leak on 
pseudo-merge re-assignment When "bitmapPseudoMerge.*.pattern" appears more than once for the same group, `pseudo_merge_config()` frees the old `regex_t *` pointer but does not call `regfree()` on it first. This leaks whatever internal state `regcomp()` allocated. The final cleanup path in `pseudo_merge_group_release()` does call `regfree()` before `free()`, so only the intermediate replacement is affected. Fix this by guarding the replacement with a NULL check and calling `regfree()` before `free()` when the pointer is non-NULL. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pseudo-merge.c | 5 ++++- t/t5333-pseudo-merge-bitmaps.sh | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/pseudo-merge.c b/pseudo-merge.c index 75bed043602744..22b8600d689de5 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -150,7 +150,10 @@ static int pseudo_merge_config(const char *var, const char *value, if (!strcmp(key, "pattern")) { struct strbuf re = STRBUF_INIT; - free(group->pattern); + if (group->pattern) { + regfree(group->pattern); + free(group->pattern); + } if (*value != '^') strbuf_addch(&re, '^'); strbuf_addstr(&re, value); diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 46e8e6a8ea17c9..34d432ce76d668 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -662,4 +662,34 @@ test_expect_success 'sampleRate=0 does not cause division by zero' ' ) ' +test_expect_success 'duplicate pseudo-merge pattern does not leak' ' + git init pseudo-merge-dup-pattern && + test_when_finished "rm -fr pseudo-merge-dup-pattern" && + + ( + cd pseudo-merge-dup-pattern && + + test_commit_bulk 64 && + tag_everything && + git repack -ad && + + pack=$(ls .git/objects/pack/pack-*.pack) && + + # Set the same group'\''s pattern twice. The second + # assignment should cleanly release the compiled regex + # from the first without leaking. 
+ git config bitmapPseudoMerge.test.pattern "refs/tags/" && + git config --add bitmapPseudoMerge.test.pattern "refs/tags/" && + git config bitmapPseudoMerge.test.maxMerges 1 && + git config bitmapPseudoMerge.test.threshold now && + git config bitmapPseudoMerge.test.stableThreshold never && + + git rev-parse HEAD~63 | + test-tool bitmap write "$(basename $pack)" && + + test_pseudo_merges >merges && + test_line_count = 1 merges + ) +' + test_done From 955c88fbc5ac916f8dababa458a963ebbeba9b41 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 15 Apr 2026 02:27:42 +0000 Subject: [PATCH 185/241] userdiff: tighten word-diff test case of the scheme driver The scheme driver separates identifiers only at parentheses of all sorts and whitespace, except that vertical bars act as brackets that enclose an identifier. The test case attempts to demonstrate the vertical bars with a change from 'some-text' to '|a greeting|'. However, this misses the goal because the same word coloring would be applied if '|a greeting|' were parsed as two words. Have an identifier between vertical bars with a space in both the pre- and the post-image and change only one side of the space to show that the single word exists between the vertical bars. Also add cases that change parentheses of all kinds in a sequence of parentheses to show that they are their own word each. Signed-off-by: Johannes Sixt Signed-off-by: Scott L. Burson Signed-off-by: Junio C Hamano --- t/t4034/scheme/expect | 5 +++-- t/t4034/scheme/post | 1 + t/t4034/scheme/pre | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/t/t4034/scheme/expect b/t/t4034/scheme/expect index 496cd5de8c9af3..138abe9f56b38f 100644 --- a/t/t4034/scheme/expect +++ b/t/t4034/scheme/expect @@ -2,10 +2,11 @@ index 74b6605..63b6ac4 100644 --- a/pre +++ b/post -@@ -1,6 +1,6 @@ +@@ -1,7 +1,7 @@ (define (myfunc a bmy-func first second) ; This is a really(moderately) cool function. 
(this\placethat\place (+ 3 4)) - (define some-text|a greeting| "hello") + (define |the greeting||a greeting| "hello") + ({}(([](func-n)[])){}) (let ((c (+ a badd1 first))) (format "one more than the total is %d" (add1+ c second)))) diff --git a/t/t4034/scheme/post b/t/t4034/scheme/post index 63b6ac4f8754d8..0e3bab101da03e 100644 --- a/t/t4034/scheme/post +++ b/t/t4034/scheme/post @@ -2,5 +2,6 @@ ; This is a (moderately) cool function. (that\place (+ 3 4)) (define |a greeting| "hello") + ({(([(func-n)]))}) (let ((c (add1 first))) (format "one more than the total is %d" (+ c second)))) diff --git a/t/t4034/scheme/pre b/t/t4034/scheme/pre index 74b66053574b67..03d77c7c430e07 100644 --- a/t/t4034/scheme/pre +++ b/t/t4034/scheme/pre @@ -1,6 +1,7 @@ (define (myfunc a b) ; This is a really cool function. (this\place (+ 3 4)) - (define some-text "hello") + (define |the greeting| "hello") + ({}(([](func-n)[])){}) (let ((c (+ a b))) (format "one more than the total is %d" (add1 c)))) From b79f7a3ad3ffde16b2cbc2457561669f4833f861 Mon Sep 17 00:00:00 2001 From: "Scott L. Burson" Date: Wed, 15 Apr 2026 02:27:43 +0000 Subject: [PATCH 186/241] userdiff: extend Scheme support to cover other Lisp dialects Common Lisp has top-level forms, such as 'defun' and 'defmacro', that are not matched by the current Scheme pattern. Also, it is more common in CL, when defining user macros intended as top-level forms, to prefix their names with "def" instead of "define"; such forms are also not matched. And some top-level forms don't even begin with "def". On the other hand, it is an established formatting convention in the Lisp community that only top-level forms start at the left margin. So matching any unindented line starting with an open parenthesis is an acceptable heuristic; false positives will be rare. However, there are also cases where notionally top-level forms are grouped together within some containing form. 
At least in the Common Lisp community, it is conventional to indent these by two spaces, or sometimes one. But matching just an open parenthesis indented by two spaces would be too broad; so the pattern added by this commit requires an indented form to start with "(def". It is believed that this strikes a good balance between potential false positives and false negatives. Signed-off-by: Scott L. Burson Acked-by: Johannes Sixt Signed-off-by: Junio C Hamano --- Documentation/gitattributes.adoc | 3 ++- t/t4018/scheme-lisp-defun-a | 4 ++++ t/t4018/scheme-lisp-defun-b | 4 ++++ t/t4018/scheme-lisp-eval-when | 4 ++++ t/t4018/{scheme-module => scheme-module-a} | 0 t/t4018/scheme-module-b | 6 ++++++ t/t4034/scheme/expect | 2 +- t/t4034/scheme/post | 2 +- t/t4034/scheme/pre | 2 +- userdiff.c | 22 ++++++++++++++++------ 10 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 t/t4018/scheme-lisp-defun-a create mode 100644 t/t4018/scheme-lisp-defun-b create mode 100644 t/t4018/scheme-lisp-eval-when rename t/t4018/{scheme-module => scheme-module-a} (100%) create mode 100644 t/t4018/scheme-module-b diff --git a/Documentation/gitattributes.adoc b/Documentation/gitattributes.adoc index f20041a323d174..bd76167a45eb71 100644 --- a/Documentation/gitattributes.adoc +++ b/Documentation/gitattributes.adoc @@ -911,7 +911,8 @@ patterns are available: - `rust` suitable for source code in the Rust language. -- `scheme` suitable for source code in the Scheme language. +- `scheme` suitable for source code in most Lisp dialects, + including Scheme, Emacs Lisp, Common Lisp, and Clojure. - `tex` suitable for source code for LaTeX documents. 
diff --git a/t/t4018/scheme-lisp-defun-a b/t/t4018/scheme-lisp-defun-a new file mode 100644 index 00000000000000..c3c750f76d7b07 --- /dev/null +++ b/t/t4018/scheme-lisp-defun-a @@ -0,0 +1,4 @@ +(defun some-func (x y z) RIGHT + (let ((a x) + (b y)) + (ChangeMe a b))) diff --git a/t/t4018/scheme-lisp-defun-b b/t/t4018/scheme-lisp-defun-b new file mode 100644 index 00000000000000..21be305968bf6b --- /dev/null +++ b/t/t4018/scheme-lisp-defun-b @@ -0,0 +1,4 @@ +(macrolet ((foo (x) `(bar ,x))) + (defun mumble (x) ; RIGHT + (when (> x 0) + (foo x)))) ; ChangeMe diff --git a/t/t4018/scheme-lisp-eval-when b/t/t4018/scheme-lisp-eval-when new file mode 100644 index 00000000000000..5d941d7e0edda2 --- /dev/null +++ b/t/t4018/scheme-lisp-eval-when @@ -0,0 +1,4 @@ +(eval-when (:compile-toplevel :load-toplevel :execute) ; RIGHT + (set-macro-character #\? + (lambda (stream char) + `(make-pattern-variable ,(read stream))))) ; ChangeMe diff --git a/t/t4018/scheme-module b/t/t4018/scheme-module-a similarity index 100% rename from t/t4018/scheme-module rename to t/t4018/scheme-module-a diff --git a/t/t4018/scheme-module-b b/t/t4018/scheme-module-b new file mode 100644 index 00000000000000..77bc0c5eff4775 --- /dev/null +++ b/t/t4018/scheme-module-b @@ -0,0 +1,6 @@ +(module A + (export with-display-exception) + (extern (display-exception display-exception)) + (def (with-display-exception thunk) RIGHT + (with-catch (lambda (e) (display-exception e (current-error-port)) e) + thunk ChangeMe))) diff --git a/t/t4034/scheme/expect b/t/t4034/scheme/expect index 138abe9f56b38f..fb7f2616fea547 100644 --- a/t/t4034/scheme/expect +++ b/t/t4034/scheme/expect @@ -6,7 +6,7 @@ (define (myfunc a bmy-func first second) ; This is a really(moderately) cool function. 
(this\placethat\place (+ 3 4)) - (define |the greeting||a greeting| "hello") + (define |the \| \greeting||a \greeting| |hello there|) ({}(([](func-n)[])){}) (let ((c (+ a badd1 first))) (format "one more than the total is %d" (add1+ c second)))) diff --git a/t/t4034/scheme/post b/t/t4034/scheme/post index 0e3bab101da03e..450cc234f75aea 100644 --- a/t/t4034/scheme/post +++ b/t/t4034/scheme/post @@ -1,7 +1,7 @@ (define (my-func first second) ; This is a (moderately) cool function. (that\place (+ 3 4)) - (define |a greeting| "hello") + (define |a \greeting| |hello there|) ({(([(func-n)]))}) (let ((c (add1 first))) (format "one more than the total is %d" (+ c second)))) diff --git a/t/t4034/scheme/pre b/t/t4034/scheme/pre index 03d77c7c430e07..e16ee7584946e4 100644 --- a/t/t4034/scheme/pre +++ b/t/t4034/scheme/pre @@ -1,7 +1,7 @@ (define (myfunc a b) ; This is a really cool function. (this\place (+ 3 4)) - (define |the greeting| "hello") + (define |the \| \greeting| |hello there|) ({}(([](func-n)[])){}) (let ((c (+ a b))) (format "one more than the total is %d" (add1 c)))) diff --git a/userdiff.c b/userdiff.c index fe710a68bfdfa6..b5412e6bc3ecd3 100644 --- a/userdiff.c +++ b/userdiff.c @@ -344,14 +344,24 @@ PATTERNS("rust", "|[0-9][0-9_a-fA-Fiosuxz]*(\\.([0-9]*[eE][+-]?)?[0-9_fF]*)?" "|[-+*\\/<>%&^|=!:]=|<<=?|>>=?|&&|\\|\\||->|=>|\\.{2}=|\\.{3}|::"), PATTERNS("scheme", - "^[\t ]*(\\(((define|def(struct|syntax|class|method|rules|record|proto|alias)?)[-*/ \t]|(library|module|struct|class)[*+ \t]).*)$", /* - * R7RS valid identifiers include any sequence enclosed - * within vertical lines having no backslashes + * An unindented opening parenthesis identifies a top-level + * expression in all Lisp dialects. */ - "\\|([^\\\\]*)\\|" - /* All other words should be delimited by spaces or parentheses */ - "|([^][)(}{[ \t])+"), + "^(\\(.*)$\n" + /* For Scheme: a possibly indented left paren followed by a keyword. 
*/ + "^[\t ]*(\\(((define|def(struct|syntax|class|method|rules|record|proto|alias)?)[-*/ \t]|(library|module|struct|class)[*+ \t]).*)$\n" + /* + * For all Lisp dialects: a slightly indented line starting with "(def". + */ + "^ ?(\\([Dd][Ee][Ff].*)$", + /* + * The union of R7RS and Common Lisp symbol syntax: allows arbitrary + * strings between vertical bars, including any escaped characters. + */ + "\\|([^|\\\\]|\\\\.)*\\|" + /* All other words should be delimited by spaces or parentheses. */ + "|([^][)(}{ \t])+"), PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", "\\\\[a-zA-Z@]+|\\\\.|([a-zA-Z0-9]|[^\x01-\x7f])+"), { .name = "default", .binary = -1 }, From 7cce609e086866d054a1433d0356fa71e55c108d Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:25 +0000 Subject: [PATCH 187/241] t9210, t9211: disable GIT_TEST_SPLIT_INDEX for scalar clone tests index.skipHash (Scalar default) and split-index are incompatible: the shared index gets a null OID when skipHash skips computing the hash, and the null OID causes the shared index to not be loaded on re-read. This triggers a BUG assertion in fsmonitor when the fsmonitor_dirty bitmap references more entries than the (now empty) index has. Disable GIT_TEST_SPLIT_INDEX in the scalar clone tests that hit this: tests 12, 13, and 22 in t9210 (matching the existing workaround in test 16), and all of t9211 (every test does scalar clone). Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- t/t9210-scalar.sh | 6 ++++++ t/t9211-scalar-clone.sh | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/t/t9210-scalar.sh b/t/t9210-scalar.sh index 009437a5f3168f..f2a6df77ceeb01 100755 --- a/t/t9210-scalar.sh +++ b/t/t9210-scalar.sh @@ -152,6 +152,10 @@ test_expect_success 'set up repository to clone' ' ' test_expect_success 'scalar clone' ' + # index.skipHash (Scalar default) and GIT_TEST_SPLIT_INDEX are + # incompatible: the shared index gets a null OID and fails to + # load on re-read. 
+ sane_unset GIT_TEST_SPLIT_INDEX && second=$(git rev-parse --verify second:second.t) && scalar clone "file://$(pwd)" cloned --single-branch && ( @@ -182,6 +186,7 @@ test_expect_success 'scalar clone' ' ' test_expect_success 'scalar clone --no-... opts' ' + sane_unset GIT_TEST_SPLIT_INDEX && # Note: redirect stderr always to avoid having a verbose test # run result in a difference in the --[no-]progress option. GIT_TRACE2_EVENT="$(pwd)/no-opt-trace" scalar clone \ @@ -307,6 +312,7 @@ test_expect_success '`scalar [...] ` errors out when dir is missing' ' SQ="'" test_expect_success UNZIP 'scalar diagnose' ' + sane_unset GIT_TEST_SPLIT_INDEX && scalar clone "file://$(pwd)" cloned --single-branch && git repack && echo "$(pwd)/.git/objects/" >>cloned/src/.git/objects/info/alternates && diff --git a/t/t9211-scalar-clone.sh b/t/t9211-scalar-clone.sh index bfbf22a4621843..2043f48a1acdf7 100755 --- a/t/t9211-scalar-clone.sh +++ b/t/t9211-scalar-clone.sh @@ -8,6 +8,11 @@ test_description='test the `scalar clone` subcommand' GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt,launchctl:true,schtasks:true" export GIT_TEST_MAINT_SCHEDULER +# index.skipHash (Scalar default) and GIT_TEST_SPLIT_INDEX are +# incompatible: the shared index gets a null OID and fails to +# load on re-read. Every test here uses scalar clone. +sane_unset GIT_TEST_SPLIT_INDEX + test_expect_success 'set up repository to clone' ' rm -rf .git && git init to-clone && From e21be6cd45db554862f40c90b385c1bc465c8335 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:26 +0000 Subject: [PATCH 188/241] fsmonitor: fix khash memory leak in do_handle_client The `shown` kh_str_t was freed with kh_release_str() at a point in the code only reachable in the non-trivial response path. When the client receives a trivial response, the code jumps to the `cleanup` label, skipping the kh_release_str() call entirely and leaking the hash table. 
Fix this by initializing `shown` to NULL and moving the cleanup to the `cleanup` label using kh_destroy_str(), which is safe to call on NULL. This ensures the hash table is freed regardless of which code path is taken. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 242c594646d1f5..bc4571938cc1db 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -671,7 +671,7 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, const struct fsmonitor_batch *batch; struct fsmonitor_batch *remainder = NULL; intmax_t count = 0, duplicates = 0; - kh_str_t *shown; + kh_str_t *shown = NULL; int hash_ret; int do_trivial = 0; int do_flush = 0; @@ -909,8 +909,6 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, total_response_len += payload.len; } - kh_release_str(shown); - pthread_mutex_lock(&state->main_lock); if (token_data->client_ref_count > 0) @@ -954,6 +952,7 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates); cleanup: + kh_destroy_str(shown); strbuf_release(&response_token); strbuf_release(&requested_token_id); strbuf_release(&payload); From 8b1d96554261aeef649bb3f36f9812a3c6e3f4da Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:27 +0000 Subject: [PATCH 189/241] fsmonitor: fix hashmap memory leak in fsmonitor_run_daemon The `state.cookies` hashmap is initialized during daemon startup but never freed during cleanup in the `done:` label of fsmonitor_run_daemon(). The cookie entries also have names allocated via strbuf_detach() that must be freed individually. Iterate the hashmap to free each cookie name, then call hashmap_clear_and_free() to release the entries and table. 
Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index bc4571938cc1db..d8d32b01ef2859 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -1404,6 +1404,15 @@ static int fsmonitor_run_daemon(void) done: pthread_cond_destroy(&state.cookies_cond); pthread_mutex_destroy(&state.main_lock); + { + struct hashmap_iter iter; + struct fsmonitor_cookie_item *cookie; + + hashmap_for_each_entry(&state.cookies, &iter, cookie, entry) + free(cookie->name); + hashmap_clear_and_free(&state.cookies, + struct fsmonitor_cookie_item, entry); + } fsm_listen__dtor(&state); fsm_health__dtor(&state); From 8372c88f583b8910f1e57c00c89c0afcca7018dc Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:28 +0000 Subject: [PATCH 190/241] compat/win32: add pthread_cond_timedwait Add a pthread_cond_timedwait() implementation to the Windows pthread compatibility layer using SleepConditionVariableCS() with a millisecond timeout computed from the absolute deadline. 
Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- compat/win32/pthread.c | 26 ++++++++++++++++++++++++++ compat/win32/pthread.h | 2 ++ 2 files changed, 28 insertions(+) diff --git a/compat/win32/pthread.c b/compat/win32/pthread.c index 7e93146963ec56..398caa96029718 100644 --- a/compat/win32/pthread.c +++ b/compat/win32/pthread.c @@ -66,3 +66,29 @@ int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) return err_win_to_posix(GetLastError()); return 0; } + +int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime) +{ + struct timeval now; + long long now_ms, deadline_ms; + DWORD timeout_ms; + + gettimeofday(&now, NULL); + now_ms = (long long)now.tv_sec * 1000 + now.tv_usec / 1000; + deadline_ms = (long long)abstime->tv_sec * 1000 + + abstime->tv_nsec / 1000000; + + if (deadline_ms <= now_ms) + return ETIMEDOUT; + else + timeout_ms = (DWORD)(deadline_ms - now_ms); + + if (SleepConditionVariableCS(cond, mutex, timeout_ms) == 0) { + DWORD err = GetLastError(); + if (err == ERROR_TIMEOUT) + return ETIMEDOUT; + return err_win_to_posix(err); + } + return 0; +} diff --git a/compat/win32/pthread.h b/compat/win32/pthread.h index ccacc5a53ba976..d80df8d12af2dc 100644 --- a/compat/win32/pthread.h +++ b/compat/win32/pthread.h @@ -64,6 +64,8 @@ int win32_pthread_join(pthread_t *thread, void **value_ptr); pthread_t pthread_self(void); int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex); +int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime); static inline void NORETURN pthread_exit(void *ret) { From 56cef9cb1a083c47b12b88548bf2126af8bfb263 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:29 +0000 Subject: [PATCH 191/241] fsmonitor: use pthread_cond_timedwait for cookie wait The cookie wait in with_lock__wait_for_cookie() uses an infinite pthread_cond_wait() loop. 
The existing comment notes the desire to switch to pthread_cond_timedwait(), but the routine was not available in git thread-utils. On certain container or overlay filesystems, inotify watches may succeed but events are never delivered. In this case the daemon would hang indefinitely waiting for the cookie event, which in turn causes the client to hang. Replace the infinite wait with a one-second timeout using pthread_cond_timedwait(). If the timeout fires, report an error and let the client proceed with a trivial (full-scan) response rather than blocking forever. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index d8d32b01ef2859..c8ec7b722e953e 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -197,20 +197,31 @@ static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie( unlink(cookie_pathname.buf); /* - * Technically, this is an infinite wait (well, unless another - * thread sends us an abort). I'd like to change this to - * use `pthread_cond_timedwait()` and return an error/timeout - * and let the caller do the trivial response thing, but we - * don't have that routine in our thread-utils. - * - * After extensive beta testing I'm not really worried about - * this. Also note that the above open() and unlink() calls - * will cause at least two FS events on that path, so the odds - * of getting stuck are pretty slim. + * Wait for the listener thread to observe the cookie file. + * Time out after a short interval so that the client + * does not hang forever if the filesystem does not deliver + * events (e.g., on certain container/overlay filesystems + * where inotify watches succeed but events never arrive). 
*/ - while (cookie->result == FCIR_INIT) - pthread_cond_wait(&state->cookies_cond, - &state->main_lock); + { + struct timeval now; + struct timespec ts; + int err = 0; + + gettimeofday(&now, NULL); + ts.tv_sec = now.tv_sec + 1; + ts.tv_nsec = now.tv_usec * 1000; + + while (cookie->result == FCIR_INIT && !err) + err = pthread_cond_timedwait(&state->cookies_cond, + &state->main_lock, + &ts); + if (err == ETIMEDOUT && cookie->result == FCIR_INIT) { + trace_printf_key(&trace_fsmonitor, + "cookie_wait timed out"); + cookie->result = FCIR_ERROR; + } + } done: hashmap_remove(&state->cookies, &cookie->entry, NULL); From ff384ebfad074321e22b2fb310a8f35df19576d6 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:30 +0000 Subject: [PATCH 192/241] fsmonitor: rename fsm-ipc-darwin.c to fsm-ipc-unix.c The fsmonitor IPC path logic in fsm-ipc-darwin.c is not Darwin-specific and will be reused by the upcoming Linux implementation. Rename it to fsm-ipc-unix.c to reflect that it is shared by all Unix platforms. Introduce FSMONITOR_OS_SETTINGS (set to "unix" for non-Windows, "win32" for Windows) as a separate variable from FSMONITOR_DAEMON_BACKEND so that the build files can distinguish between platform-specific files (listen, health, path-utils) and shared Unix files (ipc, settings). Move fsm-ipc to the FSMONITOR_OS_SETTINGS section in the Makefile, and switch fsm-path-utils to use FSMONITOR_DAEMON_BACKEND since path-utils is platform-specific (there will be separate darwin and linux versions). 
Based-on-patch-by: Eric DeCosta Based-on-patch-by: Marziyeh Esipreh Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- Makefile | 6 ++--- .../{fsm-ipc-darwin.c => fsm-ipc-unix.c} | 0 config.mak.uname | 2 +- contrib/buildsystems/CMakeLists.txt | 25 +++++++++---------- meson.build | 7 ++++-- 5 files changed, 21 insertions(+), 19 deletions(-) rename compat/fsmonitor/{fsm-ipc-darwin.c => fsm-ipc-unix.c} (100%) diff --git a/Makefile b/Makefile index 89d8d73ec0a21b..c04e747af8d463 100644 --- a/Makefile +++ b/Makefile @@ -408,7 +408,7 @@ include shared.mak # If your platform has OS-specific ways to tell if a repo is incompatible with # fsmonitor (whether the hook or IPC daemon version), set FSMONITOR_OS_SETTINGS # to the "" of the corresponding `compat/fsmonitor/fsm-settings-.c` -# that implements the `fsm_os_settings__*()` routines. +# and `compat/fsmonitor/fsm-ipc-.c` files. # # Define LINK_FUZZ_PROGRAMS if you want `make all` to also build the fuzz test # programs in oss-fuzz/. @@ -2323,13 +2323,13 @@ ifdef FSMONITOR_DAEMON_BACKEND COMPAT_CFLAGS += -DHAVE_FSMONITOR_DAEMON_BACKEND COMPAT_OBJS += compat/fsmonitor/fsm-listen-$(FSMONITOR_DAEMON_BACKEND).o COMPAT_OBJS += compat/fsmonitor/fsm-health-$(FSMONITOR_DAEMON_BACKEND).o - COMPAT_OBJS += compat/fsmonitor/fsm-ipc-$(FSMONITOR_DAEMON_BACKEND).o endif ifdef FSMONITOR_OS_SETTINGS COMPAT_CFLAGS += -DHAVE_FSMONITOR_OS_SETTINGS + COMPAT_OBJS += compat/fsmonitor/fsm-ipc-$(FSMONITOR_OS_SETTINGS).o COMPAT_OBJS += compat/fsmonitor/fsm-settings-$(FSMONITOR_OS_SETTINGS).o - COMPAT_OBJS += compat/fsmonitor/fsm-path-utils-$(FSMONITOR_OS_SETTINGS).o + COMPAT_OBJS += compat/fsmonitor/fsm-path-utils-$(FSMONITOR_DAEMON_BACKEND).o endif ifdef WITH_BREAKING_CHANGES diff --git a/compat/fsmonitor/fsm-ipc-darwin.c b/compat/fsmonitor/fsm-ipc-unix.c similarity index 100% rename from compat/fsmonitor/fsm-ipc-darwin.c rename to compat/fsmonitor/fsm-ipc-unix.c diff --git a/config.mak.uname b/config.mak.uname index 
1691c6ae6e01e3..00bcb84cee15c3 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -178,7 +178,7 @@ ifeq ($(uname_S),Darwin) ifndef NO_PTHREADS ifndef NO_UNIX_SOCKETS FSMONITOR_DAEMON_BACKEND = darwin - FSMONITOR_OS_SETTINGS = darwin + FSMONITOR_OS_SETTINGS = unix endif endif diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 28877feb9d1707..6197d5729cbfe4 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -291,23 +291,22 @@ endif() if(SUPPORTS_SIMPLE_IPC) if(CMAKE_SYSTEM_NAME STREQUAL "Windows") - add_compile_definitions(HAVE_FSMONITOR_DAEMON_BACKEND) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-listen-win32.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-health-win32.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-ipc-win32.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-path-utils-win32.c) - - add_compile_definitions(HAVE_FSMONITOR_OS_SETTINGS) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-settings-win32.c) + set(FSMONITOR_DAEMON_BACKEND "win32") + set(FSMONITOR_OS_SETTINGS "win32") elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(FSMONITOR_DAEMON_BACKEND "darwin") + set(FSMONITOR_OS_SETTINGS "unix") + endif() + + if(FSMONITOR_DAEMON_BACKEND) add_compile_definitions(HAVE_FSMONITOR_DAEMON_BACKEND) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-listen-darwin.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-health-darwin.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-ipc-darwin.c) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-path-utils-darwin.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-listen-${FSMONITOR_DAEMON_BACKEND}.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-health-${FSMONITOR_DAEMON_BACKEND}.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-ipc-${FSMONITOR_OS_SETTINGS}.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-path-utils-${FSMONITOR_DAEMON_BACKEND}.c) 
add_compile_definitions(HAVE_FSMONITOR_OS_SETTINGS) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-settings-darwin.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-settings-${FSMONITOR_DAEMON_BACKEND}.c) endif() endif() diff --git a/meson.build b/meson.build index dd52efd1c87574..86a68365a99099 100644 --- a/meson.build +++ b/meson.build @@ -1320,10 +1320,13 @@ else endif fsmonitor_backend = '' +fsmonitor_os = '' if host_machine.system() == 'windows' fsmonitor_backend = 'win32' + fsmonitor_os = 'win32' elif host_machine.system() == 'darwin' fsmonitor_backend = 'darwin' + fsmonitor_os = 'unix' libgit_dependencies += dependency('CoreServices') endif if fsmonitor_backend != '' @@ -1332,14 +1335,14 @@ if fsmonitor_backend != '' libgit_sources += [ 'compat/fsmonitor/fsm-health-' + fsmonitor_backend + '.c', - 'compat/fsmonitor/fsm-ipc-' + fsmonitor_backend + '.c', + 'compat/fsmonitor/fsm-ipc-' + fsmonitor_os + '.c', 'compat/fsmonitor/fsm-listen-' + fsmonitor_backend + '.c', 'compat/fsmonitor/fsm-path-utils-' + fsmonitor_backend + '.c', 'compat/fsmonitor/fsm-settings-' + fsmonitor_backend + '.c', ] endif build_options_config.set_quoted('FSMONITOR_DAEMON_BACKEND', fsmonitor_backend) -build_options_config.set_quoted('FSMONITOR_OS_SETTINGS', fsmonitor_backend) +build_options_config.set_quoted('FSMONITOR_OS_SETTINGS', fsmonitor_os) if not get_option('b_sanitize').contains('address') and get_option('regex').allowed() and compiler.has_header('regex.h') and compiler.get_define('REG_STARTEND', prefix: '#include ') != '' build_options_config.set('NO_REGEX', '') From 7422200bfa1728139962cbf7481f8945add9689e Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:31 +0000 Subject: [PATCH 193/241] fsmonitor: rename fsm-settings-darwin.c to fsm-settings-unix.c The fsmonitor settings logic in fsm-settings-darwin.c is not Darwin-specific and will be reused by the upcoming Linux implementation. 
Rename it to fsm-settings-unix.c to reflect that it is shared by all Unix platforms. Update the build files (meson.build and CMakeLists.txt) to use FSMONITOR_OS_SETTINGS for fsm-settings, matching the approach already used for fsm-ipc. Based-on-patch-by: Eric DeCosta Based-on-patch-by: Marziyeh Esipreh Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- compat/fsmonitor/{fsm-settings-darwin.c => fsm-settings-unix.c} | 0 contrib/buildsystems/CMakeLists.txt | 2 +- meson.build | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename compat/fsmonitor/{fsm-settings-darwin.c => fsm-settings-unix.c} (100%) diff --git a/compat/fsmonitor/fsm-settings-darwin.c b/compat/fsmonitor/fsm-settings-unix.c similarity index 100% rename from compat/fsmonitor/fsm-settings-darwin.c rename to compat/fsmonitor/fsm-settings-unix.c diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 6197d5729cbfe4..d613809e26fd20 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -306,7 +306,7 @@ if(SUPPORTS_SIMPLE_IPC) list(APPEND compat_SOURCES compat/fsmonitor/fsm-path-utils-${FSMONITOR_DAEMON_BACKEND}.c) add_compile_definitions(HAVE_FSMONITOR_OS_SETTINGS) - list(APPEND compat_SOURCES compat/fsmonitor/fsm-settings-${FSMONITOR_DAEMON_BACKEND}.c) + list(APPEND compat_SOURCES compat/fsmonitor/fsm-settings-${FSMONITOR_OS_SETTINGS}.c) endif() endif() diff --git a/meson.build b/meson.build index 86a68365a99099..4f0c0a33b85c7d 100644 --- a/meson.build +++ b/meson.build @@ -1338,7 +1338,7 @@ if fsmonitor_backend != '' 'compat/fsmonitor/fsm-ipc-' + fsmonitor_os + '.c', 'compat/fsmonitor/fsm-listen-' + fsmonitor_backend + '.c', 'compat/fsmonitor/fsm-path-utils-' + fsmonitor_backend + '.c', - 'compat/fsmonitor/fsm-settings-' + fsmonitor_backend + '.c', + 'compat/fsmonitor/fsm-settings-' + fsmonitor_os + '.c', ] endif build_options_config.set_quoted('FSMONITOR_DAEMON_BACKEND', fsmonitor_backend) From 
ce48de8b2c85a4e5cbeb5dd1f2cfe042dd5392e4 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:32 +0000 Subject: [PATCH 194/241] fsmonitor: implement filesystem change listener for Linux Implement the built-in fsmonitor daemon for Linux using the inotify API, bringing it to feature parity with the existing Windows and macOS implementations. The implementation uses inotify rather than fanotify because fanotify requires either CAP_SYS_ADMIN or CAP_PERFMON capabilities, making it unsuitable for an unprivileged user-space daemon. While inotify has the limitation of requiring a separate watch on every directory (unlike macOS's FSEvents, which can monitor an entire directory tree with a single watch), it operates without elevated privileges and provides the per-file event granularity needed for fsmonitor. The listener uses inotify_init1(O_NONBLOCK) with a poll loop that checks for events with a 50-millisecond timeout, keeping the inotify queue well-drained to minimize the risk of overflows. Bidirectional hashmaps map between watch descriptors and directory paths for efficient event resolution. Directory renames are tracked using inotify's cookie mechanism to correlate IN_MOVED_FROM and IN_MOVED_TO event pairs; a periodic check detects stale renames where the matching IN_MOVED_TO never arrived, forcing a resync. New directory creation triggers recursive watch registration to ensure all subdirectories are monitored. The IN_MASK_CREATE flag is used where available to prevent modifying existing watches, with a fallback for older kernels. When IN_MASK_CREATE is available and inotify_add_watch returns EEXIST, it means another thread or recursive scan has already registered the watch, so it is safe to ignore. Remote filesystem detection uses statfs() to identify network-mounted filesystems (NFS, CIFS, SMB, FUSE, etc.) via their magic numbers. 
Mount point information is read from /proc/mounts and matched against the statfs f_fsid to get accurate, human-readable filesystem type names for logging. When the .git directory is on a remote filesystem, the IPC socket falls back to $HOME or a user-configured directory via the fsmonitor.socketDir setting. Based-on-patch-by: Eric DeCosta Based-on-patch-by: Marziyeh Esipreh Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- Documentation/config/fsmonitor--daemon.adoc | 4 +- Documentation/git-fsmonitor--daemon.adoc | 28 +- compat/fsmonitor/fsm-health-linux.c | 33 + compat/fsmonitor/fsm-listen-linux.c | 746 ++++++++++++++++++++ compat/fsmonitor/fsm-path-utils-linux.c | 217 ++++++ config.mak.uname | 10 + contrib/buildsystems/CMakeLists.txt | 8 +- meson.build | 4 + 8 files changed, 1042 insertions(+), 8 deletions(-) create mode 100644 compat/fsmonitor/fsm-health-linux.c create mode 100644 compat/fsmonitor/fsm-listen-linux.c create mode 100644 compat/fsmonitor/fsm-path-utils-linux.c diff --git a/Documentation/config/fsmonitor--daemon.adoc b/Documentation/config/fsmonitor--daemon.adoc index 671f9b94628446..6f8386e29150ff 100644 --- a/Documentation/config/fsmonitor--daemon.adoc +++ b/Documentation/config/fsmonitor--daemon.adoc @@ -4,8 +4,8 @@ fsmonitor.allowRemote:: behavior. Only respected when `core.fsmonitor` is set to `true`. fsmonitor.socketDir:: - This Mac OS-specific option, if set, specifies the directory in + This Mac OS and Linux-specific option, if set, specifies the directory in which to create the Unix domain socket used for communication between the fsmonitor daemon and various Git commands. The directory must - reside on a native Mac OS filesystem. Only respected when `core.fsmonitor` + reside on a native filesystem. Only respected when `core.fsmonitor` is set to `true`. 
diff --git a/Documentation/git-fsmonitor--daemon.adoc b/Documentation/git-fsmonitor--daemon.adoc index 8fe5241b08b007..12fa866a64ecc9 100644 --- a/Documentation/git-fsmonitor--daemon.adoc +++ b/Documentation/git-fsmonitor--daemon.adoc @@ -76,9 +76,9 @@ repositories; this may be overridden by setting `fsmonitor.allowRemote` to correctly with all network-mounted repositories, so such use is considered experimental. -On Mac OS, the inter-process communication (IPC) between various Git +On Mac OS and Linux, the inter-process communication (IPC) between various Git commands and the fsmonitor daemon is done via a Unix domain socket (UDS) -- a -special type of file -- which is supported by native Mac OS filesystems, +special type of file -- which is supported by native Mac OS and Linux filesystems, but not on network-mounted filesystems, NTFS, or FAT32. Other filesystems may or may not have the needed support; the fsmonitor daemon is not guaranteed to work with these filesystems and such use is considered experimental. @@ -87,13 +87,33 @@ By default, the socket is created in the `.git` directory. However, if the `.git` directory is on a network-mounted filesystem, it will instead be created at `$HOME/.git-fsmonitor-*` unless `$HOME` itself is on a network-mounted filesystem, in which case you must set the configuration -variable `fsmonitor.socketDir` to the path of a directory on a Mac OS native +variable `fsmonitor.socketDir` to the path of a directory on a native filesystem in which to create the socket file. If none of the above directories (`.git`, `$HOME`, or `fsmonitor.socketDir`) -is on a native Mac OS file filesystem the fsmonitor daemon will report an +is on a native filesystem the fsmonitor daemon will report an error that will cause the daemon and the currently running command to exit. +LINUX CAVEATS +~~~~~~~~~~~~~ + +On Linux, the fsmonitor daemon uses inotify to monitor filesystem events. 
+The inotify system has per-user limits on the number of watches that can +be created. The default limit is typically 8192 watches per user. + +For large repositories with many directories, you may need to increase +this limit. Check the current limit with: + + cat /proc/sys/fs/inotify/max_user_watches + +To temporarily increase the limit: + + sudo sysctl fs.inotify.max_user_watches=65536 + +To make the change permanent, add to `/etc/sysctl.conf`: + + fs.inotify.max_user_watches=65536 + CONFIGURATION ------------- diff --git a/compat/fsmonitor/fsm-health-linux.c b/compat/fsmonitor/fsm-health-linux.c new file mode 100644 index 00000000000000..43d67c4b8b9efa --- /dev/null +++ b/compat/fsmonitor/fsm-health-linux.c @@ -0,0 +1,33 @@ +#include "git-compat-util.h" +#include "config.h" +#include "fsmonitor-ll.h" +#include "fsm-health.h" +#include "fsmonitor--daemon.h" + +/* + * The Linux fsmonitor implementation uses inotify which has its own + * mechanisms for detecting filesystem unmount and other events that + * would require the daemon to shutdown. Therefore, we don't need + * a separate health thread like Windows does. + * + * These stub functions satisfy the interface requirements. 
+ */ + +int fsm_health__ctor(struct fsmonitor_daemon_state *state UNUSED) +{ + return 0; +} + +void fsm_health__dtor(struct fsmonitor_daemon_state *state UNUSED) +{ + return; +} + +void fsm_health__loop(struct fsmonitor_daemon_state *state UNUSED) +{ + return; +} + +void fsm_health__stop_async(struct fsmonitor_daemon_state *state UNUSED) +{ +} diff --git a/compat/fsmonitor/fsm-listen-linux.c b/compat/fsmonitor/fsm-listen-linux.c new file mode 100644 index 00000000000000..e3dca14b620ee3 --- /dev/null +++ b/compat/fsmonitor/fsm-listen-linux.c @@ -0,0 +1,746 @@ +#include "git-compat-util.h" +#include "dir.h" +#include "fsmonitor-ll.h" +#include "fsm-listen.h" +#include "fsmonitor--daemon.h" +#include "fsmonitor-path-utils.h" +#include "gettext.h" +#include "simple-ipc.h" +#include "string-list.h" +#include "trace.h" + +#include + +/* + * Safe value to bitwise OR with rest of mask for + * kernels that do not support IN_MASK_CREATE + */ +#ifndef IN_MASK_CREATE +#define IN_MASK_CREATE 0x00000000 +#endif + +enum shutdown_reason { + SHUTDOWN_CONTINUE = 0, + SHUTDOWN_STOP, + SHUTDOWN_ERROR, + SHUTDOWN_FORCE +}; + +struct watch_entry { + struct hashmap_entry ent; + int wd; + uint32_t cookie; + const char *dir; +}; + +struct rename_entry { + struct hashmap_entry ent; + time_t whence; + uint32_t cookie; + const char *dir; +}; + +struct fsm_listen_data { + int fd_inotify; + enum shutdown_reason shutdown; + struct hashmap watches; + struct hashmap renames; + struct hashmap revwatches; +}; + +static int watch_entry_cmp(const void *cmp_data UNUSED, + const struct hashmap_entry *eptr, + const struct hashmap_entry *entry_or_key, + const void *keydata UNUSED) +{ + const struct watch_entry *e1, *e2; + + e1 = container_of(eptr, const struct watch_entry, ent); + e2 = container_of(entry_or_key, const struct watch_entry, ent); + return e1->wd != e2->wd; +} + +static int revwatches_entry_cmp(const void *cmp_data UNUSED, + const struct hashmap_entry *eptr, + const struct hashmap_entry 
*entry_or_key, + const void *keydata UNUSED) +{ + const struct watch_entry *e1, *e2; + + e1 = container_of(eptr, const struct watch_entry, ent); + e2 = container_of(entry_or_key, const struct watch_entry, ent); + return strcmp(e1->dir, e2->dir); +} + +static int rename_entry_cmp(const void *cmp_data UNUSED, + const struct hashmap_entry *eptr, + const struct hashmap_entry *entry_or_key, + const void *keydata UNUSED) +{ + const struct rename_entry *e1, *e2; + + e1 = container_of(eptr, const struct rename_entry, ent); + e2 = container_of(entry_or_key, const struct rename_entry, ent); + return e1->cookie != e2->cookie; +} + +/* + * Register an inotify watch, add watch descriptor to path mapping + * and the reverse mapping. + */ +static int add_watch(const char *path, struct fsm_listen_data *data) +{ + const char *interned = strintern(path); + struct watch_entry *w1, *w2; + + /* add the inotify watch, don't allow watches to be modified */ + int wd = inotify_add_watch(data->fd_inotify, interned, + (IN_ALL_EVENTS | IN_ONLYDIR | IN_MASK_CREATE) + ^ IN_ACCESS ^ IN_CLOSE ^ IN_OPEN); + if (wd < 0) { + if (errno == ENOENT || errno == ENOTDIR) + return 0; /* directory was deleted or is not a directory */ + if (errno == EEXIST) + return 0; /* watch already exists, no action needed */ + if (errno == ENOSPC) + return error(_("inotify watch limit reached; " + "increase fs.inotify.max_user_watches")); + return error_errno(_("inotify_add_watch('%s') failed"), interned); + } + + /* add watch descriptor -> directory mapping */ + CALLOC_ARRAY(w1, 1); + w1->wd = wd; + w1->dir = interned; + hashmap_entry_init(&w1->ent, memhash(&w1->wd, sizeof(int))); + hashmap_add(&data->watches, &w1->ent); + + /* add directory -> watch descriptor mapping */ + CALLOC_ARRAY(w2, 1); + w2->wd = wd; + w2->dir = interned; + hashmap_entry_init(&w2->ent, strhash(w2->dir)); + hashmap_add(&data->revwatches, &w2->ent); + + return 0; +} + +/* + * Remove the inotify watch, the watch descriptor to path mapping + * and 
the reverse mapping. + */ +static void remove_watch(struct watch_entry *w, struct fsm_listen_data *data) +{ + struct watch_entry k1, k2, *w1, *w2; + + /* remove watch, ignore error if kernel already did it */ + if (inotify_rm_watch(data->fd_inotify, w->wd) && errno != EINVAL) + error_errno(_("inotify_rm_watch() failed")); + + k1.wd = w->wd; + hashmap_entry_init(&k1.ent, memhash(&k1.wd, sizeof(int))); + w1 = hashmap_remove_entry(&data->watches, &k1, ent, NULL); + if (!w1) + BUG("double remove of watch for '%s'", w->dir); + + if (w1->cookie) + BUG("removing watch for '%s' which has a pending rename", w1->dir); + + k2.dir = w->dir; + hashmap_entry_init(&k2.ent, strhash(k2.dir)); + w2 = hashmap_remove_entry(&data->revwatches, &k2, ent, NULL); + if (!w2) + BUG("double remove of reverse watch for '%s'", w->dir); + + /* w1->dir and w2->dir are interned strings, we don't own them */ + free(w1); + free(w2); +} + +/* + * Check for stale directory renames. + * + * https://man7.org/linux/man-pages/man7/inotify.7.html + * + * Allow for some small timeout to account for the fact that insertion of the + * IN_MOVED_FROM+IN_MOVED_TO event pair is not atomic, and the possibility that + * there may not be any IN_MOVED_TO event. + * + * If the IN_MOVED_TO event is not received within the timeout then events have + * been missed and the monitor is in an inconsistent state with respect to the + * filesystem. + */ +static int check_stale_dir_renames(struct hashmap *renames, time_t max_age) +{ + struct rename_entry *re; + struct hashmap_iter iter; + + hashmap_for_each_entry(renames, &iter, re, ent) { + if (re->whence <= max_age) + return -1; + } + return 0; +} + +/* + * Track pending renames. + * + * Tracking is done via an event cookie to watch descriptor mapping. + * + * A rename is not complete until matching an IN_MOVED_TO event is received + * for a corresponding IN_MOVED_FROM event. 
+ */ +static void add_dir_rename(uint32_t cookie, const char *path, + struct fsm_listen_data *data) +{ + struct watch_entry k, *w; + struct rename_entry *re; + + /* lookup the watch descriptor for the given path */ + k.dir = path; + hashmap_entry_init(&k.ent, strhash(path)); + w = hashmap_get_entry(&data->revwatches, &k, ent, NULL); + if (!w) { + /* + * This can happen in rare cases where the directory was + * moved before we had a chance to add a watch on it. + * Just ignore this rename. + */ + trace_printf_key(&trace_fsmonitor, + "no watch found for rename from '%s'", path); + return; + } + w->cookie = cookie; + + /* add the pending rename to match against later */ + CALLOC_ARRAY(re, 1); + re->dir = w->dir; + re->cookie = w->cookie; + re->whence = time(NULL); + hashmap_entry_init(&re->ent, memhash(&re->cookie, sizeof(uint32_t))); + hashmap_add(&data->renames, &re->ent); +} + +/* + * Handle directory renames + * + * Once an IN_MOVED_TO event is received, lookup the rename tracking information + * via the event cookie and use this information to update the watch. 
+ */ +static void rename_dir(uint32_t cookie, const char *path, + struct fsm_listen_data *data) +{ + struct rename_entry rek, *re; + struct watch_entry k, *w; + + /* lookup a pending rename to match */ + rek.cookie = cookie; + hashmap_entry_init(&rek.ent, memhash(&rek.cookie, sizeof(uint32_t))); + re = hashmap_get_entry(&data->renames, &rek, ent, NULL); + if (re) { + k.dir = re->dir; + hashmap_entry_init(&k.ent, strhash(k.dir)); + w = hashmap_get_entry(&data->revwatches, &k, ent, NULL); + if (w) { + w->cookie = 0; /* rename handled */ + remove_watch(w, data); + if (add_watch(path, data)) + trace_printf_key(&trace_fsmonitor, + "failed to add watch for renamed dir '%s'", + path); + } else { + /* Directory was moved out of watch tree */ + trace_printf_key(&trace_fsmonitor, + "no matching watch for rename to '%s'", path); + } + hashmap_remove_entry(&data->renames, &rek, ent, NULL); + free(re); + } else { + /* Directory was moved from outside the watch tree */ + trace_printf_key(&trace_fsmonitor, + "no matching cookie for rename to '%s'", path); + } +} + +/* + * Recursively add watches to every directory under path + */ +static int register_inotify(const char *path, + struct fsmonitor_daemon_state *state, + struct fsmonitor_batch *batch) +{ + DIR *dir; + const char *rel; + struct strbuf current = STRBUF_INIT; + struct dirent *de; + struct stat fs; + int ret = -1; + + dir = opendir(path); + if (!dir) { + if (errno == ENOENT || errno == ENOTDIR) + return 0; /* directory was deleted */ + return error_errno(_("opendir('%s') failed"), path); + } + + while ((de = readdir_skip_dot_and_dotdot(dir)) != NULL) { + strbuf_reset(¤t); + strbuf_addf(¤t, "%s/%s", path, de->d_name); + if (lstat(current.buf, &fs)) { + if (errno == ENOENT) + continue; /* file was deleted */ + error_errno(_("lstat('%s') failed"), current.buf); + goto failed; + } + + /* recurse into directory */ + if (S_ISDIR(fs.st_mode)) { + if (add_watch(current.buf, state->listen_data)) + goto failed; + if 
(register_inotify(current.buf, state, batch)) + goto failed; + } else if (batch) { + rel = current.buf + state->path_worktree_watch.len + 1; + trace_printf_key(&trace_fsmonitor, "explicitly adding '%s'", rel); + fsmonitor_batch__add_path(batch, rel); + } + } + ret = 0; + +failed: + strbuf_release(¤t); + if (closedir(dir) < 0) + return error_errno(_("closedir('%s') failed"), path); + return ret; +} + +static int em_rename_dir_from(uint32_t mask) +{ + return ((mask & IN_ISDIR) && (mask & IN_MOVED_FROM)); +} + +static int em_rename_dir_to(uint32_t mask) +{ + return ((mask & IN_ISDIR) && (mask & IN_MOVED_TO)); +} + +static int em_remove_watch(uint32_t mask) +{ + return (mask & IN_DELETE_SELF); +} + +static int em_dir_renamed(uint32_t mask) +{ + return ((mask & IN_ISDIR) && (mask & IN_MOVE)); +} + +static int em_dir_created(uint32_t mask) +{ + return ((mask & IN_ISDIR) && (mask & IN_CREATE)); +} + +static int em_dir_deleted(uint32_t mask) +{ + return ((mask & IN_ISDIR) && (mask & IN_DELETE)); +} + +static int em_force_shutdown(uint32_t mask) +{ + return (mask & IN_UNMOUNT) || (mask & IN_Q_OVERFLOW); +} + +static int em_ignore(uint32_t mask) +{ + return (mask & IN_IGNORED) || (mask & IN_MOVE_SELF); +} + +static void log_mask_set(const char *path, uint32_t mask) +{ + struct strbuf msg = STRBUF_INIT; + + if (mask & IN_ACCESS) + strbuf_addstr(&msg, "IN_ACCESS|"); + if (mask & IN_MODIFY) + strbuf_addstr(&msg, "IN_MODIFY|"); + if (mask & IN_ATTRIB) + strbuf_addstr(&msg, "IN_ATTRIB|"); + if (mask & IN_CLOSE_WRITE) + strbuf_addstr(&msg, "IN_CLOSE_WRITE|"); + if (mask & IN_CLOSE_NOWRITE) + strbuf_addstr(&msg, "IN_CLOSE_NOWRITE|"); + if (mask & IN_OPEN) + strbuf_addstr(&msg, "IN_OPEN|"); + if (mask & IN_MOVED_FROM) + strbuf_addstr(&msg, "IN_MOVED_FROM|"); + if (mask & IN_MOVED_TO) + strbuf_addstr(&msg, "IN_MOVED_TO|"); + if (mask & IN_CREATE) + strbuf_addstr(&msg, "IN_CREATE|"); + if (mask & IN_DELETE) + strbuf_addstr(&msg, "IN_DELETE|"); + if (mask & IN_DELETE_SELF) + 
strbuf_addstr(&msg, "IN_DELETE_SELF|"); + if (mask & IN_MOVE_SELF) + strbuf_addstr(&msg, "IN_MOVE_SELF|"); + if (mask & IN_UNMOUNT) + strbuf_addstr(&msg, "IN_UNMOUNT|"); + if (mask & IN_Q_OVERFLOW) + strbuf_addstr(&msg, "IN_Q_OVERFLOW|"); + if (mask & IN_IGNORED) + strbuf_addstr(&msg, "IN_IGNORED|"); + if (mask & IN_ISDIR) + strbuf_addstr(&msg, "IN_ISDIR|"); + + strbuf_strip_suffix(&msg, "|"); + + trace_printf_key(&trace_fsmonitor, "inotify_event: '%s', mask=%#8.8x %s", + path, mask, msg.buf); + + strbuf_release(&msg); +} + +int fsm_listen__ctor(struct fsmonitor_daemon_state *state) +{ + int fd; + int ret = 0; + struct fsm_listen_data *data; + + CALLOC_ARRAY(data, 1); + state->listen_data = data; + state->listen_error_code = -1; + data->fd_inotify = -1; + data->shutdown = SHUTDOWN_ERROR; + + fd = inotify_init1(O_NONBLOCK); + if (fd < 0) { + FREE_AND_NULL(state->listen_data); + return error_errno(_("inotify_init1() failed")); + } + + data->fd_inotify = fd; + + hashmap_init(&data->watches, watch_entry_cmp, NULL, 0); + hashmap_init(&data->renames, rename_entry_cmp, NULL, 0); + hashmap_init(&data->revwatches, revwatches_entry_cmp, NULL, 0); + + if (add_watch(state->path_worktree_watch.buf, data)) + ret = -1; + else if (register_inotify(state->path_worktree_watch.buf, state, NULL)) + ret = -1; + else if (state->nr_paths_watching > 1) { + if (add_watch(state->path_gitdir_watch.buf, data)) + ret = -1; + else if (register_inotify(state->path_gitdir_watch.buf, state, NULL)) + ret = -1; + } + + if (!ret) { + state->listen_error_code = 0; + data->shutdown = SHUTDOWN_CONTINUE; + } + + return ret; +} + +void fsm_listen__dtor(struct fsmonitor_daemon_state *state) +{ + struct fsm_listen_data *data; + struct hashmap_iter iter; + struct watch_entry *w; + struct watch_entry **to_remove; + size_t nr_to_remove = 0, alloc_to_remove = 0; + size_t i; + int fd; + + if (!state || !state->listen_data) + return; + + data = state->listen_data; + fd = data->fd_inotify; + + /* + * Collect all 
entries first, then remove them. + * We can't modify the hashmap while iterating over it. + */ + to_remove = NULL; + hashmap_for_each_entry(&data->watches, &iter, w, ent) { + ALLOC_GROW(to_remove, nr_to_remove + 1, alloc_to_remove); + to_remove[nr_to_remove++] = w; + } + + for (i = 0; i < nr_to_remove; i++) { + to_remove[i]->cookie = 0; /* ignore any pending renames */ + remove_watch(to_remove[i], data); + } + free(to_remove); + + hashmap_clear(&data->watches); + + hashmap_clear(&data->revwatches); /* remove_watch freed the entries */ + + hashmap_clear_and_free(&data->renames, struct rename_entry, ent); + + FREE_AND_NULL(state->listen_data); + + if (fd >= 0 && (close(fd) < 0)) + error_errno(_("closing inotify file descriptor failed")); +} + +void fsm_listen__stop_async(struct fsmonitor_daemon_state *state) +{ + if (state && state->listen_data && + state->listen_data->shutdown == SHUTDOWN_CONTINUE) + state->listen_data->shutdown = SHUTDOWN_STOP; +} + +/* + * Process a single inotify event and queue for publication. + */ +static int process_event(const char *path, + const struct inotify_event *event, + struct fsmonitor_batch **batch, + struct string_list *cookie_list, + struct fsmonitor_daemon_state *state) +{ + const char *rel; + const char *last_sep; + + switch (fsmonitor_classify_path_absolute(state, path)) { + case IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX: + case IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX: + /* Use just the filename of the cookie file. */ + last_sep = find_last_dir_sep(path); + string_list_append(cookie_list, + last_sep ? last_sep + 1 : path); + break; + case IS_INSIDE_DOT_GIT: + case IS_INSIDE_GITDIR: + break; + case IS_DOT_GIT: + case IS_GITDIR: + /* + * If .git directory is deleted or renamed away, + * we have to quit. 
+		 */
+		if (em_dir_deleted(event->mask)) {
+			trace_printf_key(&trace_fsmonitor,
+					 "event: gitdir removed");
+			state->listen_data->shutdown = SHUTDOWN_FORCE;
+			goto done;
+		}
+
+		if (em_dir_renamed(event->mask)) {
+			trace_printf_key(&trace_fsmonitor,
+					 "event: gitdir renamed");
+			state->listen_data->shutdown = SHUTDOWN_FORCE;
+			goto done;
+		}
+		break;
+	case IS_WORKDIR_PATH:
+		/* normal events in the working directory */
+		if (trace_pass_fl(&trace_fsmonitor))
+			log_mask_set(path, event->mask);
+
+		if (!*batch)
+			*batch = fsmonitor_batch__new();
+
+		rel = path + state->path_worktree_watch.len + 1;
+		fsmonitor_batch__add_path(*batch, rel);
+
+		if (em_dir_deleted(event->mask))
+			break;
+
+		/* received IN_MOVED_FROM, add tracking for expected IN_MOVED_TO */
+		if (em_rename_dir_from(event->mask))
+			add_dir_rename(event->cookie, path, state->listen_data);
+
+		/* received IN_MOVED_TO, update watch to reflect new path */
+		if (em_rename_dir_to(event->mask)) {
+			rename_dir(event->cookie, path, state->listen_data);
+			if (register_inotify(path, state, *batch)) {
+				state->listen_data->shutdown = SHUTDOWN_ERROR;
+				goto done;
+			}
+		}
+
+		if (em_dir_created(event->mask)) {
+			if (add_watch(path, state->listen_data)) {
+				state->listen_data->shutdown = SHUTDOWN_ERROR;
+				goto done;
+			}
+			if (register_inotify(path, state, *batch)) {
+				state->listen_data->shutdown = SHUTDOWN_ERROR;
+				goto done;
+			}
+		}
+		break;
+	case IS_OUTSIDE_CONE:
+	default:
+		trace_printf_key(&trace_fsmonitor,
+				 "ignoring '%s'", path);
+		break;
+	}
+	return 0;
+done:
+	return -1;
+}
+
+/*
+ * Read the inotify event stream and pre-process events before further
+ * processing and eventual publishing.
+ */ +static void handle_events(struct fsmonitor_daemon_state *state) +{ + /* See https://man7.org/linux/man-pages/man7/inotify.7.html */ + char buf[4096] + __attribute__ ((aligned(__alignof__(struct inotify_event)))); + + struct hashmap *watches = &state->listen_data->watches; + struct fsmonitor_batch *batch = NULL; + struct string_list cookie_list = STRING_LIST_INIT_DUP; + struct watch_entry k, *w; + struct strbuf path = STRBUF_INIT; + const struct inotify_event *event; + int fd = state->listen_data->fd_inotify; + ssize_t len; + char *ptr, *p; + + for (;;) { + len = read(fd, buf, sizeof(buf)); + if (len == -1) { + if (errno == EAGAIN || errno == EINTR) + goto done; + error_errno(_("reading inotify message stream failed")); + state->listen_data->shutdown = SHUTDOWN_ERROR; + goto done; + } + + /* nothing to read */ + if (len == 0) + goto done; + + /* Loop over all events in the buffer. */ + for (ptr = buf; ptr < buf + len; + ptr += sizeof(struct inotify_event) + event->len) { + + event = (const struct inotify_event *)ptr; + + if (em_ignore(event->mask)) + continue; + + /* File system was unmounted or event queue overflowed */ + if (em_force_shutdown(event->mask)) { + if (trace_pass_fl(&trace_fsmonitor)) + log_mask_set("forcing shutdown", event->mask); + state->listen_data->shutdown = SHUTDOWN_FORCE; + goto done; + } + + k.wd = event->wd; + hashmap_entry_init(&k.ent, memhash(&k.wd, sizeof(int))); + + w = hashmap_get_entry(watches, &k, ent, NULL); + if (!w) { + /* Watch was removed, skip event */ + continue; + } + + /* directory watch was removed */ + if (em_remove_watch(event->mask)) { + remove_watch(w, state->listen_data); + continue; + } + + strbuf_reset(&path); + strbuf_addf(&path, "%s/%s", w->dir, event->name); + + p = fsmonitor__resolve_alias(path.buf, &state->alias); + if (!p) + p = strbuf_detach(&path, NULL); + + if (process_event(p, event, &batch, &cookie_list, state)) { + free(p); + goto done; + } + free(p); + } + strbuf_reset(&path); + 
fsmonitor_publish(state, batch, &cookie_list); + string_list_clear(&cookie_list, 0); + batch = NULL; + } +done: + strbuf_release(&path); + fsmonitor_batch__free_list(batch); + string_list_clear(&cookie_list, 0); +} + +/* + * Non-blocking read of the inotify events stream. The inotify fd is polled + * frequently to help minimize the number of queue overflows. + */ +void fsm_listen__loop(struct fsmonitor_daemon_state *state) +{ + int poll_num; + /* + * Interval in seconds between checks for stale directory renames. + * A directory rename that is not completed within this window + * (i.e. no matching IN_MOVED_TO for an IN_MOVED_FROM) indicates + * missed events, forcing a shutdown. + */ + const int interval = 1; + time_t checked = time(NULL); + struct pollfd fds[1]; + + fds[0].fd = state->listen_data->fd_inotify; + fds[0].events = POLLIN; + + /* + * Our fs event listener is now running, so it's safe to start + * serving client requests. + */ + ipc_server_start_async(state->ipc_server_data); + + for (;;) { + switch (state->listen_data->shutdown) { + case SHUTDOWN_CONTINUE: + poll_num = poll(fds, 1, 50); + if (poll_num == -1) { + if (errno == EINTR) + continue; + error_errno(_("polling inotify message stream failed")); + state->listen_data->shutdown = SHUTDOWN_ERROR; + continue; + } + + if ((time(NULL) - checked) >= interval) { + checked = time(NULL); + if (check_stale_dir_renames(&state->listen_data->renames, + checked - interval)) { + trace_printf_key(&trace_fsmonitor, + "missed IN_MOVED_TO events, forcing shutdown"); + state->listen_data->shutdown = SHUTDOWN_FORCE; + continue; + } + } + + if (poll_num > 0 && (fds[0].revents & POLLIN)) + handle_events(state); + + continue; + case SHUTDOWN_ERROR: + state->listen_error_code = -1; + ipc_server_stop_async(state->ipc_server_data); + break; + case SHUTDOWN_FORCE: + state->listen_error_code = 0; + ipc_server_stop_async(state->ipc_server_data); + break; + case SHUTDOWN_STOP: + default: + state->listen_error_code = 0; + break; 
+ } + return; + } +} diff --git a/compat/fsmonitor/fsm-path-utils-linux.c b/compat/fsmonitor/fsm-path-utils-linux.c new file mode 100644 index 00000000000000..c9866b1b24ca8e --- /dev/null +++ b/compat/fsmonitor/fsm-path-utils-linux.c @@ -0,0 +1,217 @@ +#include "git-compat-util.h" +#include "fsmonitor-ll.h" +#include "fsmonitor-path-utils.h" +#include "gettext.h" +#include "trace.h" + +#include + +#ifdef HAVE_LINUX_MAGIC_H +#include +#endif + +/* + * Filesystem magic numbers for remote filesystems. + * Defined here if not available in linux/magic.h. + */ +#ifndef CIFS_SUPER_MAGIC +#define CIFS_SUPER_MAGIC 0xff534d42 +#endif +#ifndef SMB_SUPER_MAGIC +#define SMB_SUPER_MAGIC 0x517b +#endif +#ifndef SMB2_SUPER_MAGIC +#define SMB2_SUPER_MAGIC 0xfe534d42 +#endif +#ifndef NFS_SUPER_MAGIC +#define NFS_SUPER_MAGIC 0x6969 +#endif +#ifndef AFS_SUPER_MAGIC +#define AFS_SUPER_MAGIC 0x5346414f +#endif +#ifndef CODA_SUPER_MAGIC +#define CODA_SUPER_MAGIC 0x73757245 +#endif +#ifndef FUSE_SUPER_MAGIC +#define FUSE_SUPER_MAGIC 0x65735546 +#endif + +/* + * Check if filesystem type is a remote filesystem. + */ +static int is_remote_fs(unsigned long f_type) +{ + switch (f_type) { + case CIFS_SUPER_MAGIC: + case SMB_SUPER_MAGIC: + case SMB2_SUPER_MAGIC: + case NFS_SUPER_MAGIC: + case AFS_SUPER_MAGIC: + case CODA_SUPER_MAGIC: + case FUSE_SUPER_MAGIC: + return 1; + default: + return 0; + } +} + +/* + * Map filesystem magic numbers to human-readable names as a fallback + * when /proc/mounts is unavailable. This only covers the remote and + * special filesystems in is_remote_fs() above; local filesystems are + * never flagged as incompatible, so we do not need their names here. 
+ */ +static const char *get_fs_typename(unsigned long f_type) +{ + switch (f_type) { + case CIFS_SUPER_MAGIC: + return "cifs"; + case SMB_SUPER_MAGIC: + return "smb"; + case SMB2_SUPER_MAGIC: + return "smb2"; + case NFS_SUPER_MAGIC: + return "nfs"; + case AFS_SUPER_MAGIC: + return "afs"; + case CODA_SUPER_MAGIC: + return "coda"; + case FUSE_SUPER_MAGIC: + return "fuse"; + default: + return "unknown"; + } +} + +/* + * Find the mount point for a given path by reading /proc/mounts. + * + * statfs(2) gives us f_type (the magic number) but not the human-readable + * filesystem type string. We scan /proc/mounts to find the mount entry + * whose path is the longest prefix of ours and whose f_fsid matches, + * which gives us the fstype string (e.g. "nfs", "ext4") for logging. + */ +static char *find_mount(const char *path, const struct statfs *path_fs) +{ + FILE *fp; + struct strbuf line = STRBUF_INIT; + struct strbuf match = STRBUF_INIT; + struct strbuf fstype = STRBUF_INIT; + char *result = NULL; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return NULL; + + while (strbuf_getline(&line, fp) != EOF) { + char *fields[6]; + char *p = line.buf; + int i; + + /* Parse mount entry: device mountpoint fstype options dump pass */ + for (i = 0; i < 6 && p; i++) { + fields[i] = p; + p = strchr(p, ' '); + if (p) + *p++ = '\0'; + } + + if (i >= 3) { + const char *mountpoint = fields[1]; + const char *type = fields[2]; + struct statfs mount_fs; + + /* Check if this mount point is a prefix of our path */ + if (starts_with(path, mountpoint) && + (path[strlen(mountpoint)] == '/' || + path[strlen(mountpoint)] == '\0')) { + /* Check if filesystem ID matches */ + if (statfs(mountpoint, &mount_fs) == 0 && + !memcmp(&mount_fs.f_fsid, &path_fs->f_fsid, + sizeof(mount_fs.f_fsid))) { + /* Keep the longest matching mount point */ + if (strlen(mountpoint) > match.len) { + strbuf_reset(&match); + strbuf_addstr(&match, mountpoint); + strbuf_reset(&fstype); + strbuf_addstr(&fstype, type); + } + } 
+ } + } + } + + fclose(fp); + strbuf_release(&line); + strbuf_release(&match); + + if (fstype.len) + result = strbuf_detach(&fstype, NULL); + else + strbuf_release(&fstype); + + return result; +} + +int fsmonitor__get_fs_info(const char *path, struct fs_info *fs_info) +{ + struct statfs fs; + + if (statfs(path, &fs) == -1) { + int saved_errno = errno; + trace_printf_key(&trace_fsmonitor, "statfs('%s') failed: %s", + path, strerror(saved_errno)); + errno = saved_errno; + return -1; + } + + trace_printf_key(&trace_fsmonitor, + "statfs('%s') [type 0x%08lx]", + path, (unsigned long)fs.f_type); + + fs_info->is_remote = is_remote_fs(fs.f_type); + + /* + * Try to get filesystem type from /proc/mounts for a more + * descriptive name. + */ + fs_info->typename = find_mount(path, &fs); + if (!fs_info->typename) + fs_info->typename = xstrdup(get_fs_typename(fs.f_type)); + + trace_printf_key(&trace_fsmonitor, + "'%s' is_remote: %d, typename: %s", + path, fs_info->is_remote, fs_info->typename); + + return 0; +} + +int fsmonitor__is_fs_remote(const char *path) +{ + struct fs_info fs; + + if (fsmonitor__get_fs_info(path, &fs)) + return -1; + + free(fs.typename); + + return fs.is_remote; +} + +/* + * No-op for Linux - we don't have firmlinks like macOS. + */ +int fsmonitor__get_alias(const char *path UNUSED, + struct alias_info *info UNUSED) +{ + return 0; +} + +/* + * No-op for Linux - we don't have firmlinks like macOS. + */ +char *fsmonitor__resolve_alias(const char *path UNUSED, + const struct alias_info *info UNUSED) +{ + return NULL; +} diff --git a/config.mak.uname b/config.mak.uname index 00bcb84cee15c3..fd91729dd2b80f 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -68,6 +68,16 @@ ifeq ($(uname_S),Linux) BASIC_CFLAGS += -std=c99 endif LINK_FUZZ_PROGRAMS = YesPlease + + # The builtin FSMonitor on Linux builds upon Simple-IPC. Both require + # Unix domain sockets and PThreads. 
+ ifndef NO_PTHREADS + ifndef NO_UNIX_SOCKETS + FSMONITOR_DAEMON_BACKEND = linux + FSMONITOR_OS_SETTINGS = unix + BASIC_CFLAGS += -DHAVE_LINUX_MAGIC_H + endif + endif endif ifeq ($(uname_S),GNU/kFreeBSD) HAVE_ALLOCA_H = YesPlease diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index d613809e26fd20..b7da108f298dc3 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -296,6 +296,10 @@ if(SUPPORTS_SIMPLE_IPC) elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") set(FSMONITOR_DAEMON_BACKEND "darwin") set(FSMONITOR_OS_SETTINGS "unix") + elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(FSMONITOR_DAEMON_BACKEND "linux") + set(FSMONITOR_OS_SETTINGS "unix") + add_compile_definitions(HAVE_LINUX_MAGIC_H) endif() if(FSMONITOR_DAEMON_BACKEND) @@ -1149,8 +1153,8 @@ endif() file(STRINGS ${CMAKE_SOURCE_DIR}/GIT-BUILD-OPTIONS.in git_build_options NEWLINE_CONSUME) string(REPLACE "@BROKEN_PATH_FIX@" "" git_build_options "${git_build_options}") string(REPLACE "@DIFF@" "'${DIFF}'" git_build_options "${git_build_options}") -string(REPLACE "@FSMONITOR_DAEMON_BACKEND@" "win32" git_build_options "${git_build_options}") -string(REPLACE "@FSMONITOR_OS_SETTINGS@" "win32" git_build_options "${git_build_options}") +string(REPLACE "@FSMONITOR_DAEMON_BACKEND@" "${FSMONITOR_DAEMON_BACKEND}" git_build_options "${git_build_options}") +string(REPLACE "@FSMONITOR_OS_SETTINGS@" "${FSMONITOR_OS_SETTINGS}" git_build_options "${git_build_options}") string(REPLACE "@GITWEBDIR@" "'${GITWEBDIR}'" git_build_options "${git_build_options}") string(REPLACE "@GIT_INTEROP_MAKE_OPTS@" "" git_build_options "${git_build_options}") string(REPLACE "@GIT_PERF_LARGE_REPO@" "" git_build_options "${git_build_options}") diff --git a/meson.build b/meson.build index 4f0c0a33b85c7d..123d2184602aa9 100644 --- a/meson.build +++ b/meson.build @@ -1324,6 +1324,10 @@ fsmonitor_os = '' if host_machine.system() == 'windows' fsmonitor_backend = 'win32' fsmonitor_os 
= 'win32' +elif host_machine.system() == 'linux' and threads.found() and compiler.has_header('linux/magic.h') + fsmonitor_backend = 'linux' + fsmonitor_os = 'unix' + libgit_c_args += '-DHAVE_LINUX_MAGIC_H' elif host_machine.system() == 'darwin' fsmonitor_backend = 'darwin' fsmonitor_os = 'unix' From 50dc89cdfb6d8495853ceac4801c1cca9cd4ce38 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:33 +0000 Subject: [PATCH 195/241] run-command: add close_fd_above_stderr option Add a close_fd_above_stderr flag to struct child_process. When set, the child closes file descriptors 3 and above between fork and exec (skipping the child-notifier pipe), capped at sysconf(_SC_OPEN_MAX) or 4096, whichever is smaller. This prevents the child from inheriting pipe endpoints or other descriptors from the parent environment (e.g., the test harness). Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- run-command.c | 12 ++++++++++++ run-command.h | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/run-command.c b/run-command.c index e3e02475ccec50..f4361906c9b0e5 100644 --- a/run-command.c +++ b/run-command.c @@ -546,6 +546,7 @@ static void atfork_parent(struct atfork_state *as) "restoring signal mask"); #endif } + #endif /* GIT_WINDOWS_NATIVE */ static inline void set_cloexec(int fd) @@ -832,6 +833,17 @@ int start_command(struct child_process *cmd) child_close(cmd->out); } + if (cmd->close_fd_above_stderr) { + long max_fd = sysconf(_SC_OPEN_MAX); + int fd; + if (max_fd < 0 || max_fd > 4096) + max_fd = 4096; + for (fd = 3; fd < max_fd; fd++) { + if (fd != child_notifier) + close(fd); + } + } + if (cmd->dir && chdir(cmd->dir)) child_die(CHILD_ERR_CHDIR); diff --git a/run-command.h b/run-command.h index 0df25e445f001c..fdaa01e140705f 100644 --- a/run-command.h +++ b/run-command.h @@ -141,6 +141,15 @@ struct child_process { unsigned stdout_to_stderr:1; unsigned clean_on_exit:1; unsigned wait_after_clean:1; + + /** + * Close file descriptors 3 and above 
in the child after forking + * but before exec. This prevents the child from inheriting + * pipe endpoints or other descriptors from the parent + * environment (e.g., the test harness). + */ + unsigned close_fd_above_stderr:1; + void (*clean_on_exit_handler)(struct child_process *process); }; From 9266aaff0aba923eb6ef08a24d413ed7052818d7 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:34 +0000 Subject: [PATCH 196/241] fsmonitor: close inherited file descriptors and detach in daemon When the fsmonitor daemon is spawned as a background process, it may inherit file descriptors from its parent that it does not need. In particular, when the test harness or a CI system captures output through pipes, the daemon can inherit duplicated pipe endpoints. If the daemon holds these open, the parent process never sees EOF and may appear to hang. Set close_fd_above_stderr on the child process at both daemon startup paths: the explicit "fsmonitor--daemon start" command and the implicit spawn triggered by fsmonitor-ipc when a client finds no running daemon. Also suppress stdout and stderr on the implicit spawn path to prevent the background daemon from writing to the client's terminal. Additionally, call setsid() when the daemon starts with --detach to create a new session and process group. This prevents the daemon from being part of the spawning shell's process group, which could cause the shell's "wait" to block until the daemon exits. 
Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 16 ++++++++++++++-- fsmonitor-ipc.c | 3 +++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index c8ec7b722e953e..b2a816dc3fea5e 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -1439,7 +1439,7 @@ static int fsmonitor_run_daemon(void) return err; } -static int try_to_run_foreground_daemon(int detach_console MAYBE_UNUSED) +static int try_to_run_foreground_daemon(int detach_console) { /* * Technically, we don't need to probe for an existing daemon @@ -1459,10 +1459,21 @@ static int try_to_run_foreground_daemon(int detach_console MAYBE_UNUSED) fflush(stderr); } + if (detach_console) { #ifdef GIT_WINDOWS_NATIVE - if (detach_console) FreeConsole(); +#else + /* + * Create a new session so that the daemon is detached + * from the parent's process group. This prevents + * shells with job control (e.g. bash with "set -m") + * from waiting on the daemon when they wait for a + * foreground command that implicitly spawned it. 
+ */ + if (setsid() == -1) + warning_errno(_("setsid failed")); #endif + } return !!fsmonitor_run_daemon(); } @@ -1525,6 +1536,7 @@ static int try_to_start_background_daemon(void) cp.no_stdin = 1; cp.no_stdout = 1; cp.no_stderr = 1; + cp.close_fd_above_stderr = 1; sbgr = start_bg_command(&cp, bg_wait_cb, NULL, fsmonitor__start_timeout_sec); diff --git a/fsmonitor-ipc.c b/fsmonitor-ipc.c index f1b163111194fb..6112d130644f04 100644 --- a/fsmonitor-ipc.c +++ b/fsmonitor-ipc.c @@ -61,6 +61,9 @@ static int spawn_daemon(void) cmd.git_cmd = 1; cmd.no_stdin = 1; + cmd.no_stdout = 1; + cmd.no_stderr = 1; + cmd.close_fd_above_stderr = 1; cmd.trace2_child_class = "fsmonitor"; strvec_pushl(&cmd.args, "fsmonitor--daemon", "start", NULL); From 1cbfa62766d04eee86d8cf0f0efe1c344e73591a Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:35 +0000 Subject: [PATCH 197/241] fsmonitor: add timeout to daemon stop command The "fsmonitor--daemon stop" command polls in a loop waiting for the daemon to exit after sending a "quit" command over IPC. If the daemon fails to shut down (e.g. it is stuck or wedged), this loop spins forever. Add a 30-second timeout so the stop command returns an error instead of blocking indefinitely. 
Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index b2a816dc3fea5e..53d8ad1f0d2a17 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -86,6 +86,8 @@ static int do_as_client__send_stop(void) { struct strbuf answer = STRBUF_INIT; int ret; + int max_wait_ms = 30000; + int elapsed_ms = 0; ret = fsmonitor_ipc__send_command("quit", &answer); @@ -96,8 +98,16 @@ static int do_as_client__send_stop(void) return ret; trace2_region_enter("fsm_client", "polling-for-daemon-exit", NULL); - while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING) + while (fsmonitor_ipc__get_state() == IPC_STATE__LISTENING) { + if (elapsed_ms >= max_wait_ms) { + trace2_region_leave("fsm_client", + "polling-for-daemon-exit", NULL); + return error(_("daemon did not stop within %d seconds"), + max_wait_ms / 1000); + } sleep_millisec(50); + elapsed_ms += 50; + } trace2_region_leave("fsm_client", "polling-for-daemon-exit", NULL); return 0; From d21fc23546e72ef7067c6664485d2436fc67fdde Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:36 +0000 Subject: [PATCH 198/241] fsmonitor: add tests for Linux Add a smoke test that verifies the filesystem actually delivers inotify events to the daemon. On some configurations (e.g., overlayfs with older kernels), inotify watches succeed but events are never delivered. The daemon cookie wait will time out, but every subsequent test would fail. Skip the entire test file early when this is detected. Add a test that exercises rapid nested directory creation to verify the daemon correctly handles the EEXIST race between recursive scan and queued inotify events. When IN_MASK_CREATE is available and a directory watch is added during recursive registration, the kernel may also deliver a queued IN_CREATE event for the same directory. 
The second inotify_add_watch() returns EEXIST, which must be treated as harmless. An earlier version of the listener crashed in this scenario. Reduce --start-timeout from the default 60 seconds to 10 seconds so that tests fail promptly when the daemon cannot start. Harden the test helpers to work in environments without procps (e.g., Fedora CI): fall back to reading /proc/$pid/stat for the process group ID when ps is unavailable, guard stop_git() against an empty pgid, and redirect stderr from kill to /dev/null to avoid noise when processes have already exited. Use set -m to enable job control in the submodule-pull test so that the background git pull gets its own process group, preventing the shell wait from blocking on the daemon. setsid() in the previous commit detaches the daemon itself, but the intermediate git pull process still needs its own process group for the test shell to manage it correctly. Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- t/t7527-builtin-fsmonitor.sh | 88 +++++++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 7 deletions(-) diff --git a/t/t7527-builtin-fsmonitor.sh b/t/t7527-builtin-fsmonitor.sh index 409cd0cd121695..ed12f218de32f1 100755 --- a/t/t7527-builtin-fsmonitor.sh +++ b/t/t7527-builtin-fsmonitor.sh @@ -10,9 +10,57 @@ then test_done fi +# Verify that the filesystem delivers events to the daemon. +# On some configurations (e.g., overlayfs with older kernels), +# inotify watches succeed but events are never delivered. The +# cookie wait will time out and the daemon logs a trace message. +# +# Use "timeout" (if available) to guard each step against hangs. 
+maybe_timeout () { + if type timeout >/dev/null 2>&1 + then + timeout "$@" + else + shift + "$@" + fi +} +verify_fsmonitor_works () { + git init test_fsmonitor_smoke || return 1 + + GIT_TRACE_FSMONITOR="$PWD/smoke.trace" && + export GIT_TRACE_FSMONITOR && + maybe_timeout 30 \ + git -C test_fsmonitor_smoke fsmonitor--daemon start \ + --start-timeout=10 + ret=$? + unset GIT_TRACE_FSMONITOR + if test $ret -ne 0 + then + rm -rf test_fsmonitor_smoke smoke.trace + return 1 + fi + + maybe_timeout 10 \ + test-tool -C test_fsmonitor_smoke fsmonitor-client query \ + --token 0 >/dev/null 2>&1 + maybe_timeout 5 \ + git -C test_fsmonitor_smoke fsmonitor--daemon stop 2>/dev/null + ! grep -q "cookie_wait timed out" "$PWD/smoke.trace" 2>/dev/null + ret=$? + rm -rf test_fsmonitor_smoke smoke.trace + return $ret +} + +if ! verify_fsmonitor_works +then + skip_all="filesystem does not deliver fsmonitor events (container/overlayfs?)" + test_done +fi + stop_daemon_delete_repo () { r=$1 && - test_might_fail git -C $r fsmonitor--daemon stop && + { maybe_timeout 30 git -C $r fsmonitor--daemon stop 2>/dev/null || :; } && rm -rf $1 } @@ -67,7 +115,7 @@ start_daemon () { export GIT_TEST_FSMONITOR_TOKEN fi && - git $r fsmonitor--daemon start && + git $r fsmonitor--daemon start --start-timeout=10 && git $r fsmonitor--daemon status ) } @@ -520,6 +568,28 @@ test_expect_success 'directory changes to a file' ' grep "^event: dir1$" .git/trace ' +test_expect_success 'rapid nested directory creation' ' + test_when_finished "git fsmonitor--daemon stop; rm -rf rapid" && + + start_daemon --tf "$PWD/.git/trace" && + + # Rapidly create nested directories to exercise race conditions + # where directory watches may be added concurrently during + # event processing and recursive scanning. 
+ for i in $(test_seq 1 20) + do + mkdir -p "rapid/nested/dir$i/subdir/deep" || return 1 + done && + + # Give the daemon time to process all events + sleep 1 && + + test-tool fsmonitor-client query --token 0 && + + # Verify daemon is still running (did not crash) + git fsmonitor--daemon status +' + # The next few test cases exercise the token-resync code. When filesystem # drops events (because of filesystem velocity or because the daemon isn't # polling fast enough), we need to discard the cached data (relative to the @@ -910,7 +980,10 @@ test_expect_success "submodule absorbgitdirs implicitly starts daemon" ' start_git_in_background () { git "$@" & git_pid=$! - git_pgid=$(ps -o pgid= -p $git_pid) + git_pgid=$(ps -o pgid= -p $git_pid 2>/dev/null || + awk '{print $5}' /proc/$git_pid/stat 2>/dev/null) && + git_pgid="${git_pgid## }" && + git_pgid="${git_pgid%% }" nr_tries_left=10 while true do @@ -921,15 +994,16 @@ start_git_in_background () { fi sleep 1 nr_tries_left=$(($nr_tries_left - 1)) - done >/dev/null 2>&1 & + done >/dev/null 2>&1 3>&- 4>&- 5>&- 6>&- 7>&- & watchdog_pid=$! wait $git_pid } stop_git () { - while kill -0 -- -$git_pgid + test -n "$git_pgid" || return 0 + while kill -0 -- -$git_pgid 2>/dev/null do - kill -- -$git_pgid + kill -- -$git_pgid 2>/dev/null sleep 1 done } @@ -944,7 +1018,7 @@ stop_watchdog () { test_expect_success !MINGW "submodule implicitly starts daemon by pull" ' test_atexit "stop_watchdog" && - test_when_finished "stop_git; rm -rf cloned super sub" && + test_when_finished "set +m; stop_git; rm -rf cloned super sub" && create_super super && create_sub sub && From b1cebd7194299ad5414ab2122b2970b339399446 Mon Sep 17 00:00:00 2001 From: Paul Tarjan Date: Wed, 15 Apr 2026 13:27:37 +0000 Subject: [PATCH 199/241] fsmonitor: convert shown khash to strset in do_handle_client Replace the khash-based string set used for deduplicating pathnames in do_handle_client() with a strset, which provides a cleaner interface for the same purpose. 
Since the paths are interned strings from the batch data, use strdup_strings=0 to avoid unnecessary copies. Suggested-by: Patrick Steinhardt Signed-off-by: Paul Tarjan Signed-off-by: Junio C Hamano --- builtin/fsmonitor--daemon.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/builtin/fsmonitor--daemon.c b/builtin/fsmonitor--daemon.c index 53d8ad1f0d2a17..f920cf3a8202f6 100644 --- a/builtin/fsmonitor--daemon.c +++ b/builtin/fsmonitor--daemon.c @@ -16,7 +16,7 @@ #include "fsmonitor--daemon.h" #include "simple-ipc.h" -#include "khash.h" +#include "strmap.h" #include "run-command.h" #include "trace.h" #include "trace2.h" @@ -674,8 +674,6 @@ static int fsmonitor_parse_client_token(const char *buf_token, return 0; } -KHASH_INIT(str, const char *, int, 0, kh_str_hash_func, kh_str_hash_equal) - static int do_handle_client(struct fsmonitor_daemon_state *state, const char *command, ipc_server_reply_cb *reply, @@ -692,8 +690,7 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, const struct fsmonitor_batch *batch; struct fsmonitor_batch *remainder = NULL; intmax_t count = 0, duplicates = 0; - kh_str_t *shown = NULL; - int hash_ret; + struct strset shown = STRSET_INIT; int do_trivial = 0; int do_flush = 0; int do_cookie = 0; @@ -882,14 +879,14 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, * so walk the batch list backwards from the current head back * to the batch (sequence number) they named. * - * We use khash to de-dup the list of pathnames. + * We use a strset to de-dup the list of pathnames. * * NEEDSWORK: each batch contains a list of interned strings, * so we only need to do pointer comparisons here to build the * hash table. Currently, we're still comparing the string * values. 
*/ - shown = kh_init_str(); + strset_init_with_options(&shown, NULL, 0); for (batch = batch_head; batch && batch->batch_seq_nr > requested_oldest_seq_nr; batch = batch->next) { @@ -899,11 +896,9 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, const char *s = batch->interned_paths[k]; size_t s_len; - if (kh_get_str(shown, s) != kh_end(shown)) + if (!strset_add(&shown, s)) duplicates++; else { - kh_put_str(shown, s, &hash_ret); - trace_printf_key(&trace_fsmonitor, "send[%"PRIuMAX"]: %s", count, s); @@ -973,7 +968,7 @@ static int do_handle_client(struct fsmonitor_daemon_state *state, trace2_data_intmax("fsmonitor", the_repository, "response/count/duplicates", duplicates); cleanup: - kh_destroy_str(shown); + strset_clear(&shown); strbuf_release(&response_token); strbuf_release(&requested_token_id); strbuf_release(&payload); From c9e31490c06832d3a7930b26ed308629cced98a8 Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Sat, 4 Apr 2026 19:28:38 +0530 Subject: [PATCH 200/241] refs: add struct repository parameter in get_files_ref_lock_timeout_ms() get_files_ref_lock_timeout_ms() calls repo_config_get_int() using the_repository, as no repository instance is available in its scope. Add a struct repository parameter and use it instead of the_repository. Update all callers accordingly. In files-backend.c, lock_raw_ref() can obtain repository instance from the struct ref_transaction via transaction->ref_store->repo and pass it down. For create_reflock(), which is used as a callback, introduce a small wrapper struct to pass both struct lock_file and struct repository through the callback data. This reduces reliance on the_repository global, though the function still uses static variables and is not yet fully repository-scoped. This can be addressed in a follow-up change. 
Signed-off-by: Shreyansh Paliwal Acked-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 4 ++-- refs/files-backend.c | 19 +++++++++++++------ refs/refs-internal.h | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/refs.c b/refs.c index 685a0c247b571e..214ebfd5ce7f3c 100644 --- a/refs.c +++ b/refs.c @@ -989,7 +989,7 @@ enum ref_worktree_type parse_worktree_ref(const char *maybe_worktree_ref, return REF_WORKTREE_SHARED; } -long get_files_ref_lock_timeout_ms(void) +long get_files_ref_lock_timeout_ms(struct repository *repo) { static int configured = 0; @@ -997,7 +997,7 @@ long get_files_ref_lock_timeout_ms(void) static int timeout_ms = 100; if (!configured) { - repo_config_get_int(the_repository, "core.filesreflocktimeout", &timeout_ms); + repo_config_get_int(repo, "core.filesreflocktimeout", &timeout_ms); configured = 1; } diff --git a/refs/files-backend.c b/refs/files-backend.c index 0537a72b2af9e0..10e4388d2ca01a 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -792,7 +792,7 @@ static enum ref_transaction_error lock_raw_ref(struct files_ref_store *refs, if (hold_lock_file_for_update_timeout( &lock->lk, ref_file.buf, LOCK_NO_DEREF, - get_files_ref_lock_timeout_ms()) < 0) { + get_files_ref_lock_timeout_ms(transaction->ref_store->repo)) < 0) { int myerr = errno; errno = 0; if (myerr == ENOENT && --attempts_remaining > 0) { @@ -1190,13 +1190,17 @@ static int remove_empty_directories(struct strbuf *path) return remove_dir_recursively(path, REMOVE_DIR_EMPTY_ONLY); } +struct create_reflock_cb { + struct lock_file *lk; + struct repository *repo; +}; + static int create_reflock(const char *path, void *cb) { - struct lock_file *lk = cb; - + struct create_reflock_cb *data = cb; return hold_lock_file_for_update_timeout( - lk, path, LOCK_NO_DEREF, - get_files_ref_lock_timeout_ms()) < 0 ? -1 : 0; + data->lk, path, LOCK_NO_DEREF, + get_files_ref_lock_timeout_ms(data->repo)) < 0 ? 
-1 : 0; } /* @@ -1208,6 +1212,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, { struct strbuf ref_file = STRBUF_INIT; struct ref_lock *lock; + struct create_reflock_cb cb_data; files_assert_main_repository(refs, "lock_ref_oid_basic"); assert(err); @@ -1229,8 +1234,10 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs, lock->ref_name = xstrdup(refname); lock->count = 1; + cb_data.lk = &lock->lk; + cb_data.repo = refs->base.repo; - if (raceproof_create_file(ref_file.buf, create_reflock, &lock->lk)) { + if (raceproof_create_file(ref_file.buf, create_reflock, &cb_data)) { unable_to_lock_message(ref_file.buf, errno, err); goto error_return; } diff --git a/refs/refs-internal.h b/refs/refs-internal.h index d79e35fd269a6c..e4cfd9e19ee74f 100644 --- a/refs/refs-internal.h +++ b/refs/refs-internal.h @@ -43,7 +43,7 @@ struct ref_transaction; * Return the length of time to retry acquiring a loose reference lock * before giving up, in milliseconds: */ -long get_files_ref_lock_timeout_ms(void); +long get_files_ref_lock_timeout_ms(struct repository *repo); /* * Return true iff refname is minimally safe. "Safe" here means that From 1a349ca6df7578adf8f2ce33d0c36ee269167029 Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Sat, 4 Apr 2026 19:28:39 +0530 Subject: [PATCH 201/241] refs: remove the_hash_algo global state refs.c uses the_hash_algo in multiple places, relying on global state for the object hash algorithm. Replace these uses with the appropriate repository-specific hash_algo. In transaction-related functions (ref_transaction_create, ref_transaction_delete, migrate_one_ref, and transaction_hook_feed_stdin), use transaction->ref_store->repo->hash_algo. In other cases, such as repo_get_submodule_ref_store(), use repo->hash_algo. 
Signed-off-by: Shreyansh Paliwal Acked-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/refs.c b/refs.c index 214ebfd5ce7f3c..cb58e10dc624f3 100644 --- a/refs.c +++ b/refs.c @@ -1472,7 +1472,7 @@ int ref_transaction_create(struct ref_transaction *transaction, return 1; } return ref_transaction_update(transaction, refname, new_oid, - null_oid(the_hash_algo), new_target, NULL, flags, + null_oid(transaction->ref_store->repo->hash_algo), new_target, NULL, flags, msg, err); } @@ -1491,7 +1491,7 @@ int ref_transaction_delete(struct ref_transaction *transaction, if (old_target && !(flags & REF_NO_DEREF)) BUG("delete cannot operate on symrefs with deref mode"); return ref_transaction_update(transaction, refname, - null_oid(the_hash_algo), old_oid, + null_oid(transaction->ref_store->repo->hash_algo), old_oid, NULL, old_target, flags, msg, err); } @@ -2379,7 +2379,7 @@ struct ref_store *repo_get_submodule_ref_store(struct repository *repo, subrepo = xmalloc(sizeof(*subrepo)); if (repo_submodule_init(subrepo, repo, submodule, - null_oid(the_hash_algo))) { + null_oid(repo->hash_algo))) { free(subrepo); goto done; } @@ -2571,14 +2571,14 @@ static int transaction_hook_feed_stdin(int hook_stdin_fd, void *pp_cb, void *pp_ strbuf_reset(buf); if (!(update->flags & REF_HAVE_OLD)) - strbuf_addf(buf, "%s ", oid_to_hex(null_oid(the_hash_algo))); + strbuf_addf(buf, "%s ", oid_to_hex(null_oid(transaction->ref_store->repo->hash_algo))); else if (update->old_target) strbuf_addf(buf, "ref:%s ", update->old_target); else strbuf_addf(buf, "%s ", oid_to_hex(&update->old_oid)); if (!(update->flags & REF_HAVE_NEW)) - strbuf_addf(buf, "%s ", oid_to_hex(null_oid(the_hash_algo))); + strbuf_addf(buf, "%s ", oid_to_hex(null_oid(transaction->ref_store->repo->hash_algo))); else if (update->new_target) strbuf_addf(buf, "ref:%s ", update->new_target); else @@ -3145,6 +3145,7 @@ struct migration_data { static int 
migrate_one_ref(const struct reference *ref, void *cb_data) { struct migration_data *data = cb_data; + const struct git_hash_algo *hash_algo = data->transaction->ref_store->repo->hash_algo; struct strbuf symref_target = STRBUF_INIT; int ret; @@ -3153,7 +3154,7 @@ static int migrate_one_ref(const struct reference *ref, void *cb_data) if (ret < 0) goto done; - ret = ref_transaction_update(data->transaction, ref->name, NULL, null_oid(the_hash_algo), + ret = ref_transaction_update(data->transaction, ref->name, NULL, null_oid(hash_algo), symref_target.buf, NULL, REF_SKIP_CREATE_REFLOG | REF_NO_DEREF, NULL, data->errbuf); if (ret < 0) From 5b1ba8104319986b031b2313971c4b204619ce7d Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Sat, 4 Apr 2026 19:28:40 +0530 Subject: [PATCH 202/241] refs/reftable-backend: drop uses of the_repository reftable_be_init() and reftable_be_create_on_disk() use the_repository even though a repository instance is already available, either directly or via struct ref_store. Replace these uses with the appropriate local repository instance (repo or ref_store->repo) to avoid relying on global state. Note that USE_THE_REPOSITORY_VARIABLE cannot be removed yet, as is_bare_repository() is still there in the file. 
Signed-off-by: Shreyansh Paliwal Acked-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index b124404663edf6..7c8a992fcb40b9 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -404,13 +404,13 @@ static struct ref_store *reftable_be_init(struct repository *repo, default: BUG("unknown hash algorithm %d", repo->hash_algo->format_id); } - refs->write_options.default_permissions = calc_shared_perm(the_repository, 0666 & ~mask); + refs->write_options.default_permissions = calc_shared_perm(repo, 0666 & ~mask); refs->write_options.disable_auto_compact = !git_env_bool("GIT_TEST_REFTABLE_AUTOCOMPACTION", 1); refs->write_options.lock_timeout_ms = 100; refs->write_options.fsync = reftable_be_fsync; - repo_config(the_repository, reftable_be_config, &refs->write_options); + repo_config(repo, reftable_be_config, &refs->write_options); /* * It is somewhat unfortunate that we have to mirror the default block @@ -492,7 +492,7 @@ static int reftable_be_create_on_disk(struct ref_store *ref_store, struct strbuf sb = STRBUF_INIT; strbuf_addf(&sb, "%s/reftable", refs->base.gitdir); - safe_create_dir(the_repository, sb.buf, 1); + safe_create_dir(ref_store->repo, sb.buf, 1); strbuf_reset(&sb); strbuf_release(&sb); From efbb7639c2e52723c78086fc88d8a5a801903269 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 15 Apr 2026 15:14:20 +0000 Subject: [PATCH 203/241] t5516: fix test order flakiness The 'fetch follows tags by default' test sorts using 'sort -k 4', but for-each-ref output only has 3 columns. This relies on sort treating records with fewer fields as having an empty fourth field, which may produce unstable results depending on locale. Use 'sort -k 3' to match the actual number of columns in the output. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5516-fetch-push.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index 29e2f176081561..ac8447f21ed963 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1349,7 +1349,7 @@ test_expect_success 'fetch follows tags by default' ' git for-each-ref >tmp1 && sed -n "p; s|refs/heads/main$|refs/remotes/origin/main|p" tmp1 | sed -n "p; s|refs/heads/main$|refs/remotes/origin/HEAD|p" | - sort -k 4 >../expect + sort -k 3 >../expect ) && test_when_finished "rm -rf dst" && git init dst && From d542ba848889be052927828218a0d4c88a812cb4 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 15 Apr 2026 15:14:21 +0000 Subject: [PATCH 204/241] fetch: add --negotiation-restrict option The --negotiation-tip option to 'git fetch' and 'git pull' allows users to specify that they want to focus negotiation on a small set of references. This is a _restriction_ on the negotiation set, helping to focus the negotiation when the ref count is high. However, it doesn't allow for the ability to opportunistically select references beyond that list. This subtle detail that this is a 'maximum set' and not a 'minimum set' is not immediately clear from the option name. This makes it more complicated to add a new option that provides the complementary behavior of a minimum set. For now, create a new synonym option, --negotiation-restrict, that behaves identically to --negotiation-tip. Update the documentation to make it clear that this new name is the preferred option, but we keep the old name for compatibility. Update a few warning messages with the new option, but also make them translatable with the option name inserted by formatting. At least one of these messages will be reused later for a new option. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/fetch-options.adoc | 4 ++++ builtin/fetch.c | 11 +++++++---- builtin/pull.c | 3 +++ t/t5510-fetch.sh | 25 +++++++++++++++++++++++++ t/t5702-protocol-v2.sh | 4 ++-- 5 files changed, 41 insertions(+), 6 deletions(-) diff --git a/Documentation/fetch-options.adoc b/Documentation/fetch-options.adoc index 81a9d7f9bbc11d..c07b85499fafe9 100644 --- a/Documentation/fetch-options.adoc +++ b/Documentation/fetch-options.adoc @@ -49,6 +49,7 @@ the current repository has the same history as the source repository. `.git/shallow`. This option updates `.git/shallow` and accepts such refs. +`--negotiation-restrict=(|)`:: `--negotiation-tip=(|)`:: By default, Git will report, to the server, commits reachable from all local refs to find common commits in an attempt to @@ -58,6 +59,9 @@ the current repository has the same history as the source repository. local ref is likely to have commits in common with the upstream ref being fetched. + +`--negotiation-restrict` is the preferred name for this option; +`--negotiation-tip` is accepted as a synonym. ++ This option may be specified more than once; if so, Git will report commits reachable from any of the given commits. 
+ diff --git a/builtin/fetch.c b/builtin/fetch.c index 4795b2a13c30e3..3bcb0c9686c4c3 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1558,8 +1558,8 @@ static void add_negotiation_tips(struct git_transport_options *smart_options) refs_for_each_ref_ext(get_main_ref_store(the_repository), add_oid, oids, &opts); if (old_nr == oids->nr) - warning("ignoring --negotiation-tip=%s because it does not match any refs", - s); + warning(_("ignoring %s=%s because it does not match any refs"), + "--negotiation-restrict", s); } smart_options->negotiation_tips = oids; } @@ -1599,7 +1599,8 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, if (transport->smart_options) add_negotiation_tips(transport->smart_options); else - warning("ignoring --negotiation-tip because the protocol does not support it"); + warning(_("ignoring %s because the protocol does not support it"), + "--negotiation-restrict"); } return transport; } @@ -2567,6 +2568,8 @@ int cmd_fetch(int argc, OPT_IPVERSION(&family), OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"), N_("report that we have only objects reachable from this object")), + OPT_STRING_LIST(0, "negotiation-restrict", &negotiation_tip, N_("revision"), + N_("report that we have only objects reachable from this object")), OPT_BOOL(0, "negotiate-only", &negotiate_only, N_("do not fetch a packfile; instead, print ancestors of negotiation tips")), OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), @@ -2657,7 +2660,7 @@ int cmd_fetch(int argc, } if (negotiate_only && !negotiation_tip.nr) - die(_("--negotiate-only needs one or more --negotiation-tip=*")); + die(_("--negotiate-only needs one or more --negotiation-restrict=*")); if (deepen_relative) { if (deepen_relative < 0) diff --git a/builtin/pull.c b/builtin/pull.c index 7e67fdce97fd1d..821cc6699a142f 100644 --- a/builtin/pull.c +++ b/builtin/pull.c @@ -999,6 +999,9 @@ int cmd_pull(int argc, OPT_PASSTHRU_ARGV(0, "negotiation-tip", &opt_fetch, 
N_("revision"), N_("report that we have only objects reachable from this object"), 0), + OPT_PASSTHRU_ARGV(0, "negotiation-restrict", &opt_fetch, N_("revision"), + N_("report that we have only objects reachable from this object"), + 0), OPT_BOOL(0, "show-forced-updates", &opt_show_forced_updates, N_("check for forced-updates on all updated branches")), OPT_PASSTHRU(0, "set-upstream", &set_upstream, NULL, diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index 5dcb4b51a47d88..dc3ce56d84c743 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1460,6 +1460,31 @@ EOF test_cmp fatal-expect fatal-actual ' +test_expect_success '--negotiation-restrict limits "have" lines sent' ' + setup_negotiation_tip server server 0 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 --negotiation-restrict=beta_1 \ + origin alpha_s beta_s && + check_negotiation_tip +' + +test_expect_success '--negotiation-restrict understands globs' ' + setup_negotiation_tip server server 0 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=*_1 \ + origin alpha_s beta_s && + check_negotiation_tip +' + +test_expect_success '--negotiation-restrict and --negotiation-tip can be mixed' ' + setup_negotiation_tip server server 0 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + --negotiation-tip=beta_1 \ + origin alpha_s beta_s && + check_negotiation_tip +' + test_expect_success SYMLINKS 'clone does not get confused by a D/F conflict' ' git init df-conflict && ( diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index f826ac46a5be5a..9f6cf4142d5b83 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -869,14 +869,14 @@ setup_negotiate_only () { test_commit -C client three } -test_expect_success 'usage: --negotiate-only without --negotiation-tip' ' +test_expect_success 'usage: --negotiate-only without --negotiation-restrict' ' SERVER="server" && 
URI="file://$(pwd)/server" && setup_negotiate_only "$SERVER" "$URI" && cat >err.expect <<-\EOF && - fatal: --negotiate-only needs one or more --negotiation-tip=* + fatal: --negotiate-only needs one or more --negotiation-restrict=* EOF test_must_fail git -c protocol.version=2 -C client fetch \ From 9596a1ca95a847afe26d9c25cb8a17f6b697a9a2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 15 Apr 2026 15:14:22 +0000 Subject: [PATCH 205/241] transport: rename negotiation_tips The previous change added the --negotiation-restrict synonym for the --negotiation-tip option for 'git fetch'. In anticipation of adding a new option that behaves similarly but with distinct changes to its behavior, rename the internal representation of this data from 'negotiation_tips' to 'negotiation_restrict_tips'. The 'tips' part is kept because this is an oid_array in the transport layer. This requires the builtin to handle parsing refs into collections of oids so the transport layer can handle this cleaner form of the data. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/fetch.c | 6 +++--- fetch-pack.c | 18 +++++++++--------- fetch-pack.h | 4 ++-- transport-helper.c | 2 +- transport.c | 10 +++++----- transport.h | 4 ++-- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 3bcb0c9686c4c3..4c3c5f2faa6a53 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1534,7 +1534,7 @@ static int add_oid(const struct reference *ref, void *cb_data) return 0; } -static void add_negotiation_tips(struct git_transport_options *smart_options) +static void add_negotiation_restrict_tips(struct git_transport_options *smart_options) { struct oid_array *oids = xcalloc(1, sizeof(*oids)); int i; @@ -1561,7 +1561,7 @@ static void add_negotiation_tips(struct git_transport_options *smart_options) warning(_("ignoring %s=%s because it does not match any refs"), "--negotiation-restrict", s); } - smart_options->negotiation_tips = oids; + smart_options->negotiation_restrict_tips = oids; } static struct transport *prepare_transport(struct remote *remote, int deepen, @@ -1597,7 +1597,7 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, } if (negotiation_tip.nr) { if (transport->smart_options) - add_negotiation_tips(transport->smart_options); + add_negotiation_restrict_tips(transport->smart_options); else warning(_("ignoring %s because the protocol does not support it"), "--negotiation-restrict"); diff --git a/fetch-pack.c b/fetch-pack.c index 6ecd468ef766a8..baf239adf98db3 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -291,21 +291,21 @@ static int next_flush(int stateless_rpc, int count) } static void mark_tips(struct fetch_negotiator *negotiator, - const struct oid_array *negotiation_tips) + const struct oid_array *negotiation_restrict_tips) { struct refs_for_each_ref_options opts = { .flags = REFS_FOR_EACH_INCLUDE_BROKEN, }; int i; - if (!negotiation_tips) { + if (!negotiation_restrict_tips) { 
refs_for_each_ref_ext(get_main_ref_store(the_repository), rev_list_insert_ref_oid, negotiator, &opts); return; } - for (i = 0; i < negotiation_tips->nr; i++) - rev_list_insert_ref(negotiator, &negotiation_tips->oid[i]); + for (i = 0; i < negotiation_restrict_tips->nr; i++) + rev_list_insert_ref(negotiator, &negotiation_restrict_tips->oid[i]); return; } @@ -355,7 +355,7 @@ static int find_common(struct fetch_negotiator *negotiator, PACKET_READ_CHOMP_NEWLINE | PACKET_READ_DIE_ON_ERR_PACKET); - mark_tips(negotiator, args->negotiation_tips); + mark_tips(negotiator, args->negotiation_restrict_tips); for_each_cached_alternate(negotiator, insert_one_alternate_object); fetching = 0; @@ -1728,7 +1728,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, else state = FETCH_SEND_REQUEST; - mark_tips(negotiator, args->negotiation_tips); + mark_tips(negotiator, args->negotiation_restrict_tips); for_each_cached_alternate(negotiator, insert_one_alternate_object); break; @@ -2177,7 +2177,7 @@ static void clear_common_flag(struct oidset *s) } } -void negotiate_using_fetch(const struct oid_array *negotiation_tips, +void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, const struct string_list *server_options, int stateless_rpc, int fd[], @@ -2195,13 +2195,13 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips, timestamp_t min_generation = GENERATION_NUMBER_INFINITY; fetch_negotiator_init(the_repository, &negotiator); - mark_tips(&negotiator, negotiation_tips); + mark_tips(&negotiator, negotiation_restrict_tips); packet_reader_init(&reader, fd[0], NULL, 0, PACKET_READ_CHOMP_NEWLINE | PACKET_READ_DIE_ON_ERR_PACKET); - oid_array_for_each((struct oid_array *) negotiation_tips, + oid_array_for_each((struct oid_array *) negotiation_restrict_tips, add_to_object_array, &nt_object_array); diff --git a/fetch-pack.h b/fetch-pack.h index 9d3470366f85ec..6c70c942c2f001 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -21,7 +21,7 @@ 
struct fetch_pack_args { * If not NULL, during packfile negotiation, fetch-pack will send "have" * lines only with these tips and their ancestors. */ - const struct oid_array *negotiation_tips; + const struct oid_array *negotiation_restrict_tips; unsigned deepen_relative:1; unsigned quiet:1; @@ -89,7 +89,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args, * In the capability advertisement that has happened prior to invoking this * function, the "wait-for-done" capability must be present. */ -void negotiate_using_fetch(const struct oid_array *negotiation_tips, +void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, const struct string_list *server_options, int stateless_rpc, int fd[], diff --git a/transport-helper.c b/transport-helper.c index 4d95d84f9e4d05..0e5b3b7202cf20 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -754,7 +754,7 @@ static int fetch_refs(struct transport *transport, set_helper_option(transport, "filter", spec); } - if (data->transport_options.negotiation_tips) + if (data->transport_options.negotiation_restrict_tips) warning("Ignoring --negotiation-tip because the protocol does not support it."); if (data->fetch) diff --git a/transport.c b/transport.c index 107f4fa5dce96a..a3051f6733633d 100644 --- a/transport.c +++ b/transport.c @@ -463,7 +463,7 @@ static int fetch_refs_via_pack(struct transport *transport, args.refetch = data->options.refetch; args.stateless_rpc = transport->stateless_rpc; args.server_options = transport->server_options; - args.negotiation_tips = data->options.negotiation_tips; + args.negotiation_restrict_tips = data->options.negotiation_restrict_tips; args.reject_shallow_remote = transport->smart_options->reject_shallow; if (!data->finished_handshake) { @@ -491,7 +491,7 @@ static int fetch_refs_via_pack(struct transport *transport, warning(_("server does not support wait-for-done")); ret = -1; } else { - negotiate_using_fetch(data->options.negotiation_tips, + 
negotiate_using_fetch(data->options.negotiation_restrict_tips, transport->server_options, transport->stateless_rpc, data->fd, @@ -979,9 +979,9 @@ static int disconnect_git(struct transport *transport) finish_connect(data->conn); } - if (data->options.negotiation_tips) { - oid_array_clear(data->options.negotiation_tips); - free(data->options.negotiation_tips); + if (data->options.negotiation_restrict_tips) { + oid_array_clear(data->options.negotiation_restrict_tips); + free(data->options.negotiation_restrict_tips); } list_objects_filter_release(&data->options.filter_options); oid_array_clear(&data->extra_have); diff --git a/transport.h b/transport.h index 892f19454a75d6..cdeb33c16f82f6 100644 --- a/transport.h +++ b/transport.h @@ -40,13 +40,13 @@ struct git_transport_options { /* * This is only used during fetch. See the documentation of - * negotiation_tips in struct fetch_pack_args. + * negotiation_restrict_tips in struct fetch_pack_args. * * This field is only supported by transports that support connect or * stateless_connect. Set this field directly instead of using * transport_set_option(). */ - struct oid_array *negotiation_tips; + struct oid_array *negotiation_restrict_tips; /* * If allocated, whenever transport_fetch_refs() is called, add known From 203275d13a761d8d4dfecddc0a78beb5daebf51c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 15 Apr 2026 15:14:23 +0000 Subject: [PATCH 206/241] remote: add remote.*.negotiationRestrict config In a previous change, the --negotiation-restrict command-line option of 'git fetch' was added as a synonym of --negotiation-tips. Both of these options restrict the set of 'haves' the client can send as part of negotiation. This was previously not available via a configuration option. Add a new 'remote..negotiationRestrict' multi-valued config option that updates 'git fetch ' to use these restrictions by default. If the user provides even one --negotiation-restrict argument, then the config is ignored. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/config/remote.adoc | 16 ++++++++++++++++ builtin/fetch.c | 24 ++++++++++++++++++++++-- remote.c | 6 ++++++ remote.h | 1 + t/t5510-fetch.sh | 22 ++++++++++++++++++++++ 5 files changed, 67 insertions(+), 2 deletions(-) diff --git a/Documentation/config/remote.adoc b/Documentation/config/remote.adoc index 91e46f66f5dd1c..5e8ac6cfdd335c 100644 --- a/Documentation/config/remote.adoc +++ b/Documentation/config/remote.adoc @@ -107,6 +107,22 @@ priority configuration file (e.g. `.git/config` in a repository) to clear the values inherited from a lower priority configuration files (e.g. `$HOME/.gitconfig`). +remote..negotiationRestrict:: + When negotiating with this remote during `git fetch` and `git push`, + restrict the commits advertised as "have" lines to only those + reachable from refs matching the given patterns. This multi-valued + config option behaves like `--negotiation-restrict` on the command + line. ++ +Each value is either an exact ref name (e.g. `refs/heads/release`) or a +glob pattern (e.g. `refs/heads/release/*`). The pattern syntax is the +same as for `--negotiation-restrict`. ++ +These config values are used as defaults for the `--negotiation-restrict` +command-line option. If `--negotiation-restrict` (or its synonym +`--negotiation-tip`) is specified on the command line, then the config +values are not used. + remote..followRemoteHEAD:: How linkgit:git-fetch[1] should handle updates to `remotes//HEAD` when fetching using the configured refspecs of a remote. 
diff --git a/builtin/fetch.c b/builtin/fetch.c index 4c3c5f2faa6a53..57b2b667fff0a7 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1601,6 +1601,19 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, else warning(_("ignoring %s because the protocol does not support it"), "--negotiation-restrict"); + } else if (remote->negotiation_restrict.nr) { + struct string_list_item *item; + for_each_string_list_item(item, &remote->negotiation_restrict) + string_list_append(&negotiation_tip, item->string); + if (transport->smart_options) + add_negotiation_restrict_tips(transport->smart_options); + else { + struct strbuf config_name = STRBUF_INIT; + strbuf_addf(&config_name, "remote.%s.negotiationRestrict", remote->name); + warning(_("ignoring %s because the protocol does not support it"), + config_name.buf); + strbuf_release(&config_name); + } } return transport; } @@ -2659,8 +2672,12 @@ int cmd_fetch(int argc, config.display_format = DISPLAY_FORMAT_PORCELAIN; } - if (negotiate_only && !negotiation_tip.nr) - die(_("--negotiate-only needs one or more --negotiation-restrict=*")); + if (negotiate_only && !negotiation_tip.nr) { + /* + * Defer this check: remote..negotiationRestrict may + * provide defaults in prepare_transport(). 
+ */ + } if (deepen_relative) { if (deepen_relative < 0) @@ -2749,6 +2766,9 @@ int cmd_fetch(int argc, if (!remote) die(_("must supply remote when using --negotiate-only")); gtransport = prepare_transport(remote, 1, &filter_options); + if (!gtransport->smart_options || + !gtransport->smart_options->negotiation_restrict_tips) + die(_("--negotiate-only needs one or more --negotiation-restrict=*")); if (gtransport->smart_options) { gtransport->smart_options->acked_commits = &acked_commits; } else { diff --git a/remote.c b/remote.c index 7ca2a6501b4920..07cdf6434d0434 100644 --- a/remote.c +++ b/remote.c @@ -152,6 +152,7 @@ static struct remote *make_remote(struct remote_state *remote_state, refspec_init_push(&ret->push); refspec_init_fetch(&ret->fetch); string_list_init_dup(&ret->server_options); + string_list_init_dup(&ret->negotiation_restrict); ALLOC_GROW(remote_state->remotes, remote_state->remotes_nr + 1, remote_state->remotes_alloc); @@ -179,6 +180,7 @@ static void remote_clear(struct remote *remote) FREE_AND_NULL(remote->http_proxy); FREE_AND_NULL(remote->http_proxy_authmethod); string_list_clear(&remote->server_options, 0); + string_list_clear(&remote->negotiation_restrict, 0); } static void add_merge(struct branch *branch, const char *name) @@ -562,6 +564,10 @@ static int handle_config(const char *key, const char *value, } else if (!strcmp(subkey, "serveroption")) { return parse_transport_option(key, value, &remote->server_options); + } else if (!strcmp(subkey, "negotiationrestrict")) { + if (!value) + return config_error_nonbool(key); + string_list_append(&remote->negotiation_restrict, value); } else if (!strcmp(subkey, "followremotehead")) { const char *no_warn_branch; if (!strcmp(value, "never")) diff --git a/remote.h b/remote.h index fc052945ee451d..e6ec37c3930355 100644 --- a/remote.h +++ b/remote.h @@ -117,6 +117,7 @@ struct remote { char *http_proxy_authmethod; struct string_list server_options; + struct string_list negotiation_restrict; enum 
follow_remote_head_settings follow_remote_head; const char *no_warn_branch; diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index dc3ce56d84c743..0d8749479498d1 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1485,6 +1485,28 @@ test_expect_success '--negotiation-restrict and --negotiation-tip can be mixed' check_negotiation_tip ' +test_expect_success 'remote..negotiationRestrict used as default' ' + setup_negotiation_tip server server 0 && + git -C client config --add remote.origin.negotiationRestrict alpha_1 && + git -C client config --add remote.origin.negotiationRestrict beta_1 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + origin alpha_s beta_s && + check_negotiation_tip +' + +test_expect_success 'CLI --negotiation-restrict overrides remote config' ' + setup_negotiation_tip server server 0 && + git -C client config --add remote.origin.negotiationRestrict alpha_1 && + git -C client config --add remote.origin.negotiationRestrict beta_1 && + ALPHA_1=$(git -C client rev-parse alpha_1) && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + origin alpha_s beta_s && + test_grep "fetch> have $ALPHA_1" trace && + BETA_1=$(git -C client rev-parse beta_1) && + test_grep ! "fetch> have $BETA_1" trace +' + test_expect_success SYMLINKS 'clone does not get confused by a D/F conflict' ' git init df-conflict && ( From 18d5475b45b2a883ca88b12c87f4cc19d1ec5cf2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 15 Apr 2026 15:14:24 +0000 Subject: [PATCH 207/241] fetch: add --negotiation-require option for negotiation Add a new --negotiation-require option to 'git fetch', which ensures that certain ref tips are always sent as 'have' lines during fetch negotiation, regardless of what the negotiation algorithm selects. This is useful when the repository has a large number of references, so the normal negotiation algorithm truncates the list. 
This is especially important in repositories with long parallel commit histories. For example, a repo could have a 'dev' branch for development and a 'release' branch for released versions. If the 'dev' branch isn't selected for negotiation, then it's not a big deal because there are many in-progress development branches with a shared history. However, if 'release' is not selected for negotiation, then the server may think that this is the first time the client has asked for that reference, causing a full download of its parallel commit history (and any extra data that may be unique to that branch). This is based on a real example where certain fetches would grow to 60+ GB when a release branch updated. This option is a complement to --negotiation-restrict, which reduces the negotiation ref set to a specific list. In the earlier example, using --negotiation-restrict to focus the negotiation on 'dev' and 'release' would avoid those problematic downloads, but would still not allow advertising potentially-relevant user branches. In this way, the 'require' version solves the problem I mention while allowing negotiation to pick other references opportunistically. The two options can also be combined to allow the best of both worlds. The argument may be an exact ref name or a glob pattern. Non-existent refs are silently ignored. Also add --negotiation-require to 'git pull' passthrough options. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/fetch-options.adoc | 19 +++++++ builtin/fetch.c | 10 ++++ builtin/pull.c | 3 + fetch-pack.c | 96 ++++++++++++++++++++++++++++++-- fetch-pack.h | 10 +++- t/t5510-fetch.sh | 66 ++++++++++++++++++++++ transport.c | 4 +- transport.h | 6 ++ 8 files changed, 206 insertions(+), 8 deletions(-) diff --git a/Documentation/fetch-options.adoc b/Documentation/fetch-options.adoc index c07b85499fafe9..85ffc5b32b68bb 100644 --- a/Documentation/fetch-options.adoc +++ b/Documentation/fetch-options.adoc @@ -73,6 +73,25 @@ See also the `fetch.negotiationAlgorithm` and `push.negotiate` configuration variables documented in linkgit:git-config[1], and the `--negotiate-only` option below. +`--negotiation-require=`:: + Ensure that the given ref tip is always sent as a "have" line + during fetch negotiation, regardless of what the negotiation + algorithm selects. This is useful to guarantee that common + history reachable from specific refs is always considered, even + when `--negotiation-restrict` restricts the set of tips or when + the negotiation algorithm would otherwise skip them. ++ +This option may be specified more than once; if so, each ref is sent +unconditionally. ++ +The argument may be an exact ref name (e.g. `refs/heads/release`) or a +glob pattern (e.g. `refs/heads/release/{asterisk}`). The pattern syntax +is the same as for `--negotiation-restrict`. ++ +If `--negotiation-restrict` is used, the have set is first restricted by +that option and then increased to include the tips specified by +`--negotiation-require`. 
+ `--negotiate-only`:: Do not fetch anything from the server, and instead print the ancestors of the provided `--negotiation-tip=` arguments, diff --git a/builtin/fetch.c b/builtin/fetch.c index 57b2b667fff0a7..b60652e6b1cce2 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -99,6 +99,7 @@ static struct transport *gsecondary; static struct refspec refmap = REFSPEC_INIT_FETCH; static struct string_list server_options = STRING_LIST_INIT_DUP; static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP; +static struct string_list negotiation_require = STRING_LIST_INIT_NODUP; struct fetch_config { enum display_format display_format; @@ -1615,6 +1616,13 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, strbuf_release(&config_name); } } + if (negotiation_require.nr) { + if (transport->smart_options) + transport->smart_options->negotiation_require = &negotiation_require; + else + warning(_("ignoring %s because the protocol does not support it"), + "--negotiation-require"); + } return transport; } @@ -2583,6 +2591,8 @@ int cmd_fetch(int argc, N_("report that we have only objects reachable from this object")), OPT_STRING_LIST(0, "negotiation-restrict", &negotiation_tip, N_("revision"), N_("report that we have only objects reachable from this object")), + OPT_STRING_LIST(0, "negotiation-require", &negotiation_require, N_("revision"), + N_("ensure this ref is always sent as a negotiation have")), OPT_BOOL(0, "negotiate-only", &negotiate_only, N_("do not fetch a packfile; instead, print ancestors of negotiation tips")), OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), diff --git a/builtin/pull.c b/builtin/pull.c index 821cc6699a142f..973186ecdcb307 100644 --- a/builtin/pull.c +++ b/builtin/pull.c @@ -1002,6 +1002,9 @@ int cmd_pull(int argc, OPT_PASSTHRU_ARGV(0, "negotiation-restrict", &opt_fetch, N_("revision"), N_("report that we have only objects reachable from this object"), 0), + OPT_PASSTHRU_ARGV(0, "negotiation-require", 
&opt_fetch, N_("revision"), + N_("ensure this ref is always sent as a negotiation have"), + 0), OPT_BOOL(0, "show-forced-updates", &opt_show_forced_updates, N_("check for forced-updates on all updated branches")), OPT_PASSTHRU(0, "set-upstream", &set_upstream, NULL, diff --git a/fetch-pack.c b/fetch-pack.c index baf239adf98db3..a0029253f18993 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -25,6 +25,7 @@ #include "oidset.h" #include "packfile.h" #include "odb.h" +#include "object-name.h" #include "path.h" #include "connected.h" #include "fetch-negotiator.h" @@ -332,6 +333,41 @@ static void send_filter(struct fetch_pack_args *args, } } +static int add_oid_to_oidset(const struct reference *ref, void *cb_data) +{ + struct oidset *set = cb_data; + if (odb_has_object(the_repository->objects, ref->oid, 0)) + oidset_insert(set, ref->oid); + return 0; +} + +static void resolve_negotiation_require(const struct string_list *negotiation_require, + struct oidset *result) +{ + struct string_list_item *item; + + if (!negotiation_require || !negotiation_require->nr) + return; + + for_each_string_list_item(item, negotiation_require) { + if (!has_glob_specials(item->string)) { + struct object_id oid; + if (repo_get_oid(the_repository, item->string, &oid)) + continue; + if (!odb_has_object(the_repository->objects, &oid, 0)) + continue; + oidset_insert(result, &oid); + } else { + struct refs_for_each_ref_options opts = { + .pattern = item->string, + }; + refs_for_each_ref_ext( + get_main_ref_store(the_repository), + add_oid_to_oidset, result, &opts); + } + } +} + static int find_common(struct fetch_negotiator *negotiator, struct fetch_pack_args *args, int fd[2], struct object_id *result_oid, @@ -347,6 +383,7 @@ static int find_common(struct fetch_negotiator *negotiator, struct strbuf req_buf = STRBUF_INIT; size_t state_len = 0; struct packet_reader reader; + struct oidset negotiation_require_oids = OIDSET_INIT; if (args->stateless_rpc && multi_ack == 1) die(_("the option '%s' 
requires '%s'"), "--stateless-rpc", "multi_ack_detailed"); @@ -474,7 +511,25 @@ static int find_common(struct fetch_negotiator *negotiator, trace2_region_enter("fetch-pack", "negotiation_v0_v1", the_repository); flushes = 0; retval = -1; + + /* Send unconditional haves from --negotiation-require */ + resolve_negotiation_require(args->negotiation_require, + &negotiation_require_oids); + if (oidset_size(&negotiation_require_oids)) { + struct oidset_iter iter; + oidset_iter_init(&negotiation_require_oids, &iter); + + while ((oid = oidset_iter_next(&iter))) { + packet_buf_write(&req_buf, "have %s\n", + oid_to_hex(oid)); + print_verbose(args, "have %s", oid_to_hex(oid)); + } + } + while ((oid = negotiator->next(negotiator))) { + /* avoid duplicate oids from --negotiation-require */ + if (oidset_contains(&negotiation_require_oids, oid)) + continue; packet_buf_write(&req_buf, "have %s\n", oid_to_hex(oid)); print_verbose(args, "have %s", oid_to_hex(oid)); in_vain++; @@ -584,6 +639,7 @@ static int find_common(struct fetch_negotiator *negotiator, flushes++; } strbuf_release(&req_buf); + oidset_clear(&negotiation_require_oids); if (!got_ready || !no_done) consume_shallow_list(args, &reader); @@ -1305,12 +1361,26 @@ static void add_common(struct strbuf *req_buf, struct oidset *common) static int add_haves(struct fetch_negotiator *negotiator, struct strbuf *req_buf, - int *haves_to_send) + int *haves_to_send, + struct oidset *negotiation_require_oids) { int haves_added = 0; const struct object_id *oid; + /* Send unconditional haves from --negotiation-require */ + if (negotiation_require_oids) { + struct oidset_iter iter; + oidset_iter_init(negotiation_require_oids, &iter); + + while ((oid = oidset_iter_next(&iter))) + packet_buf_write(req_buf, "have %s\n", + oid_to_hex(oid)); + } + while ((oid = negotiator->next(negotiator))) { + if (negotiation_require_oids && + oidset_contains(negotiation_require_oids, oid)) + continue; packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid)); 
if (++haves_added >= *haves_to_send) break; @@ -1358,7 +1428,8 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, struct fetch_pack_args *args, const struct ref *wants, struct oidset *common, int *haves_to_send, int *in_vain, - int sideband_all, int seen_ack) + int sideband_all, int seen_ack, + struct oidset *negotiation_require_oids) { int haves_added; int done_sent = 0; @@ -1413,7 +1484,8 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, /* Add all of the common commits we've found in previous rounds */ add_common(&req_buf, common); - haves_added = add_haves(negotiator, &req_buf, haves_to_send); + haves_added = add_haves(negotiator, &req_buf, haves_to_send, + negotiation_require_oids); *in_vain += haves_added; trace2_data_intmax("negotiation_v2", the_repository, "haves_added", haves_added); trace2_data_intmax("negotiation_v2", the_repository, "in_vain", *in_vain); @@ -1657,6 +1729,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct ref *ref = copy_ref_list(orig_ref); enum fetch_state state = FETCH_CHECK_LOCAL; struct oidset common = OIDSET_INIT; + struct oidset negotiation_require_oids = OIDSET_INIT; struct packet_reader reader; int in_vain = 0, negotiation_started = 0; int negotiation_round = 0; @@ -1729,6 +1802,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, state = FETCH_SEND_REQUEST; mark_tips(negotiator, args->negotiation_restrict_tips); + resolve_negotiation_require(args->negotiation_require, + &negotiation_require_oids); for_each_cached_alternate(negotiator, insert_one_alternate_object); break; @@ -1747,7 +1822,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, &common, &haves_to_send, &in_vain, reader.use_sideband, - seen_ack)) { + seen_ack, + &negotiation_require_oids)) { trace2_region_leave_printf("negotiation_v2", "round", the_repository, "%d", negotiation_round); @@ -1883,6 +1959,7 @@ static struct ref 
*do_fetch_pack_v2(struct fetch_pack_args *args, negotiator->release(negotiator); oidset_clear(&common); + oidset_clear(&negotiation_require_oids); return ref; } @@ -2181,12 +2258,14 @@ void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, const struct string_list *server_options, int stateless_rpc, int fd[], - struct oidset *acked_commits) + struct oidset *acked_commits, + const struct string_list *negotiation_require) { struct fetch_negotiator negotiator; struct packet_reader reader; struct object_array nt_object_array = OBJECT_ARRAY_INIT; struct strbuf req_buf = STRBUF_INIT; + struct oidset negotiation_require_oids = OIDSET_INIT; int haves_to_send = INITIAL_FLUSH; int in_vain = 0; int seen_ack = 0; @@ -2197,6 +2276,9 @@ void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, fetch_negotiator_init(the_repository, &negotiator); mark_tips(&negotiator, negotiation_restrict_tips); + resolve_negotiation_require(negotiation_require, + &negotiation_require_oids); + packet_reader_init(&reader, fd[0], NULL, 0, PACKET_READ_CHOMP_NEWLINE | PACKET_READ_DIE_ON_ERR_PACKET); @@ -2221,7 +2303,8 @@ void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, packet_buf_write(&req_buf, "wait-for-done"); - haves_added = add_haves(&negotiator, &req_buf, &haves_to_send); + haves_added = add_haves(&negotiator, &req_buf, &haves_to_send, + &negotiation_require_oids); in_vain += haves_added; if (!haves_added || (seen_ack && in_vain >= MAX_IN_VAIN)) last_iteration = 1; @@ -2273,6 +2356,7 @@ void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, clear_common_flag(acked_commits); object_array_clear(&nt_object_array); + oidset_clear(&negotiation_require_oids); negotiator.release(&negotiator); strbuf_release(&req_buf); } diff --git a/fetch-pack.h b/fetch-pack.h index 6c70c942c2f001..1daea8c542dc9e 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -23,6 +23,13 @@ struct fetch_pack_args { */ const struct 
oid_array *negotiation_restrict_tips; + /* + * If non-empty, ref patterns whose tips should always be sent + * as "have" lines during negotiation, regardless of what the + * negotiation algorithm selects. + */ + const struct string_list *negotiation_require; + unsigned deepen_relative:1; unsigned quiet:1; unsigned keep_pack:1; @@ -93,7 +100,8 @@ void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, const struct string_list *server_options, int stateless_rpc, int fd[], - struct oidset *acked_commits); + struct oidset *acked_commits, + const struct string_list *negotiation_require); /* * Print an appropriate error message for each sought ref that wasn't diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index 0d8749479498d1..ec30b81c71cb3f 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1507,6 +1507,72 @@ test_expect_success 'CLI --negotiation-restrict overrides remote config' ' test_grep ! "fetch> have $BETA_1" trace ' +test_expect_success '--negotiation-require includes configured refs as haves' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + --negotiation-require=refs/tags/beta_1 \ + origin alpha_s beta_s && + + ALPHA_1=$(git -C client rev-parse alpha_1) && + test_grep "fetch> have $ALPHA_1" trace && + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> have $BETA_1" trace +' + +test_expect_success '--negotiation-require works with glob patterns' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + --negotiation-require="refs/tags/beta_*" \ + origin alpha_s beta_s && + + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> have $BETA_1" trace && + BETA_2=$(git -C client rev-parse beta_2) && + test_grep "fetch> have $BETA_2" trace +' + +test_expect_success 
'--negotiation-require is additive with negotiation' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-require=refs/tags/beta_1 \ + origin alpha_s beta_s && + + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> have $BETA_1" trace +' + +test_expect_success '--negotiation-require ignores non-existent refs silently' ' + setup_negotiation_tip server server 0 && + + git -C client fetch --quiet \ + --negotiation-restrict=alpha_1 \ + --negotiation-require=refs/tags/nonexistent \ + origin alpha_s beta_s 2>err && + test_must_be_empty err +' + +test_expect_success '--negotiation-require avoids duplicates with negotiator' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + ALPHA_1=$(git -C client rev-parse alpha_1) && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + --negotiation-require=refs/tags/alpha_1 \ + origin alpha_s beta_s && + + test_grep "fetch> have $ALPHA_1" trace >matches && + test_line_count = 1 matches +' + test_expect_success SYMLINKS 'clone does not get confused by a D/F conflict' ' git init df-conflict && ( diff --git a/transport.c b/transport.c index a3051f6733633d..d1b0e9eda0b104 100644 --- a/transport.c +++ b/transport.c @@ -464,6 +464,7 @@ static int fetch_refs_via_pack(struct transport *transport, args.stateless_rpc = transport->stateless_rpc; args.server_options = transport->server_options; args.negotiation_restrict_tips = data->options.negotiation_restrict_tips; + args.negotiation_require = data->options.negotiation_require; args.reject_shallow_remote = transport->smart_options->reject_shallow; if (!data->finished_handshake) { @@ -495,7 +496,8 @@ static int fetch_refs_via_pack(struct transport *transport, transport->server_options, transport->stateless_rpc, data->fd, - data->options.acked_commits); + data->options.acked_commits, + 
data->options.negotiation_require); ret = 0; } goto cleanup; diff --git a/transport.h b/transport.h index cdeb33c16f82f6..8737f23008ebc0 100644 --- a/transport.h +++ b/transport.h @@ -48,6 +48,12 @@ struct git_transport_options { */ struct oid_array *negotiation_restrict_tips; + /* + * If non-empty, ref patterns whose tips should always be sent + * as "have" lines during negotiation. + */ + const struct string_list *negotiation_require; + /* * If allocated, whenever transport_fetch_refs() is called, add known * common commits to this oidset instead of fetching any packfiles. From 288ea6c3c275d29817efa6e80711701abd041dfe Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 15 Apr 2026 15:14:25 +0000 Subject: [PATCH 208/241] remote: add negotiationRequire config as default for --negotiation-require Add a new 'remote.<name>.negotiationRequire' multi-valued config option that provides default values for --negotiation-require when no --negotiation-require arguments are specified over the command line. This is a mirror of how 'remote.<name>.negotiationRestrict' specifies defaults for the --negotiation-restrict arguments. Each value is either an exact ref name or a glob pattern whose tips should always be sent as 'have' lines during negotiation. The config values are resolved through the same resolve_negotiation_require() codepath as the CLI options. This option is additive with the normal negotiation process: the negotiation algorithm still runs and advertises its own selected commits, but the refs matching the config are sent unconditionally on top of those heuristically selected commits. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/config/remote.adoc | 24 +++++++++++++++++ Documentation/fetch-options.adoc | 4 +++ builtin/fetch.c | 10 +++++++ remote.c | 6 +++++ remote.h | 1 + t/t5510-fetch.sh | 46 ++++++++++++++++++++++++++++++++ 6 files changed, 91 insertions(+) diff --git a/Documentation/config/remote.adoc b/Documentation/config/remote.adoc index 5e8ac6cfdd335c..9dbe820275dd19 100644 --- a/Documentation/config/remote.adoc +++ b/Documentation/config/remote.adoc @@ -123,6 +123,30 @@ command-line option. If `--negotiation-restrict` (or its synonym `--negotiation-tip`) is specified on the command line, then the config values are not used. +remote.<name>.negotiationRequire:: + When negotiating with this remote during `git fetch` and `git push`, + the client advertises a list of commits that exist locally. In + repos with many references, this list of "haves" can be truncated. + Depending on data shape, dropping certain references may be + expensive. This multi-valued config option specifies ref patterns + whose tips should always be sent as "have" commits during fetch + negotiation with this remote. ++ +Each value is either an exact ref name (e.g. `refs/heads/release`) or a +glob pattern (e.g. `refs/heads/release/*`). The pattern syntax is the same +as for `--negotiation-restrict`. ++ +These config values are used as defaults for the `--negotiation-require` +command-line option. If `--negotiation-require` is specified on the +command line, then the config values are not used. ++ +This option is additive with the normal negotiation process: the +negotiation algorithm still runs and advertises its own selected commits, +but the refs matching `remote.<name>.negotiationRequire` are sent +unconditionally on top of those heuristically selected commits. This +option is also used during push negotiation when `push.negotiate` is +enabled. 
+ remote.<name>.followRemoteHEAD:: How linkgit:git-fetch[1] should handle updates to `remotes/<name>/HEAD` when fetching using the configured refspecs of a remote. diff --git a/Documentation/fetch-options.adoc b/Documentation/fetch-options.adoc index 85ffc5b32b68bb..16c6e8cee91531 100644 --- a/Documentation/fetch-options.adoc +++ b/Documentation/fetch-options.adoc @@ -91,6 +91,10 @@ is the same as for `--negotiation-restrict`. If `--negotiation-restrict` is used, the have set is first restricted by that option and then increased to include the tips specified by `--negotiation-require`. ++ +If this option is not specified on the command line, then any +`remote.<name>.negotiationRequire` config values for the current remote +are used instead. `--negotiate-only`:: Do not fetch anything from the server, and instead print the diff --git a/builtin/fetch.c b/builtin/fetch.c index b60652e6b1cce2..a398115fb5e0f6 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1622,6 +1622,16 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, else warning(_("ignoring %s because the protocol does not support it"), "--negotiation-require"); + } else if (remote->negotiation_require.nr) { + if (transport->smart_options) { + transport->smart_options->negotiation_require = &remote->negotiation_require; + } else { + struct strbuf config_name = STRBUF_INIT; + strbuf_addf(&config_name, "remote.%s.negotiationRequire", remote->name); + warning(_("ignoring %s because the protocol does not support it"), + config_name.buf); + strbuf_release(&config_name); + } } return transport; } diff --git a/remote.c b/remote.c index 07cdf6434d0434..53deed7565b32f 100644 --- a/remote.c +++ b/remote.c @@ -153,6 +153,7 @@ static struct remote *make_remote(struct remote_state *remote_state, refspec_init_fetch(&ret->fetch); string_list_init_dup(&ret->server_options); string_list_init_dup(&ret->negotiation_restrict); + string_list_init_dup(&ret->negotiation_require); ALLOC_GROW(remote_state->remotes, 
remote_state->remotes_nr + 1, remote_state->remotes_alloc); @@ -181,6 +182,7 @@ static void remote_clear(struct remote *remote) FREE_AND_NULL(remote->http_proxy_authmethod); string_list_clear(&remote->server_options, 0); string_list_clear(&remote->negotiation_restrict, 0); + string_list_clear(&remote->negotiation_require, 0); } static void add_merge(struct branch *branch, const char *name) @@ -568,6 +570,10 @@ static int handle_config(const char *key, const char *value, if (!value) return config_error_nonbool(key); string_list_append(&remote->negotiation_restrict, value); + } else if (!strcmp(subkey, "negotiationrequire")) { + if (!value) + return config_error_nonbool(key); + string_list_append(&remote->negotiation_require, value); } else if (!strcmp(subkey, "followremotehead")) { const char *no_warn_branch; if (!strcmp(value, "never")) diff --git a/remote.h b/remote.h index e6ec37c3930355..d986257c78c666 100644 --- a/remote.h +++ b/remote.h @@ -118,6 +118,7 @@ struct remote { struct string_list server_options; struct string_list negotiation_restrict; + struct string_list negotiation_require; enum follow_remote_head_settings follow_remote_head; const char *no_warn_branch; diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index ec30b81c71cb3f..0246ac6bc5ecaa 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1573,6 +1573,52 @@ test_expect_success '--negotiation-require avoids duplicates with negotiator' ' test_line_count = 1 matches ' +test_expect_success 'remote.<name>.negotiationRequire used as default for --negotiation-require' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + git -C client config --add remote.origin.negotiationRequire refs/tags/beta_1 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + origin alpha_s beta_s && + + ALPHA_1=$(git -C client rev-parse alpha_1) && + test_grep "fetch> have $ALPHA_1" trace && + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> 
have $BETA_1" trace +' + +test_expect_success 'remote.<name>.negotiationRequire works with glob patterns' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + git -C client config --add remote.origin.negotiationRequire "refs/tags/beta_*" && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + origin alpha_s beta_s && + + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> have $BETA_1" trace && + BETA_2=$(git -C client rev-parse beta_2) && + test_grep "fetch> have $BETA_2" trace +' + +test_expect_success 'CLI --negotiation-require overrides remote.<name>.negotiationRequire' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + git -C client config --add remote.origin.negotiationRequire refs/tags/beta_2 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + --negotiation-require=refs/tags/beta_1 \ + origin alpha_s beta_s && + + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> have $BETA_1" trace && + BETA_2=$(git -C client rev-parse beta_2) && + test_grep ! "fetch> have $BETA_2" trace +' + test_expect_success SYMLINKS 'clone does not get confused by a D/F conflict' ' git init df-conflict && ( From 9f67318dfc82e17c08ea79cc1cb85c7feb87bdb6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 15 Apr 2026 15:14:26 +0000 Subject: [PATCH 209/241] send-pack: pass negotiation config in push When push.negotiate is enabled, 'git push' spawns a child 'git fetch --negotiate-only' process to find common commits. Pass --negotiation-require and --negotiation-restrict options from the 'remote.<name>.negotiationRequire' and 'remote.<name>.negotiationRestrict' config keys to this child process. When negotiationRestrict is configured, it replaces the default behavior of using all remote refs as negotiation tips. This allows the user to control which local refs are used for push negotiation. 
When negotiationRequire is configured, the specified ref patterns are passed as --negotiation-require to ensure their tips are always sent as 'have' lines during push negotiation. This change also updates the use of --negotiation-tip into --negotiation-restrict now that the new synonym exists. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- send-pack.c | 34 ++++++++++++++++++++++++++++------ send-pack.h | 2 ++ t/t5516-fetch-push.sh | 30 ++++++++++++++++++++++++++++++ transport.c | 2 ++ 4 files changed, 62 insertions(+), 6 deletions(-) diff --git a/send-pack.c b/send-pack.c index 67d6987b1ccd7e..1bf17a73a99eb4 100644 --- a/send-pack.c +++ b/send-pack.c @@ -433,6 +433,8 @@ static void reject_invalid_nonce(const char *nonce, int len) static void get_commons_through_negotiation(struct repository *r, const char *url, + const struct string_list *negotiation_require, + const struct string_list *negotiation_restrict, const struct ref *remote_refs, struct oid_array *commons) { @@ -445,13 +447,30 @@ static void get_commons_through_negotiation(struct repository *r, child.no_stdin = 1; child.out = -1; strvec_pushl(&child.args, "fetch", "--negotiate-only", NULL); - for (ref = remote_refs; ref; ref = ref->next) { - if (!is_null_oid(&ref->new_oid)) { - strvec_pushf(&child.args, "--negotiation-tip=%s", - oid_to_hex(&ref->new_oid)); - nr_negotiation_tip++; + + if (negotiation_restrict && negotiation_restrict->nr) { + struct string_list_item *item; + for_each_string_list_item(item, negotiation_restrict) + strvec_pushf(&child.args, "--negotiation-restrict=%s", + item->string); + nr_negotiation_tip = negotiation_restrict->nr; + } else { + for (ref = remote_refs; ref; ref = ref->next) { + if (!is_null_oid(&ref->new_oid)) { + strvec_pushf(&child.args, "--negotiation-tip=%s", + oid_to_hex(&ref->new_oid)); + nr_negotiation_tip++; + } } } + + if (negotiation_require && negotiation_require->nr) { + struct string_list_item *item; + for_each_string_list_item(item, 
negotiation_require) + strvec_pushf(&child.args, "--negotiation-require=%s", + item->string); + } + strvec_push(&child.args, url); if (!nr_negotiation_tip) { @@ -528,7 +547,10 @@ int send_pack(struct repository *r, repo_config_get_bool(r, "push.negotiate", &push_negotiate); if (push_negotiate) { trace2_region_enter("send_pack", "push_negotiate", r); - get_commons_through_negotiation(r, args->url, remote_refs, &commons); + get_commons_through_negotiation(r, args->url, + args->negotiation_require, + args->negotiation_restrict, + remote_refs, &commons); trace2_region_leave("send_pack", "push_negotiate", r); } diff --git a/send-pack.h b/send-pack.h index c5ded2d2006f13..112f31121a3835 100644 --- a/send-pack.h +++ b/send-pack.h @@ -18,6 +18,8 @@ struct repository; struct send_pack_args { const char *url; + const struct string_list *negotiation_require; + const struct string_list *negotiation_restrict; unsigned verbose:1, quiet:1, porcelain:1, diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index ac8447f21ed963..03b797cef58b26 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -254,6 +254,36 @@ test_expect_success 'push with negotiation does not attempt to fetch submodules' ! 
grep "Fetching submodule" err ' +test_expect_success 'push with negotiation and remote.<name>.negotiationRequire' ' + test_when_finished rm -rf negotiation_require && + mk_empty negotiation_require && + git push negotiation_require $the_first_commit:refs/remotes/origin/first_commit && + test_commit -C negotiation_require unrelated_commit && + git -C negotiation_require config receive.hideRefs refs/remotes/origin/first_commit && + test_when_finished "rm event" && + GIT_TRACE2_EVENT="$(pwd)/event" \ + git -c protocol.version=2 -c push.negotiate=1 \ + -c remote.negotiation_require.negotiationRequire=refs/heads/main \ + push negotiation_require refs/heads/main:refs/remotes/origin/main && + test_grep \"key\":\"total_rounds\" event && + grep_wrote 2 event # 1 commit, 1 tree +' + +test_expect_success 'push with negotiation and remote.<name>.negotiationRestrict' ' + test_when_finished rm -rf negotiation_restrict && + mk_empty negotiation_restrict && + git push negotiation_restrict $the_first_commit:refs/remotes/origin/first_commit && + test_commit -C negotiation_restrict unrelated_commit && + git -C negotiation_restrict config receive.hideRefs refs/remotes/origin/first_commit && + test_when_finished "rm event" && + GIT_TRACE2_EVENT="$(pwd)/event" \ + git -c protocol.version=2 -c push.negotiate=1 \ + -c remote.negotiation_restrict.negotiationRestrict=refs/heads/main \ + push negotiation_restrict refs/heads/main:refs/remotes/origin/main && + test_grep \"key\":\"total_rounds\" event && + grep_wrote 2 event # 1 commit, 1 tree +' + test_expect_success 'push without wildcard' ' mk_empty testrepo && diff --git a/transport.c b/transport.c index d1b0e9eda0b104..9903eb1a532dff 100644 --- a/transport.c +++ b/transport.c @@ -921,6 +921,8 @@ static int git_transport_push(struct transport *transport, struct ref *remote_re args.atomic = !!(flags & TRANSPORT_PUSH_ATOMIC); args.push_options = transport->push_options; args.url = transport->url; + args.negotiation_require = 
&transport->remote->negotiation_require; + args.negotiation_restrict = &transport->remote->negotiation_restrict; if (flags & TRANSPORT_PUSH_CERT_ALWAYS) args.push_cert = SEND_PACK_PUSH_CERT_ALWAYS; From 97658d4cea549a42b2367f514c5faff3a0ffff9c Mon Sep 17 00:00:00 2001 From: Harald Nordgren Date: Wed, 15 Apr 2026 16:24:15 +0000 Subject: [PATCH 210/241] stash: add --label-ours, --label-theirs, --label-base for apply Allow callers of "git stash apply" to pass custom labels for conflict markers instead of the default "Updated upstream" and "Stashed changes". Document the new options and add a test. Signed-off-by: Harald Nordgren Signed-off-by: Junio C Hamano --- Documentation/git-stash.adoc | 11 ++++++++++- builtin/stash.c | 28 ++++++++++++++++++++-------- t/t3903-stash.sh | 24 ++++++++++++++++++++++++ xdiff/xmerge.c | 6 +++--- 4 files changed, 57 insertions(+), 12 deletions(-) diff --git a/Documentation/git-stash.adoc b/Documentation/git-stash.adoc index b05c990ecd8759..50bb89f48362a4 100644 --- a/Documentation/git-stash.adoc +++ b/Documentation/git-stash.adoc @@ -12,7 +12,7 @@ git stash list [] git stash show [-u | --include-untracked | --only-untracked] [] [] git stash drop [-q | --quiet] [] git stash pop [--index] [-q | --quiet] [] -git stash apply [--index] [-q | --quiet] [] +git stash apply [--index] [-q | --quiet] [--label-ours=