From 38db639013b123c66b7616fa18b43bc90a9faef5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 8 Jan 2026 17:02:24 -0500 Subject: [PATCH 1/5] config-batch: basic boilerplate of new builtin Later changes will document, implement, and test this new builtin. For now, this serves as the latest example of the minimum boilerplate to introduce a new builtin. Recently, we updated the comment in builtin.h about how to create a new builtin, but failed to mention the required change to meson.build files for some CI builds to pass. Fix that oversight. Signed-off-by: Derrick Stolee --- .gitignore | 1 + Documentation/git-config-batch.adoc | 24 +++++++++++++++++++++++ Documentation/meson.build | 1 + Makefile | 1 + builtin.h | 7 +++++++ builtin/config-batch.c | 30 +++++++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + meson.build | 1 + t/meson.build | 1 + t/t1312-config-batch.sh | 12 ++++++++++++ 11 files changed, 80 insertions(+) create mode 100644 Documentation/git-config-batch.adoc create mode 100644 builtin/config-batch.c create mode 100755 t/t1312-config-batch.sh diff --git a/.gitignore b/.gitignore index 78a45cb5bec991..42640b5e249c8f 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,7 @@ /git-commit-graph /git-commit-tree /git-config +/git-config-batch /git-count-objects /git-credential /git-credential-cache diff --git a/Documentation/git-config-batch.adoc b/Documentation/git-config-batch.adoc new file mode 100644 index 00000000000000..dfa0bd83e25f61 --- /dev/null +++ b/Documentation/git-config-batch.adoc @@ -0,0 +1,24 @@ +git-config-batch(1) +=================== + +NAME +---- +git-config-batch - Get and set options using machine-parseable interface + + +SYNOPSIS +-------- +[verse] +'git config-batch' + +DESCRIPTION +----------- +TODO + +SEE ALSO +-------- +linkgit:git-config[1] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/meson.build b/Documentation/meson.build index f02dbc20cbcb86..f5ad1179213682 100644 --- 
a/Documentation/meson.build +++ b/Documentation/meson.build @@ -29,6 +29,7 @@ manpages = { 'git-commit-tree.adoc' : 1, 'git-commit.adoc' : 1, 'git-config.adoc' : 1, + 'git-config-batch.adoc' : 1, 'git-count-objects.adoc' : 1, 'git-credential-cache--daemon.adoc' : 1, 'git-credential-cache.adoc' : 1, diff --git a/Makefile b/Makefile index 8aa489f3b6812f..aa3868e5134119 100644 --- a/Makefile +++ b/Makefile @@ -1390,6 +1390,7 @@ BUILTIN_OBJS += builtin/commit-graph.o BUILTIN_OBJS += builtin/commit-tree.o BUILTIN_OBJS += builtin/commit.o BUILTIN_OBJS += builtin/config.o +BUILTIN_OBJS += builtin/config-batch.o BUILTIN_OBJS += builtin/count-objects.o BUILTIN_OBJS += builtin/credential-cache--daemon.o BUILTIN_OBJS += builtin/credential-cache.o diff --git a/builtin.h b/builtin.h index e5e16ecaa6c9d7..5f5a19635ee57c 100644 --- a/builtin.h +++ b/builtin.h @@ -68,12 +68,18 @@ * * . Add `builtin/foo.o` to `BUILTIN_OBJS` in `Makefile`. * + * . Add 'builtin/foo.c' to the 'builtin_sources' array in 'meson.build'. + * * Additionally, if `foo` is a new command, there are 4 more things to do: * * . Add tests to `t/` directory. * + * . Add the test script to 'integration_tests' in 't/meson.build'. + * * . Write documentation in `Documentation/git-foo.adoc`. * + * . Add 'git-foo.adoc' to the manpages list in 'Documentation/meson.build'. + * * . Add an entry for `git-foo` to `command-list.txt`. * * . Add an entry for `/git-foo` to `.gitignore`. 
@@ -167,6 +173,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix, struct repositor int cmd_commit_graph(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_commit_tree(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_config(int argc, const char **argv, const char *prefix, struct repository *repo); +int cmd_config_batch(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_count_objects(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_credential(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_credential_cache(int argc, const char **argv, const char *prefix, struct repository *repo); diff --git a/builtin/config-batch.c b/builtin/config-batch.c new file mode 100644 index 00000000000000..ea4f408ecb45d6 --- /dev/null +++ b/builtin/config-batch.c @@ -0,0 +1,30 @@ +#define USE_THE_REPOSITORY_VARIABLE +#include "builtin.h" +#include "config.h" +#include "environment.h" +#include "parse-options.h" + +static const char *const builtin_config_batch_usage[] = { + N_("git config-batch "), + NULL +}; + +int cmd_config_batch(int argc, + const char **argv, + const char *prefix, + struct repository *repo) +{ + struct option options[] = { + OPT_END(), + }; + + show_usage_with_options_if_asked(argc, argv, + builtin_config_batch_usage, options); + + argc = parse_options(argc, argv, prefix, options, builtin_config_batch_usage, + 0); + + repo_config(repo, git_default_config, NULL); + + return 0; +} diff --git a/command-list.txt b/command-list.txt index accd3d0c4b5524..57c7c7458d9b26 100644 --- a/command-list.txt +++ b/command-list.txt @@ -83,6 +83,7 @@ git-commit mainporcelain history git-commit-graph plumbingmanipulators git-commit-tree plumbingmanipulators git-config ancillarymanipulators complete +git-config-batch plumbinginterrogators git-count-objects ancillaryinterrogators git-credential purehelpers 
git-credential-cache purehelpers diff --git a/git.c b/git.c index c5fad56813f437..6b55a867dd5809 100644 --- a/git.c +++ b/git.c @@ -557,6 +557,7 @@ static struct cmd_struct commands[] = { { "commit-graph", cmd_commit_graph, RUN_SETUP }, { "commit-tree", cmd_commit_tree, RUN_SETUP }, { "config", cmd_config, RUN_SETUP_GENTLY | DELAY_PAGER_CONFIG }, + { "config-batch", cmd_config_batch, RUN_SETUP_GENTLY }, { "count-objects", cmd_count_objects, RUN_SETUP }, { "credential", cmd_credential, RUN_SETUP_GENTLY | NO_PARSEOPT }, { "credential-cache", cmd_credential_cache }, diff --git a/meson.build b/meson.build index dd52efd1c87574..040bc32c2dc3eb 100644 --- a/meson.build +++ b/meson.build @@ -582,6 +582,7 @@ builtin_sources = [ 'builtin/commit-tree.c', 'builtin/commit.c', 'builtin/config.c', + 'builtin/config-batch.c', 'builtin/count-objects.c', 'builtin/credential-cache--daemon.c', 'builtin/credential-cache.c', diff --git a/t/meson.build b/t/meson.build index 459c52a48972e4..0e9f1826f8b948 100644 --- a/t/meson.build +++ b/t/meson.build @@ -186,6 +186,7 @@ integration_tests = [ 't1309-early-config.sh', 't1310-config-default.sh', 't1311-config-optional.sh', + 't1312-config-batch.sh', 't1350-config-hooks-path.sh', 't1400-update-ref.sh', 't1401-symbolic-ref.sh', diff --git a/t/t1312-config-batch.sh b/t/t1312-config-batch.sh new file mode 100755 index 00000000000000..f59ba4a0f3f1dc --- /dev/null +++ b/t/t1312-config-batch.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +test_description='Test git config-batch' + +. ./test-lib.sh + +test_expect_success 'help text' ' + test_must_fail git config-batch -h >out && + grep usage out +' + +test_done From 110d7cbe64e0a2d1b3dbc950b7da20b1321e8546 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Sat, 17 Jan 2026 13:16:55 -0500 Subject: [PATCH 2/5] config-batch: create parse loop and unknown command As we build new features in the config-batch command, we define the plaintext protocol with line-by-line output and responses. 
To think to the future, we make sure that the protocol has a clear way to respond to an unknown command or an unknown version of that command. As some commands will allow the final argument to contain spaces or even be able to parse "\ " as a non-split token, we only provide the remaining line as data. Signed-off-by: Derrick Stolee --- Documentation/git-config-batch.adoc | 23 ++++- builtin/config-batch.c | 132 +++++++++++++++++++++++++++- t/t1312-config-batch.sh | 19 +++- 3 files changed, 169 insertions(+), 5 deletions(-) diff --git a/Documentation/git-config-batch.adoc b/Documentation/git-config-batch.adoc index dfa0bd83e25f61..9ca04b0c1eafd2 100644 --- a/Documentation/git-config-batch.adoc +++ b/Documentation/git-config-batch.adoc @@ -13,7 +13,28 @@ SYNOPSIS DESCRIPTION ----------- -TODO +Tools frequently need to change their behavior based on values stored in +Git's configuration files. These files may have complicated conditions +for including extra files, so it is difficult to produce an independent +parser. To avoid executing multiple processes to discover or modify +multiple configuration values, the `git config-batch` command allows a +single process to handle multiple requests using a machine-parseable +interface across `stdin` and `stdout`. + +PROTOCOL +-------- +By default, the protocol uses line feeds (`LF`) to signal the end of a +command over `stdin` or a response over `stdout`. + +The protocol will be extended in the future, and consumers should be +resilient to older Git versions not understanding the latest command +set. 
Thus, if the Git version includes the `git config-batch` builtin +but doesn't understand an input command, it will return a single line +response: + +``` +unknown_command LF +``` SEE ALSO -------- diff --git a/builtin/config-batch.c b/builtin/config-batch.c index ea4f408ecb45d6..e35bc07f28c36c 100644 --- a/builtin/config-batch.c +++ b/builtin/config-batch.c @@ -3,17 +3,143 @@ #include "config.h" #include "environment.h" #include "parse-options.h" +#include "strbuf.h" +#include "string-list.h" static const char *const builtin_config_batch_usage[] = { N_("git config-batch "), NULL }; +#define UNKNOWN_COMMAND "unknown_command" + +static int emit_response(const char *response, ...) +{ + va_list params; + const char *token; + + printf("%s", response); + + va_start(params, response); + while ((token = va_arg(params, const char *))) + printf(" %s", token); + va_end(params); + + printf("\n"); + fflush(stdout); + return 0; +} + +/** + * A function pointer type for defining a command. The function is + * responsible for handling different versions of the command name. + * + * Provides the remaining 'data' for the command, to be parsed by + * the function as needed according to its parsing rules. + * + * These functions should only return a negative value if they result + * in such a catastrophic failure that the process should end. + * + * Return 0 on success. + */ +typedef int (*command_fn)(struct repository *repo, + char *data, size_t data_len); + +static int unknown_command(struct repository *repo UNUSED, + char *data UNUSED, size_t data_len UNUSED) +{ + return emit_response(UNKNOWN_COMMAND, NULL); +} + +struct command { + const char *name; + command_fn fn; + int version; +}; + +static struct command commands[] = { + /* unknown_command must be last. */ + { + .name = "", + .fn = unknown_command, + }, +}; + +#define COMMAND_COUNT ((size_t)(sizeof(commands) / sizeof(*commands))) + +/** + * Process a single line from stdin and process the command. 
+ * + * Returns 0 on successful processing of command, including the + * unknown_command output. + * + * Returns 1 on natural exit due to exist signal of empty line. + * + * Returns negative value on other catastrophic error. + */ +static int process_command(struct repository *repo) +{ + static struct strbuf line = STRBUF_INIT; + struct string_list tokens = STRING_LIST_INIT_NODUP; + const char *command; + int version; + char *data = NULL; + size_t data_len = 0; + int res = 0; + + strbuf_getline(&line, stdin); + + if (!line.len) + return 1; + + /* Parse out the first two tokens, command and version. */ + string_list_split_in_place(&tokens, line.buf, " ", 2); + + if (tokens.nr < 2) { + res = error(_("expected at least 2 tokens, got %"PRIuMAX), tokens.nr); + goto cleanup; + } + + command = tokens.items[0].string; + + if (!git_parse_int(tokens.items[1].string, &version)) { + res = error(_("unable to parse '%s' to integer"), + tokens.items[1].string); + goto cleanup; + } + + if (tokens.nr >= 3) { + data = tokens.items[2].string; + data_len = strlen(tokens.items[2].string); + } + + for (size_t i = 0; i < COMMAND_COUNT; i++) { + /* + * Run the ith command if we have hit the unknown + * command or if the name and version match. 
+ */ + if (!commands[i].name[0] || + (!strcmp(command, commands[i].name) && + commands[i].version == version)) { + res = commands[i].fn(repo, data, data_len); + goto cleanup; + } + } + + BUG(_("scanned to end of command list, including 'unknown_command'")); + +cleanup: + strbuf_reset(&line); + string_list_clear(&tokens, 0); + return res; +} + int cmd_config_batch(int argc, const char **argv, const char *prefix, struct repository *repo) { + int res = 0; struct option options[] = { OPT_END(), }; @@ -26,5 +152,9 @@ int cmd_config_batch(int argc, repo_config(repo, git_default_config, NULL); - return 0; + while (!(res = process_command(repo))); + + if (res == 1) + return 0; + die(_("an unrecoverable error occurred during command execution")); } diff --git a/t/t1312-config-batch.sh b/t/t1312-config-batch.sh index f59ba4a0f3f1dc..f60ef35e38d7c1 100755 --- a/t/t1312-config-batch.sh +++ b/t/t1312-config-batch.sh @@ -4,9 +4,22 @@ test_description='Test git config-batch' . ./test-lib.sh -test_expect_success 'help text' ' - test_must_fail git config-batch -h >out && - grep usage out +test_expect_success 'no commands' ' + echo | git config-batch >out && + test_must_be_empty out +' + +test_expect_success 'unknown_command' ' + echo unknown_command >expect && + echo "bogus 1 line of tokens" >in && + git config-batch >out in && + test_must_fail git config-batch 2>err Date: Sat, 17 Jan 2026 14:27:46 -0500 Subject: [PATCH 3/5] config-batch: implement get v1 The 'get' command for the 'git config-batch' builtin is the first command and is currently at version 1. It returns at most one value, the same as 'git config --get ' with optional value-based filtering. The documentation and tests detail the specifics of how to format requests of this format and how to parse the results. Future versions could consider multi-valued responses or regex-based key matching. 
For the sake of incremental exploration of the potential in the 'git config-batch' command, this is the only implementation being presented in the first patch series. Future extensions could include a '-z' parameter that uses NUL bytes in the command and output format to allow for spaces or newlines in the input or newlines in the output. Signed-off-by: Derrick Stolee --- Documentation/git-config-batch.adoc | 53 +++++- builtin/config-batch.c | 251 +++++++++++++++++++++++++++- config.h | 3 + t/t1312-config-batch.sh | 101 +++++++++++ 4 files changed, 405 insertions(+), 3 deletions(-) diff --git a/Documentation/git-config-batch.adoc b/Documentation/git-config-batch.adoc index 9ca04b0c1eafd2..efa0e34282d059 100644 --- a/Documentation/git-config-batch.adoc +++ b/Documentation/git-config-batch.adoc @@ -32,9 +32,58 @@ set. Thus, if the Git version includes the `git config-batch` builtin but doesn't understand an input command, it will return a single line response: -``` +------------ unknown_command LF -``` +------------ + +These are the commands that are currently understood: + +`get` version 1:: + The `get` command searches the config key-value pairs within a + given `<scope>` for values that match the fixed `<key>` and + filters the resulting value based on an optional `<value-filter>`. + This can either be a regex or a fixed value. The command format + is one of the following formats: ++ +------------ +get 1 <scope> <key> +get 1 <scope> <key> arg:regex <value-pattern> +get 1 <scope> <key> arg:fixed-value <value> +------------ ++ +The `<scope>` value can be one of `inherited`, `system`, `global`, +`local`, `worktree`, `submodule`, or `command`. If `inherited`, then all +config key-value pairs will be considered regardless of scope. Otherwise, +only the given scope will be considered. ++ +If no optional arguments are given, then the value will not be filtered +by any pattern matching. 
If `arg:regex` is specified, then the rest of +the line is considered a single string, `<value-pattern>`, and is +interpreted as a regular expression for matching against stored values, +similar to specifying a value to `git config --get <key> "<value-pattern>"`. +If `arg:fixed-value` is specified, then the rest of the line is +considered a single string, `<value>`, and is checked for an exact +match against the key-value pairs, similar to `git config --get +--fixed-value <key> "<value>"`. ++ +At most one key-value pair is returned, that being the last key-value +pair in the standard config order by scope and sequence within each scope. ++ +If a key-value pair is found, then the following output is given: ++ +------------ +get 1 found <key> <scope> <value> +------------ ++ +If no matching key-value pair is found, then the following output is +given: ++ +------------ +get 1 missing <key> [<value-pattern>|<value>] +------------ ++ +where `<value-pattern>` or `<value>` is only supplied if provided in +the command. SEE ALSO -------- diff --git a/builtin/config-batch.c b/builtin/config-batch.c index e35bc07f28c36c..04e7c39e9d1f4a 100644 --- a/builtin/config-batch.c +++ b/builtin/config-batch.c @@ -12,6 +12,8 @@ static const char *const builtin_config_batch_usage[] = { }; #define UNKNOWN_COMMAND "unknown_command" +#define GET_COMMAND "get" +#define COMMAND_PARSE_ERROR "command_parse_error" static int emit_response(const char *response, ...) { @@ -30,6 +32,11 @@ static int emit_response(const char *response, ...) return 0; } +static int command_parse_error(const char *command) +{ + return emit_response(COMMAND_PARSE_ERROR, command, NULL); +} + /** * A function pointer type for defining a command. The function is * responsible for handling different versions of the command name. 
@@ -46,11 +53,248 @@ typedef int (*command_fn)(struct repository *repo, char *data, size_t data_len); static int unknown_command(struct repository *repo UNUSED, - char *data UNUSED, size_t data_len UNUSED) + char *data UNUSED, size_t data_len UNUSED) { return emit_response(UNKNOWN_COMMAND, NULL); } +static size_t parse_whitespace_token(char **data, size_t *data_len, + char **token, int *err UNUSED) +{ + size_t i = 0; + + *token = *data; + + while (i < *data_len && (*data)[i] && (*data)[i] != ' ') + i++; + + if (i >= *data_len) { + *data_len = 0; + *data = NULL; + return i; + } + + (*data)[i] = 0; + *data_len = (*data_len) - (i + 1); + *data = *data + (i + 1); + return i; +} + +/** + * Given the remaining data line and its size, attempt to extract + * a token. When the token delimiter is determined, the data + * string is mutated to insert a NUL byte at the end of the token. + * The data pointer is mutated to point at the next character (or + * set to NULL if that exceeds the string length). The data_len + * value is mutated to subtract the length of the discovered + * token. + * + * The returned value is the length of the token that was + * discovered. + * + * 'err' is ignored for now, but will be filled in in a future + * change. + */ +static size_t parse_token(char **data, size_t *data_len, + char **token, int *err) +{ + if (!*data_len) + return 0; + + return parse_whitespace_token(data, data_len, token, err); +} + +enum value_match_mode { + MATCH_ALL, + MATCH_EXACT, + MATCH_REGEX, +}; + +struct get_command_1_data { + /* parameters */ + char *key; + enum config_scope scope; + enum value_match_mode mode; + + /* optional parameters */ + char *value; + regex_t *value_pattern; + + /* data along the way, for single values. 
*/ + char *found; + enum config_scope found_scope; +}; + +static int get_command_1_cb(const char *key, const char *value, + const struct config_context *context, + void *data) +{ + struct get_command_1_data *d = data; + + if (strcasecmp(key, d->key)) + return 0; + + if (d->scope != CONFIG_SCOPE_UNKNOWN && + d->scope != context->kvi->scope) + return 0; + + switch (d->mode) { + case MATCH_EXACT: + if (strcasecmp(value, d->value)) + return 0; + break; + + case MATCH_REGEX: + if (regexec(d->value_pattern, value, 0, NULL, 0)) + return 0; + break; + + default: + break; + } + + free(d->found); + d->found = xstrdup(value); + d->found_scope = context->kvi->scope; + return 0; +} + +static const char *scope_str(enum config_scope scope) +{ + switch (scope) { + case CONFIG_SCOPE_UNKNOWN: + return "unknown"; + + case CONFIG_SCOPE_SYSTEM: + return "system"; + + case CONFIG_SCOPE_GLOBAL: + return "global"; + + case CONFIG_SCOPE_LOCAL: + return "local"; + + case CONFIG_SCOPE_WORKTREE: + return "worktree"; + + case CONFIG_SCOPE_SUBMODULE: + return "submodule"; + + case CONFIG_SCOPE_COMMAND: + return "command"; + + default: + BUG("invalid config scope"); + } +} + +static int parse_scope(const char *str, enum config_scope *scope) +{ + if (!strcmp(str, "inherited")) { + *scope = CONFIG_SCOPE_UNKNOWN; + return 0; + } + + for (enum config_scope s = 0; s < CONFIG_SCOPE__NR; s++) { + if (!strcmp(str, scope_str(s))) { + *scope = s; + return 0; + } + } + + return -1; +} + +/** + * 'get' command, version 1. + * + * Positional arguments should be of the form: + * + * [0] scope ("system", "global", "local", "worktree", "command", "submodule", or "inherited") + * [1] config key + * [2*] multi-mode ("all", "regex", "fixed-value") + * [3*] value regex OR value string + * + * [N*] indicates optional parameters that are not needed. 
+ */ +static int get_command_1(struct repository *repo, + char *data, + size_t data_len) +{ + struct get_command_1_data gc_data = { + .found = NULL, + .mode = MATCH_ALL, + }; + int res = 0, err = 0; + char *token; + size_t token_len; + + if (!parse_token(&data, &data_len, &token, &err) || err) + goto parse_error; + + if (parse_scope(token, &gc_data.scope)) + goto parse_error; + + if (!parse_token(&data, &data_len, &gc_data.key, &err) || err) + goto parse_error; + + token_len = parse_token(&data, &data_len, &token, &err); + if (err) + goto parse_error; + + if (token_len && !strncmp(token, "arg:", 4)) { + if (!strcmp(token + 4, "regex")) + gc_data.mode = MATCH_REGEX; + else if (!strcmp(token + 4, "fixed-value")) + gc_data.mode = MATCH_EXACT; + else + goto parse_error; /* unknown arg. */ + + /* Use the remaining data as the value string. */ + gc_data.value = data; + + if (gc_data.mode == MATCH_REGEX) { + CALLOC_ARRAY(gc_data.value_pattern, 1); + if (regcomp(gc_data.value_pattern, gc_data.value, + REG_EXTENDED)) { + FREE_AND_NULL(gc_data.value_pattern); + goto parse_error; + } + } + } else if (token_len) { + /* + * If we have remaining tokens not starting in "arg:", + * then we don't understand them. + */ + goto parse_error; + } + + repo_config(repo, get_command_1_cb, &gc_data); + + if (gc_data.found) + res = emit_response(GET_COMMAND, "1", "found", gc_data.key, + scope_str(gc_data.found_scope), + gc_data.found, + NULL); + else + res = emit_response(GET_COMMAND, "1", "missing", gc_data.key, + gc_data.value, NULL); + + goto cleanup; + + +parse_error: + res = command_parse_error(GET_COMMAND); + +cleanup: + if (gc_data.value_pattern) { + regfree(gc_data.value_pattern); + free(gc_data.value_pattern); + } + free(gc_data.found); + return res; +} + struct command { const char *name; command_fn fn; @@ -58,6 +302,11 @@ struct command { }; static struct command commands[] = { + { + .name = GET_COMMAND, + .fn = get_command_1, + .version = 1, + }, /* unknown_command must be last. 
*/ { .name = "", diff --git a/config.h b/config.h index ba426a960af9f4..966a228f0e1a39 100644 --- a/config.h +++ b/config.h @@ -44,6 +44,9 @@ enum config_scope { CONFIG_SCOPE_WORKTREE, CONFIG_SCOPE_COMMAND, CONFIG_SCOPE_SUBMODULE, + + /* Must be last */ + CONFIG_SCOPE__NR }; const char *config_scope_name(enum config_scope scope); diff --git a/t/t1312-config-batch.sh b/t/t1312-config-batch.sh index f60ef35e38d7c1..e638b54d13fa5e 100755 --- a/t/t1312-config-batch.sh +++ b/t/t1312-config-batch.sh @@ -16,10 +16,111 @@ test_expect_success 'unknown_command' ' test_cmp expect out ' +test_expect_success 'completely broken input' ' + echo "not_even_two_tokens" >in && + test_must_fail git config-batch 2>err in && test_must_fail git config-batch 2>err in && + echo "get 1 found test.key local test value with spaces" >expect && + git config-batch >out in && + echo "get 1 missing test.key" >expect && + git config-batch >out in <<-\EOF && + get 1 inherited test.key arg:regex .*1.* + get 1 inherited test.key arg:regex [a-z]2.* + get 1 inherited test.key arg:regex .*3e s.* + get 1 inherited test.key arg:regex 4.* + get 1 inherited test.key arg:regex .*5.* + get 1 inherited test.key arg:regex .*6.* + EOF + + cat >expect <<-\EOF && + get 1 found test.key system on1e + get 1 found test.key global t2wo + get 1 found test.key local thre3e space + get 1 found test.key worktree 4four + get 1 found test.key command five5 + get 1 missing test.key .*6.* + EOF + + git -c test.key=five5 config-batch >out in <<-\EOF && + get 1 inherited test.key arg:fixed-value one + get 1 inherited test.key arg:fixed-value two + get 1 inherited test.key arg:fixed-value three space + get 1 inherited test.key arg:fixed-value four + get 1 inherited test.key arg:fixed-value five + get 1 inherited test.key arg:fixed-value six + EOF + + cat >expect <<-\EOF && + get 1 found test.key system one + get 1 found test.key global two + get 1 found test.key local three space + get 1 found test.key worktree four + get 1 found 
test.key command five + get 1 missing test.key six + EOF + + git -c test.key=five config-batch >out Date: Sun, 18 Jan 2026 13:49:35 -0500 Subject: [PATCH 4/5] config-batch: create 'help' command Tools that use the 'git config-batch' tool will want to know which commands are available in the current Git version. Having a 'help' command assists greatly to give a clear set of available commands and their versions. Signed-off-by: Derrick Stolee --- Documentation/git-config-batch.adoc | 17 +++++++++++++++ builtin/config-batch.c | 32 +++++++++++++++++++++++++++++ t/t1312-config-batch.sh | 13 ++++++++++++ 3 files changed, 62 insertions(+) diff --git a/Documentation/git-config-batch.adoc b/Documentation/git-config-batch.adoc index efa0e34282d059..105215b08d35f3 100644 --- a/Documentation/git-config-batch.adoc +++ b/Documentation/git-config-batch.adoc @@ -38,6 +38,23 @@ unknown_command LF These are the commands that are currently understood: +`help` version 1:: + The `help` command lists the currently-available commands in + this version of Git. The output is multi-line, but the first + line provides the count of possible commands via `help count `. + The next `` lines are of the form `help ` + to state that this Git version supports that `` at + version ``. Note that the same command may have multiple + available versions. 
++ +Here is the currentl output of the help text at the latest version: ++ +------------ +help 1 count 2 +help 1 help 1 +help 1 get 1 +------------ + `get` version 1:: The `get` command searches the config key-value pairs within a given `` for values that match the fixed `` and diff --git a/builtin/config-batch.c b/builtin/config-batch.c index 04e7c39e9d1f4a..324e749c4db513 100644 --- a/builtin/config-batch.c +++ b/builtin/config-batch.c @@ -12,6 +12,7 @@ static const char *const builtin_config_batch_usage[] = { }; #define UNKNOWN_COMMAND "unknown_command" +#define HELP_COMMAND "help" #define GET_COMMAND "get" #define COMMAND_PARSE_ERROR "command_parse_error" @@ -104,6 +105,9 @@ static size_t parse_token(char **data, size_t *data_len, return parse_whitespace_token(data, data_len, token, err); } +static int help_command_1(struct repository *repo, + char *data, size_t data_len); + enum value_match_mode { MATCH_ALL, MATCH_EXACT, @@ -302,6 +306,11 @@ struct command { }; static struct command commands[] = { + { + .name = HELP_COMMAND, + .fn = help_command_1, + .version = 1, + }, { .name = GET_COMMAND, .fn = get_command_1, @@ -316,6 +325,29 @@ static struct command commands[] = { #define COMMAND_COUNT ((size_t)(sizeof(commands) / sizeof(*commands))) +static int help_command_1(struct repository *repo UNUSED, + char *data UNUSED, size_t data_len UNUSED) +{ + struct strbuf fmt_str = STRBUF_INIT; + + strbuf_addf(&fmt_str, "%"PRIuMAX, COMMAND_COUNT - 1); + emit_response(HELP_COMMAND, "1", "count", fmt_str.buf, NULL); + strbuf_reset(&fmt_str); + + for (size_t i = 0; i < COMMAND_COUNT; i++) { + /* Halt at unknown command. */ + if (!commands[i].name[0]) + break; + + strbuf_addf(&fmt_str, "%d", commands[i].version); + emit_response(HELP_COMMAND, "1", commands[i].name, fmt_str.buf, NULL); + strbuf_reset(&fmt_str); + } + + strbuf_release(&fmt_str); + return 0; +} + /** * Process a single line from stdin and process the command. 
* diff --git a/t/t1312-config-batch.sh b/t/t1312-config-batch.sh index e638b54d13fa5e..6b550a0e76d3c8 100755 --- a/t/t1312-config-batch.sh +++ b/t/t1312-config-batch.sh @@ -23,6 +23,19 @@ test_expect_success 'completely broken input' ' test_grep "an unrecoverable error occurred during command execution" err ' +test_expect_success 'help command' ' + echo "help 1" >in && + + cat >expect <<-\EOF && + help 1 count 2 + help 1 help 1 + help 1 get 1 + EOF + + git config-batch >out in && test_must_fail git config-batch 2>err Date: Wed, 21 Jan 2026 21:31:52 -0500 Subject: [PATCH 5/5] config-batch: add NUL-terminated I/O format When using automated tools, it is critical to allow for input/output formats that include special characters such as spaces and newlines. While the existing protocol for 'git config-batch' is human-readable and has some capacity for some spaces in certain positions, it is not available for spaces in the config key or newlines in the config values. Add the '-z' option to signal the use of NUL-terminated strings. To understand where commands end regardless of potential future formats, use two NUL bytes in a row to terminate a command. To allow for empty string values, each token is provided in a : format, making "0:" the empty string value. Update the existing 'help' and 'get' commands to match this format. Create helper methods that make it easy to parse and print in both formats simultaneously. 
Signed-off-by: Derrick Stolee --- Documentation/git-config-batch.adoc | 57 ++++++++- builtin/config-batch.c | 188 +++++++++++++++++++++++++--- t/t1312-config-batch.sh | 69 ++++++++++ 3 files changed, 293 insertions(+), 21 deletions(-) diff --git a/Documentation/git-config-batch.adoc b/Documentation/git-config-batch.adoc index 105215b08d35f3..4cb685e07f16e9 100644 --- a/Documentation/git-config-batch.adoc +++ b/Documentation/git-config-batch.adoc @@ -21,6 +21,15 @@ multiple configuration values, the `git config-batch` command allows a single process to handle multiple requests using a machine-parseable interface across `stdin` and `stdout`. +OPTIONS +------- + +`-z`:: + If specified, then use the NUL-terminated input and output + format instead of the space and newline format. This format is + useful when the strings involved may include spaces or newlines. + See PROTOCOL for more details. + PROTOCOL -------- By default, the protocol uses line feeds (`LF`) to signal the end of a @@ -41,13 +50,13 @@ These are the commands that are currently understood: `help` version 1:: The `help` command lists the currently-available commands in this version of Git. The output is multi-line, but the first - line provides the count of possible commands via `help count `. - The next `` lines are of the form `help ` + line provides the count of possible commands via `help 1 count `. + The next `` lines are of the form `help 1 ` to state that this Git version supports that `` at version ``. Note that the same command may have multiple available versions. + -Here is the currentl output of the help text at the latest version: +Here is the current output of the help text at the latest version: + ------------ help 1 count 2 @@ -102,6 +111,48 @@ get missing [|] where `` or `` is only supplied if provided in the command. +NUL-Terminated Format +~~~~~~~~~~~~~~~~~~~~~ + +When `-z` is given, the protocol changes in some structural ways. 
+ +First, each command is terminated with two NUL bytes, providing a clear +boundary between commands regardless of future possibilities of new +command formats. + +Second, any time that a space _would_ be used to partition tokens in a +command, a NUL byte is used instead. Further, each token is prefixed +with `<N>:` where `<N>` is a decimal representation of the length of +the string between the `:` and the next NUL byte. Any disagreement in +these lengths is treated as a parsing error. This use of a length does +imply that "`0:`" is the representation of an empty string, if relevant. + +The decimal representation must have at most five numerals, thus a +string token can be at most 99999 characters long. + +For example, the `get` command, version 1, could have any of the +following forms: + +------------ +3:get NUL 1:1 NUL 5:local NUL 14:key.with space NUL NUL +3:get NUL 1:1 NUL 9:inherited NUL 8:test.key NUL 9:arg:regex NUL 6:.*\ .* NUL NUL +3:get NUL 1:1 NUL 6:global NUL 8:test.key NUL 15:arg:fixed-value NUL 3:a b NUL NUL +------------ + +The output is modified similarly, such as the following output examples, +as if the input has a parse error, a valid `help` command, a `get` +command that had a match, and a `get` command that did not match. 
+ +------------ +15:unknown_command NUL NUL +4:help NUL 1:1 NUL 5:count NUL 1:2 NUL NUL +4:help NUL 1:1 NUL 4:help NUL 1:1 NUL NUL +4:help NUL 1:1 NUL 3:get NUL 1:1 NUL NUL +3:get NUL 1:1 NUL 5:found NUL 8:test.key NUL 5:local NUL 5:value NUL NUL +3:get NUL 1:1 NUL 7:missing NUL 8:test.key NUL NUL +------------ + + SEE ALSO -------- linkgit:git-config[1] diff --git a/builtin/config-batch.c b/builtin/config-batch.c index 324e749c4db513..5428a504a0804c 100644 --- a/builtin/config-batch.c +++ b/builtin/config-batch.c @@ -11,24 +11,40 @@ static const char *const builtin_config_batch_usage[] = { NULL }; +static int zformat = 0; + #define UNKNOWN_COMMAND "unknown_command" #define HELP_COMMAND "help" #define GET_COMMAND "get" #define COMMAND_PARSE_ERROR "command_parse_error" +static void print_word(const char *word, int start) +{ + if (zformat) { + printf("%"PRIuMAX":%s", (uintmax_t)strlen(word), word); + fputc(0, stdout); + } else if (start) + printf("%s", word); + else + printf(" %s", word); +} + static int emit_response(const char *response, ...) { va_list params; const char *token; - printf("%s", response); + print_word(response, 1); va_start(params, response); while ((token = va_arg(params, const char *))) - printf(" %s", token); + print_word(token, 0); va_end(params); - printf("\n"); + if (zformat) + fputc(0, stdout); + else + printf("\n"); fflush(stdout); return 0; } @@ -59,6 +75,52 @@ static int unknown_command(struct repository *repo UNUSED, return emit_response(UNKNOWN_COMMAND, NULL); } +/* + * Parse the next token using the NUL-byte format.
+ */ +static size_t parse_ztoken(char **data, size_t *data_len, + char **token, int *err) +{ + size_t i = 0, token_len; + + while (i < *data_len && (*data)[i] != ':') { + if ((*data)[i] < '0' || (*data)[i] > '9') { + goto parse_error; + } + i++; + } + + if (!i || i >= *data_len || (*data)[i] != ':' || i > 5) + goto parse_error; + + (*data)[i] = 0; + token_len = atoi(*data); + + if (token_len + i + 1 >= *data_len) + goto parse_error; + + *token = *data + i + 1; + *data_len = *data_len - (i + 1); + + /* check for early NULs. */ + for (i = 0; i < token_len; i++) { + if (!(*token)[i]) + goto parse_error; + } + /* check for matching NUL. */ + if ((*token)[token_len]) + goto parse_error; + + *data = *token + token_len + 1; + *data_len = *data_len - (token_len + 1); + return token_len; + +parse_error: + *err = 1; + *token = NULL; + return 0; +} + static size_t parse_whitespace_token(char **data, size_t *data_len, char **token, int *err UNUSED) { @@ -93,15 +155,23 @@ static size_t parse_whitespace_token(char **data, size_t *data_len, * The returned value is the length of the token that was * discovered. * - * 'err' is ignored for now, but will be filled in in a future - * change. + * The 'token' pointer is used to set the start of the token. + * In the whitespace format, this is always the input value of + * 'data' but in the NUL-terminated format this follows an "<N>:" + * prefix. + * + * In the case of the NUL-terminated format, a bad parse of the + * decimal length or a mismatch of the decimal length and the + * length of the following NUL-terminated string will result in + * the value pointed at by 'err' being set to 1. */ static size_t parse_token(char **data, size_t *data_len, char **token, int *err) { if (!*data_len) return 0; - + if (zformat) + return parse_ztoken(data, data_len, token, err); return parse_whitespace_token(data, data_len, token, err); } @@ -255,7 +325,13 @@ static int get_command_1(struct repository *repo, goto parse_error; /* unknown arg.
*/ /* Use the remaining data as the value string. */ - gc_data.value = data; + if (!zformat) + gc_data.value = data; + else { + parse_token(&data, &data_len, &gc_data.value, &err); + if (err) + goto parse_error; + } if (gc_data.mode == MATCH_REGEX) { CALLOC_ARRAY(gc_data.value_pattern, 1); @@ -348,17 +424,74 @@ static int help_command_1(struct repository *repo UNUSED, return 0; } -/** - * Process a single line from stdin and process the command. - * - * Returns 0 on successful processing of command, including the - * unknown_command output. - * - * Returns 1 on natural exit due to exist signal of empty line. - * - * Returns negative value on other catastrophic error. - */ -static int process_command(struct repository *repo) +static int process_command_nul(struct repository *repo) +{ + static struct strbuf line = STRBUF_INIT; + char *data, *command, *versionstr; + size_t data_len, token_len; + int res = 0, err = 0, version = 0, getc; + char c; + + /* If we start with EOF it's not an error. */ + getc = fgetc(stdin); + if (getc == EOF) + return 1; + + do { + c = (char)getc; + strbuf_addch(&line, c); + + if (!c && line.len > 1 && !line.buf[line.len - 2]) + break; + + getc = fgetc(stdin); + + /* It's an error if we reach EOF while parsing a command. */ + if (getc == EOF) + goto parse_error; + } while (1); + + data = line.buf; + data_len = line.len - 1; + + token_len = parse_ztoken(&data, &data_len, &command, &err); + if (!token_len || err) + goto parse_error; + + token_len = parse_ztoken(&data, &data_len, &versionstr, &err); + if (!token_len || err) + goto parse_error; + + if (!git_parse_int(versionstr, &version)) { + res = error(_("unable to parse '%s' to integer"), + versionstr); + goto parse_error; + } + + for (size_t i = 0; i < COMMAND_COUNT; i++) { + /* + * Run the ith command if we have hit the unknown + * command or if the name and version match. 
+ */ + if (!commands[i].name[0] || + (!strcmp(command, commands[i].name) && + commands[i].version == version)) { + res = commands[i].fn(repo, data, data_len); + goto cleanup; + } + } + + BUG(_("scanned to end of command list, including 'unknown_command'")); + +parse_error: + res = unknown_command(repo, NULL, 0); + +cleanup: + strbuf_release(&line); + return res; +} + +static int process_command_whitespace(struct repository *repo) { static struct strbuf line = STRBUF_INIT; struct string_list tokens = STRING_LIST_INIT_NODUP; @@ -415,6 +548,23 @@ static int process_command(struct repository *repo) return res; } +/** + * Process a single line from stdin and process the command. + * + * Returns 0 on successful processing of command, including the + * unknown_command output. + * + * Returns 1 on natural exit due to exist signal of empty line. + * + * Returns negative value on other catastrophic error. + */ +static int process_command(struct repository *repo) +{ + if (zformat) + return process_command_nul(repo); + return process_command_whitespace(repo); +} + int cmd_config_batch(int argc, const char **argv, const char *prefix, @@ -422,6 +572,8 @@ int cmd_config_batch(int argc, { int res = 0; struct option options[] = { + OPT_BOOL('z', NULL, &zformat, + N_("stdin and stdout is NUL-terminated")), OPT_END(), }; diff --git a/t/t1312-config-batch.sh b/t/t1312-config-batch.sh index 6b550a0e76d3c8..f7a74ddc2cb94b 100755 --- a/t/t1312-config-batch.sh +++ b/t/t1312-config-batch.sh @@ -4,6 +4,26 @@ test_description='Test git config-batch' . ./test-lib.sh +# usage: test_zformat out +# +# Let 'in' be a z-format input but with " NUL " between tokens in +# a single command and " NUL NUL" trailing each line. +# +# The values in 'out' will be space- and newline-delimited where +# NUL-bytes would normally be output. +test_zformat () { + sed -e "s/\ NUL\ /!/g" >nullin1 && + sed -e "s/NUL//g" nullin2 && + + tr "!" 
"\0" nullin3 && + tr "\n" "\0" zin && + + $* zout && + + tr "\0" " " outspace && + sed "s/\ \ /\n/g" out && test_must_be_empty out @@ -36,6 +56,23 @@ test_expect_success 'help command' ' test_cmp expect out ' +test_expect_success 'help -z' ' + cat >in <<-\EOF && + 4:help NUL 1:1 NUL NUL + 5:bogus NUL 2:10 NUL NUL + EOF + + cat >expect <<-\EOF && + 4:help 1:1 5:count 1:2 + 4:help 1:1 4:help 1:1 + 4:help 1:1 3:get 1:1 + 15:unknown_command + EOF + + test_zformat git config-batch -z >out in && test_must_fail git config-batch 2>err in <<-\EOF && + 3:get NUL 1:1 NUL 9:inherited NUL 8:test.key NUL NUL + 3:get NUL 1:1 NUL 6:global NUL 8:test.key NUL 9:arg:regex NUL 3:2.* NUL NUL + 3:get NUL 1:1 NUL 5:local NUL 8:test.key NUL 15:arg:fixed-value NUL 12:thre3e space NUL NUL + 3:get NUL 1:1 NUL 9:inherited NUL 11:key.missing NUL NUL + EOF + + cat >expect <<-\EOF && + 3:get 1:1 5:found 8:test.key 8:worktree 5:4four + 3:get 1:1 5:found 8:test.key 6:global 4:t2wo + 3:get 1:1 5:found 8:test.key 5:local 12:thre3e space + 3:get 1:1 7:missing 11:key.missing + EOF + + test_zformat git config-batch -z >out