From fda0239103f6e2b9e76403144b9ed2e9205e1c2a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 28 Jan 2026 11:07:58 -0500 Subject: [PATCH 1/6] revision: include object-name.h The REV_INFO_INIT macro includes a use of the DEFAULT_ABBREV macro, which is defined in object-name.h. Include it in revision.h so consumers of REV_INFO_INIT do not need to include this hidden dependency. Signed-off-by: Derrick Stolee --- revision.h | 1 + 1 file changed, 1 insertion(+) diff --git a/revision.h b/revision.h index b36acfc2d9f61d..18c9bbd8227bf2 100644 --- a/revision.h +++ b/revision.h @@ -4,6 +4,7 @@ #include "commit.h" #include "grep.h" #include "notes.h" +#include "object-name.h" #include "oidset.h" #include "pretty.h" #include "diff.h" From 55a45b2fc81895d2b712f126f716a8152d73fc93 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 16 Mar 2026 11:12:22 -0400 Subject: [PATCH 2/6] t5620: prepare branched repo for revision tests Prepare the test infrastructure for upcoming changes that teach 'git backfill' to accept revision arguments and pathspecs. Add test_tick before each commit in the setup loop so that commit dates are deterministic. This enables reliable testing with '--since'. Rename the 'd/e/' directory to 'd/f/' so that the prefix 'd/f' is ambiguous with the files 'd/file.*.txt'. This exercises the subtlety in prefix pathspec matching that will be added in a later commit. Create a branched version of the test repository (src-revs) with: - A 'side' branch merged into main, adding s/file.{1,2}.txt with two versions (4 new blobs, 52 total from main HEAD). - An unmerged 'other' branch adding o/file.{1,2}.txt (2 more blobs, 54 total reachable from --all). This structure makes --all, --first-parent, and --since produce meaningfully different results when used with 'git backfill'. 
Signed-off-by: Derrick Stolee --- t/t5620-backfill.sh | 52 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 58c81556e72c89..1331949be47ea8 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -15,7 +15,7 @@ test_expect_success 'setup repo for object creation' ' git init src && mkdir -p src/a/b/c && - mkdir -p src/d/e && + mkdir -p src/d/f && for i in 1 2 do @@ -26,8 +26,9 @@ test_expect_success 'setup repo for object creation' ' echo "Version $i of file a/b/$n" > src/a/b/file.$n.txt && echo "Version $i of file a/b/c/$n" > src/a/b/c/file.$n.txt && echo "Version $i of file d/$n" > src/d/file.$n.txt && - echo "Version $i of file d/e/$n" > src/d/e/file.$n.txt && + echo "Version $i of file d/f/$n" > src/d/f/file.$n.txt && git -C src add . && + test_tick && git -C src commit -m "Iteration $n" || return 1 done done @@ -41,6 +42,53 @@ test_expect_success 'setup bare clone for server' ' git -C srv.bare config --local uploadpack.allowanysha1inwant 1 ' +# Create a version of the repo with branches for testing revision +# arguments like --all, --first-parent, and --since. +# +# main: 8 commits (linear) + merge of side branch +# 48 original blobs + 4 side blobs = 52 blobs from main HEAD +# side: 2 commits adding s/file.{1,2}.txt (v1, v2), merged into main +# other: 1 commit adding o/file.{1,2}.txt (not merged) +# 54 total blobs reachable from --all +test_expect_success 'setup branched repo for revision tests' ' + git clone src src-revs && + + # Side branch from tip of main with unique files + git -C src-revs checkout -b side HEAD && + mkdir -p src-revs/s && + echo "Side version 1 of file 1" >src-revs/s/file.1.txt && + echo "Side version 1 of file 2" >src-revs/s/file.2.txt && + test_tick && + git -C src-revs add . 
&& + git -C src-revs commit -m "Side commit 1" && + + echo "Side version 2 of file 1" >src-revs/s/file.1.txt && + echo "Side version 2 of file 2" >src-revs/s/file.2.txt && + test_tick && + git -C src-revs add . && + git -C src-revs commit -m "Side commit 2" && + + # Merge side into main + git -C src-revs checkout main && + test_tick && + git -C src-revs merge side --no-ff -m "Merge side branch" && + + # Other branch (not merged) for --all testing + git -C src-revs checkout -b other main~1 && + mkdir -p src-revs/o && + echo "Other content 1" >src-revs/o/file.1.txt && + echo "Other content 2" >src-revs/o/file.2.txt && + test_tick && + git -C src-revs add . && + git -C src-revs commit -m "Other commit" && + + git -C src-revs checkout main && + + git clone --bare "file://$(pwd)/src-revs" srv-revs.bare && + git -C srv-revs.bare config --local uploadpack.allowfilter 1 && + git -C srv-revs.bare config --local uploadpack.allowanysha1inwant 1 +' + # do basic partial clone from "srv.bare" test_expect_success 'do partial clone 1, backfill gets all objects' ' git clone --no-checkout --filter=blob:none \ From 610a162973a7ad59eba4ef4d5a9288f1fea1d2e8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 28 Jan 2026 11:30:00 -0500 Subject: [PATCH 3/6] backfill: accept revision arguments The existing implementation of 'git backfill' only includes downloading missing blobs reachable from HEAD. Advanced uses may desire more general commit limiting options, such as '--all' for all references, specifying a commit range via negative references, or specifying a recency of use such as with '--since=<date>'. All of these options are available if we use setup_revisions() to parse the unknown arguments with the revision machinery. This opens up a large number of possibilities, only a small set of which are tested here. For documentation, we avoid duplicating the option documentation and instead link to the documentation of 'git rev-list'. 
Note that these arguments currently allow specifying a pathspec, which modifies the commit history checks but does not limit the paths used in the backfill logic. This will be updated in a future change. Signed-off-by: Derrick Stolee --- Documentation/git-backfill.adoc | 5 +- builtin/backfill.c | 19 ++-- t/t5620-backfill.sh | 156 ++++++++++++++++++++++++++++++++ 3 files changed, 173 insertions(+), 7 deletions(-) diff --git a/Documentation/git-backfill.adoc b/Documentation/git-backfill.adoc index b8394dcf22b6e1..246ab417c24a10 100644 --- a/Documentation/git-backfill.adoc +++ b/Documentation/git-backfill.adoc @@ -63,9 +63,12 @@ OPTIONS current sparse-checkout. If the sparse-checkout feature is enabled, then `--sparse` is assumed and can be disabled with `--no-sparse`. +You may also specify the commit limiting options from linkgit:git-rev-list[1]. + SEE ALSO -------- -linkgit:git-clone[1]. +linkgit:git-clone[1], +linkgit:git-rev-list[1] GIT --- diff --git a/builtin/backfill.c b/builtin/backfill.c index e80fc1b694df61..90c9d84793ecd3 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -35,6 +35,7 @@ struct backfill_context { struct oid_array current_batch; size_t min_batch_size; int sparse; + struct rev_info revs; }; static void backfill_context_clear(struct backfill_context *ctx) @@ -80,7 +81,6 @@ static int fill_missing_blobs(const char *path UNUSED, static int do_backfill(struct backfill_context *ctx) { - struct rev_info revs; struct path_walk_info info = PATH_WALK_INFO_INIT; int ret; @@ -92,13 +92,14 @@ static int do_backfill(struct backfill_context *ctx) } } - repo_init_revisions(ctx->repo, &revs, ""); - handle_revision_arg("HEAD", &revs, 0, 0); + /* Walk from HEAD if otherwise unspecified. 
*/ + if (!ctx->revs.pending.nr) + add_head_to_pending(&ctx->revs); info.blobs = 1; info.tags = info.commits = info.trees = 0; - info.revs = &revs; + info.revs = &ctx->revs; info.path_fn = fill_missing_blobs; info.path_fn_data = ctx; @@ -109,7 +110,6 @@ static int do_backfill(struct backfill_context *ctx) download_batch(ctx); path_walk_info_clear(&info); - release_revisions(&revs); return ret; } @@ -121,6 +121,7 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit .current_batch = OID_ARRAY_INIT, .min_batch_size = 50000, .sparse = 0, + .revs = REV_INFO_INIT, }; struct option options[] = { OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size, @@ -134,7 +135,12 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit builtin_backfill_usage, options); argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage, - 0); + PARSE_OPT_KEEP_UNKNOWN_OPT | + PARSE_OPT_KEEP_ARGV0 | + PARSE_OPT_KEEP_DASHDASH); + + repo_init_revisions(repo, &ctx.revs, prefix); + argc = setup_revisions(argc, argv, &ctx.revs, NULL); repo_config(repo, git_default_config, NULL); @@ -143,5 +149,6 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit result = do_backfill(&ctx); backfill_context_clear(&ctx); + release_revisions(&ctx.revs); return result; } diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 1331949be47ea8..db66d8b614dd19 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -224,6 +224,162 @@ test_expect_success 'backfill --sparse without cone mode (negative)' ' test_line_count = 12 missing ' +test_expect_success 'backfill with revision range' ' + test_when_finished rm -rf backfill-revs && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-revs && + + # No blobs yet + git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + git -C backfill-revs 
backfill HEAD~2..HEAD && + + # 30 objects downloaded. + git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 18 missing +' + +test_expect_success 'backfill with revisions over stdin' ' + test_when_finished rm -rf backfill-revs && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-revs && + + # No blobs yet + git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + cat >in <<-EOF && + HEAD + ^HEAD~2 + EOF + + git -C backfill-revs backfill --stdin <in && + + # 30 objects downloaded. + git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 18 missing +' + +test_expect_success 'backfill with prefix pathspec' ' + test_when_finished rm -rf backfill-path && + git clone --bare --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-path && + + # No blobs yet + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + # TODO: The pathspec should limit the downloaded blobs to + # only those matching the prefix "d/f", but currently all + # blobs are downloaded. + git -C backfill-path backfill HEAD -- d/f && + + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 0 missing +' + +test_expect_success 'backfill with multiple pathspecs' ' + test_when_finished rm -rf backfill-path && + git clone --bare --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-path && + + # No blobs yet + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + # TODO: The pathspecs should limit the downloaded blobs to + # only those matching "d/f" or "a", but currently all blobs + # are downloaded. 
+ git -C backfill-path backfill HEAD -- d/f a && + + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 0 missing +' + +test_expect_success 'backfill with wildcard pathspec' ' + test_when_finished rm -rf backfill-path && + git clone --bare --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-path && + + # No blobs yet + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + # TODO: The wildcard pathspec should limit downloaded blobs, + # but currently all blobs are downloaded. + git -C backfill-path backfill HEAD -- "d/file.*.txt" && + + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 0 missing +' + +test_expect_success 'backfill with --all' ' + test_when_finished rm -rf backfill-all && + git clone --no-checkout --filter=blob:none \ + "file://$(pwd)/srv-revs.bare" backfill-all && + + # All blobs from all refs are missing + git -C backfill-all rev-list --quiet --objects --all --missing=print >missing && + test_line_count = 54 missing && + + # Backfill from HEAD gets main blobs only + git -C backfill-all backfill HEAD && + + # Other branch blobs still missing + git -C backfill-all rev-list --quiet --objects --all --missing=print >missing && + test_line_count = 2 missing && + + # Backfill with --all gets everything + git -C backfill-all backfill --all && + + git -C backfill-all rev-list --quiet --objects --all --missing=print >missing && + test_line_count = 0 missing +' + +test_expect_success 'backfill with --first-parent' ' + test_when_finished rm -rf backfill-fp && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv-revs.bare" backfill-fp && + + git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 52 missing && + + # --first-parent skips the side branch commits, so + # 
s/file.{1,2}.txt v1 blobs (only in side commit 1) are missed. + git -C backfill-fp backfill --first-parent HEAD && + + git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 2 missing +' + +test_expect_success 'backfill with --since' ' + test_when_finished rm -rf backfill-since && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv-revs.bare" backfill-since && + + git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 52 missing && + + # Use a cutoff between commits 4 and 5 (between v1 and v2 + # iterations). Commits 5-8 still carry v1 of files 2-4 in + # their trees, but v1 of file.1.txt is only in commits 1-4. + SINCE=$(git -C backfill-since log --first-parent --reverse \ + --format=%ct HEAD~1 | sed -n 5p) && + git -C backfill-since backfill --since="@$((SINCE - 1))" HEAD && + + # 6 missing: v1 of file.1.txt in all 6 directories + git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 6 missing +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd From 7223124fb3229fc3a06a3208a43181716cec2eac Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 3 Feb 2026 18:01:07 -0500 Subject: [PATCH 4/6] backfill: work with prefix pathspecs The previous change allowed specifying revision arguments over the 'git backfill' command-line. This created the opportunity for restricting the initial commit set by filtering the revision walk through a pathspec. Other than filtering the commit set (and thereby the root trees), this did not restrict the path-walk implementation of 'git backfill' and did not restrict the blobs that were downloaded to only those matching the pathspec. Update the path-walk API to accept certain kinds of pathspecs and to silently ignore anything too complex, for now. We will update this in the next change to properly restrict to even complex pathspecs. 
The current behavior focuses on pathspecs that match paths exactly. This includes exact filenames, including directory names as prefixes. Pathspecs containing wildcards or magic are cleared so the path walk downloads all blobs, as before. The reason for this restriction is to allow for a faster execution by pruning the path walk to only trees that could contribute towards one of those paths as a parent directory. The test directory 'd/f/' (next to 'd/file*.txt') was prepared in a previous commit to exercise the subtlety in prefix matching. Signed-off-by: Derrick Stolee --- path-walk.c | 39 +++++++++++++++++++++++++++++++++++++++ path.c | 2 +- path.h | 6 ++++++ t/t5620-backfill.sh | 16 ++++++---------- 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/path-walk.c b/path-walk.c index 364e4cfa19b2e4..3750552978655e 100644 --- a/path-walk.c +++ b/path-walk.c @@ -11,6 +11,7 @@ #include "list-objects.h" #include "object.h" #include "oid-array.h" +#include "path.h" #include "prio-queue.h" #include "repository.h" #include "revision.h" @@ -206,6 +207,33 @@ static int add_tree_entries(struct path_walk_context *ctx, match != MATCHED) continue; } + if (ctx->revs->prune_data.nr) { + struct pathspec *pd = &ctx->revs->prune_data; + bool found = false; + int did_strip_suffix = strbuf_strip_suffix(&path, "/"); + + + for (int i = 0; i < pd->nr; i++) { + struct pathspec_item *item = &pd->items[i]; + + /* + * Continue if either is a directory prefix + * of the other. + */ + if (dir_prefix(path.buf, item->match) || + dir_prefix(item->match, path.buf)) { + found = true; + break; + } + } + + if (did_strip_suffix) + strbuf_addch(&path, '/'); + + /* Skip paths that do not match the prefix. 
*/ + if (!found) + continue; + } add_path_to_list(ctx, path.buf, type, &entry.oid, !(o->flags & UNINTERESTING)); @@ -481,6 +509,17 @@ int walk_objects_by_path(struct path_walk_info *info) if (info->tags) info->revs->tag_objects = 1; + if (ctx.revs->prune_data.nr) { + /* + * Only exact prefix pathspecs are currently supported. + * Clear any wildcard or magic pathspecs to avoid + * incorrect prefix matching. + */ + if (ctx.revs->prune_data.has_wildcard || + ctx.revs->prune_data.magic) + clear_pathspec(&ctx.revs->prune_data); + } + /* Insert a single list for the root tree into the paths. */ CALLOC_ARRAY(root_tree_list, 1); root_tree_list->type = OBJ_TREE; diff --git a/path.c b/path.c index d726537622cda6..aebb10b2e93a34 100644 --- a/path.c +++ b/path.c @@ -57,7 +57,7 @@ static void strbuf_cleanup_path(struct strbuf *sb) strbuf_remove(sb, 0, path - sb->buf); } -static int dir_prefix(const char *buf, const char *dir) +int dir_prefix(const char *buf, const char *dir) { int len = strlen(dir); return !strncmp(buf, dir, len) && diff --git a/path.h b/path.h index 0ec95a0b079c90..829fafd7e9d8e0 100644 --- a/path.h +++ b/path.h @@ -114,6 +114,12 @@ const char *repo_submodule_path_replace(struct repository *repo, const char *fmt, ...) __attribute__((format (printf, 4, 5))); +/* + * Given a directory name 'dir' (not ending with a trailing '/'), + * determine if 'buf' is equal to 'dir' or has prefix 'dir'+'/'. 
+ */ +int dir_prefix(const char *buf, const char *dir); + void report_linked_checkout_garbage(struct repository *r); /* diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index db66d8b614dd19..52f6484ca18e55 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -273,13 +273,11 @@ test_expect_success 'backfill with prefix pathspec' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - # TODO: The pathspec should limit the downloaded blobs to - # only those matching the prefix "d/f", but currently all - # blobs are downloaded. - git -C backfill-path backfill HEAD -- d/f && + git -C backfill-path backfill HEAD -- d/f 2>err && + test_must_be_empty err && git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && - test_line_count = 0 missing + test_line_count = 40 missing ' test_expect_success 'backfill with multiple pathspecs' ' @@ -292,13 +290,11 @@ test_expect_success 'backfill with multiple pathspecs' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - # TODO: The pathspecs should limit the downloaded blobs to - # only those matching "d/f" or "a", but currently all blobs - # are downloaded. - git -C backfill-path backfill HEAD -- d/f a && + git -C backfill-path backfill HEAD -- d/f a 2>err && + test_must_be_empty err && git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && - test_line_count = 0 missing + test_line_count = 16 missing ' test_expect_success 'backfill with wildcard pathspec' ' From 1ea278bd10bdd0b7980750c9d0c450b044aec196 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 16 Mar 2026 16:46:36 -0400 Subject: [PATCH 5/6] path-walk: support wildcard pathspecs for blob filtering Previously, walk_objects_by_path() silently ignored pathspecs containing wildcards or magic by clearing them. This caused all blobs to be downloaded regardless of the given pathspec. 
Wildcard pathspecs like "d/file.*.txt" are useful for narrowing which blobs to process (e.g., during 'git backfill'). Support wildcard pathspecs by making two changes: 1. Add an 'exact_pathspecs' flag to path_walk_context. When the pathspec has no wildcards or magic, set this flag and use the existing fast-path prefix matching in add_tree_entries(). When wildcards are present, skip that block since prefix matching cannot handle glob patterns. 2. Add a match_pathspec() check in walk_path() to filter out blobs whose full path does not match the pathspec. This provides the actual blob-level filtering for wildcard pathspecs. Signed-off-by: Derrick Stolee --- path-walk.c | 22 +++++++++++++--------- t/t5620-backfill.sh | 7 +++---- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/path-walk.c b/path-walk.c index 3750552978655e..2aa3e7d8a4133d 100644 --- a/path-walk.c +++ b/path-walk.c @@ -63,6 +63,8 @@ struct path_walk_context { */ struct prio_queue path_stack; struct strset path_stack_pushed; + + unsigned exact_pathspecs:1; }; static int compare_by_type(const void *one, const void *two, void *cb_data) @@ -207,7 +209,7 @@ static int add_tree_entries(struct path_walk_context *ctx, match != MATCHED) continue; } - if (ctx->revs->prune_data.nr) { + if (ctx->revs->prune_data.nr && ctx->exact_pathspecs) { struct pathspec *pd = &ctx->revs->prune_data; bool found = false; int did_strip_suffix = strbuf_strip_suffix(&path, "/"); @@ -302,6 +304,13 @@ static int walk_path(struct path_walk_context *ctx, return 0; } + if (list->type == OBJ_BLOB && + ctx->revs->prune_data.nr && + !match_pathspec(ctx->repo->index, &ctx->revs->prune_data, + path, strlen(path), 0, + NULL, 0)) + return 0; + /* Evaluate function pointer on this data, if requested. 
*/ if ((list->type == OBJ_TREE && ctx->info->trees) || (list->type == OBJ_BLOB && ctx->info->blobs) || @@ -510,14 +519,9 @@ int walk_objects_by_path(struct path_walk_info *info) info->revs->tag_objects = 1; if (ctx.revs->prune_data.nr) { - /* - * Only exact prefix pathspecs are currently supported. - * Clear any wildcard or magic pathspecs to avoid - * incorrect prefix matching. - */ - if (ctx.revs->prune_data.has_wildcard || - ctx.revs->prune_data.magic) - clear_pathspec(&ctx.revs->prune_data); + if (!ctx.revs->prune_data.has_wildcard && + !ctx.revs->prune_data.magic) + ctx.exact_pathspecs = 1; } /* Insert a single list for the root tree into the paths. */ diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 52f6484ca18e55..c6f54ee91ccc6a 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -307,12 +307,11 @@ test_expect_success 'backfill with wildcard pathspec' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - # TODO: The wildcard pathspec should limit downloaded blobs, - # but currently all blobs are downloaded. - git -C backfill-path backfill HEAD -- "d/file.*.txt" && + git -C backfill-path backfill HEAD -- "d/file.*.txt" 2>err && + test_must_be_empty err && git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && - test_line_count = 0 missing + test_line_count = 40 missing ' test_expect_success 'backfill with --all' ' From b6423f9595bf003fc87670658f172a98c6257155 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Sun, 22 Mar 2026 21:44:29 -0400 Subject: [PATCH 6/6] t5620: test backfill's unknown argument handling Before the recent changes to parse rev-list arguments inside of 'git backfill', the builtin would take arbitrary arguments without complaint (and ignore them). This was noticed and a patch was sent [1] which motivates this change. 
[1] https://lore.kernel.org/git/20260321031643.5185-1-r.siddharth.shrimali@gmail.com/ Note that the revision machinery can output an "ambiguous argument" warning if a value not starting with '--' is found and doesn't make sense as a reference or a pathspec. For unrecognized arguments starting with '--' we need to add logic into builtin/backfill.c to catch leftover arguments. Reported-by: Siddharth Shrimali Signed-off-by: Derrick Stolee --- builtin/backfill.c | 3 +++ t/t5620-backfill.sh | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/builtin/backfill.c b/builtin/backfill.c index 90c9d84793ecd3..edc19c01e50655 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -142,6 +142,9 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit repo_init_revisions(repo, &ctx.revs, prefix); argc = setup_revisions(argc, argv, &ctx.revs, NULL); + if (argc > 1) + die(_("unrecognized argument: %s"), argv[1]); + repo_config(repo, git_default_config, NULL); if (ctx.sparse < 0) diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index c6f54ee91ccc6a..2c347a91fe6d9f 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -7,6 +7,14 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +test_expect_success 'backfill rejects unexpected arguments' ' + test_must_fail git backfill unexpected-arg 2>err && + test_grep "ambiguous argument .*unexpected-arg" err && + + test_must_fail git backfill --all --unexpected-arg --first-parent 2>err && + test_grep "unrecognized argument: --unexpected-arg" err +' + # We create objects in the 'src' repo. test_expect_success 'setup repo for object creation' ' echo "{print \$1}" >print_1.awk &&