diff --git a/Documentation/git-backfill.adoc b/Documentation/git-backfill.adoc index b8394dcf22b6e1..246ab417c24a10 100644 --- a/Documentation/git-backfill.adoc +++ b/Documentation/git-backfill.adoc @@ -63,9 +63,12 @@ OPTIONS current sparse-checkout. If the sparse-checkout feature is enabled, then `--sparse` is assumed and can be disabled with `--no-sparse`. +You may also specify the commit limiting options from linkgit:git-rev-list[1]. + SEE ALSO -------- -linkgit:git-clone[1]. +linkgit:git-clone[1], +linkgit:git-rev-list[1] GIT --- diff --git a/builtin/backfill.c b/builtin/backfill.c index e80fc1b694df61..edc19c01e50655 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -35,6 +35,7 @@ struct backfill_context { struct oid_array current_batch; size_t min_batch_size; int sparse; + struct rev_info revs; }; static void backfill_context_clear(struct backfill_context *ctx) @@ -80,7 +81,6 @@ static int fill_missing_blobs(const char *path UNUSED, static int do_backfill(struct backfill_context *ctx) { - struct rev_info revs; struct path_walk_info info = PATH_WALK_INFO_INIT; int ret; @@ -92,13 +92,14 @@ static int do_backfill(struct backfill_context *ctx) } } - repo_init_revisions(ctx->repo, &revs, ""); - handle_revision_arg("HEAD", &revs, 0, 0); + /* Walk from HEAD if otherwise unspecified. */ + if (!ctx->revs.pending.nr) + add_head_to_pending(&ctx->revs); info.blobs = 1; info.tags = info.commits = info.trees = 0; - info.revs = &revs; + info.revs = &ctx->revs; info.path_fn = fill_missing_blobs; info.path_fn_data = ctx; @@ -109,7 +110,6 @@ static int do_backfill(struct backfill_context *ctx) download_batch(ctx); path_walk_info_clear(&info); - release_revisions(&revs); return ret; } @@ -121,6 +121,7 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit .current_batch = OID_ARRAY_INIT, .min_batch_size = 50000, .sparse = 0, + .revs = REV_INFO_INIT, }; struct option options[] = { OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size, @@ -134,7 +135,15 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit builtin_backfill_usage, options); argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage, - 0); + PARSE_OPT_KEEP_UNKNOWN_OPT | + PARSE_OPT_KEEP_ARGV0 | + PARSE_OPT_KEEP_DASHDASH); + + repo_init_revisions(repo, &ctx.revs, prefix); + argc = setup_revisions(argc, argv, &ctx.revs, NULL); + + if (argc > 1) + die(_("unrecognized argument: %s"), argv[1]); repo_config(repo, git_default_config, NULL); @@ -143,5 +152,6 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit result = do_backfill(&ctx); backfill_context_clear(&ctx); + release_revisions(&ctx.revs); return result; } diff --git a/path-walk.c b/path-walk.c index 364e4cfa19b2e4..2aa3e7d8a4133d 100644 --- a/path-walk.c +++ b/path-walk.c @@ -11,6 +11,7 @@ #include "list-objects.h" #include "object.h" #include "oid-array.h" +#include "path.h" #include "prio-queue.h" #include "repository.h" #include "revision.h" @@ -62,6 +63,8 @@ struct path_walk_context { */ struct prio_queue path_stack; struct strset path_stack_pushed; + + unsigned exact_pathspecs:1; }; static int compare_by_type(const void *one, const void *two, void *cb_data) @@ -206,6 +209,33 @@ static int add_tree_entries(struct path_walk_context *ctx, match != MATCHED) continue; } + if (ctx->revs->prune_data.nr && ctx->exact_pathspecs) { + struct pathspec *pd = &ctx->revs->prune_data; + bool found = false; + int did_strip_suffix = strbuf_strip_suffix(&path, "/"); + + + for (int i = 0; i < pd->nr; i++) { + struct pathspec_item *item = &pd->items[i]; + + /* + * Continue if either is a directory prefix + * of the other. + */ + if (dir_prefix(path.buf, item->match) || + dir_prefix(item->match, path.buf)) { + found = true; + break; + } + } + + if (did_strip_suffix) + strbuf_addch(&path, '/'); + + /* Skip paths that do not match the prefix. */ + if (!found) + continue; + } add_path_to_list(ctx, path.buf, type, &entry.oid, !(o->flags & UNINTERESTING)); @@ -274,6 +304,13 @@ static int walk_path(struct path_walk_context *ctx, return 0; } + if (list->type == OBJ_BLOB && + ctx->revs->prune_data.nr && + !match_pathspec(ctx->repo->index, &ctx->revs->prune_data, + path, strlen(path), 0, + NULL, 0)) + return 0; + /* Evaluate function pointer on this data, if requested. */ if ((list->type == OBJ_TREE && ctx->info->trees) || (list->type == OBJ_BLOB && ctx->info->blobs) || @@ -481,6 +518,12 @@ int walk_objects_by_path(struct path_walk_info *info) if (info->tags) info->revs->tag_objects = 1; + if (ctx.revs->prune_data.nr) { + if (!ctx.revs->prune_data.has_wildcard && + !ctx.revs->prune_data.magic) + ctx.exact_pathspecs = 1; + } + /* Insert a single list for the root tree into the paths. */ CALLOC_ARRAY(root_tree_list, 1); root_tree_list->type = OBJ_TREE; diff --git a/path.c b/path.c index d726537622cda6..aebb10b2e93a34 100644 --- a/path.c +++ b/path.c @@ -57,7 +57,7 @@ static void strbuf_cleanup_path(struct strbuf *sb) strbuf_remove(sb, 0, path - sb->buf); } -static int dir_prefix(const char *buf, const char *dir) +int dir_prefix(const char *buf, const char *dir) { int len = strlen(dir); return !strncmp(buf, dir, len) && diff --git a/path.h b/path.h index 0ec95a0b079c90..829fafd7e9d8e0 100644 --- a/path.h +++ b/path.h @@ -114,6 +114,12 @@ const char *repo_submodule_path_replace(struct repository *repo, const char *fmt, ...) __attribute__((format (printf, 4, 5))); +/* + * Given a directory name 'dir' (not ending with a trailing '/'), + * determine if 'buf' is equal to 'dir' or has prefix 'dir'+'/'. + */ +int dir_prefix(const char *buf, const char *dir); + void report_linked_checkout_garbage(struct repository *r); /* diff --git a/revision.h b/revision.h index b36acfc2d9f61d..18c9bbd8227bf2 100644 --- a/revision.h +++ b/revision.h @@ -4,6 +4,7 @@ #include "commit.h" #include "grep.h" #include "notes.h" +#include "object-name.h" #include "oidset.h" #include "pretty.h" #include "diff.h" diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 58c81556e72c89..2c347a91fe6d9f 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -7,6 +7,14 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh +test_expect_success 'backfill rejects unexpected arguments' ' + test_must_fail git backfill unexpected-arg 2>err && + test_grep "ambiguous argument .*unexpected-arg" err && + + test_must_fail git backfill --all --unexpected-arg --first-parent 2>err && + test_grep "unrecognized argument: --unexpected-arg" err +' + # We create objects in the 'src' repo. test_expect_success 'setup repo for object creation' ' echo "{print \$1}" >print_1.awk && @@ -15,7 +23,7 @@ test_expect_success 'setup repo for object creation' ' git init src && mkdir -p src/a/b/c && - mkdir -p src/d/e && + mkdir -p src/d/f && for i in 1 2 do @@ -26,8 +34,9 @@ test_expect_success 'setup repo for object creation' ' echo "Version $i of file a/b/$n" > src/a/b/file.$n.txt && echo "Version $i of file a/b/c/$n" > src/a/b/c/file.$n.txt && echo "Version $i of file d/$n" > src/d/file.$n.txt && - echo "Version $i of file d/e/$n" > src/d/e/file.$n.txt && + echo "Version $i of file d/f/$n" > src/d/f/file.$n.txt && git -C src add . && + test_tick && git -C src commit -m "Iteration $n" || return 1 done done @@ -41,6 +50,53 @@ test_expect_success 'setup bare clone for server' ' git -C srv.bare config --local uploadpack.allowanysha1inwant 1 ' +# Create a version of the repo with branches for testing revision +# arguments like --all, --first-parent, and --since. +# +# main: 8 commits (linear) + merge of side branch +# 48 original blobs + 4 side blobs = 52 blobs from main HEAD +# side: 2 commits adding s/file.{1,2}.txt (v1, v2), merged into main +# other: 1 commit adding o/file.{1,2}.txt (not merged) +# 54 total blobs reachable from --all +test_expect_success 'setup branched repo for revision tests' ' + git clone src src-revs && + + # Side branch from tip of main with unique files + git -C src-revs checkout -b side HEAD && + mkdir -p src-revs/s && + echo "Side version 1 of file 1" >src-revs/s/file.1.txt && + echo "Side version 1 of file 2" >src-revs/s/file.2.txt && + test_tick && + git -C src-revs add . && + git -C src-revs commit -m "Side commit 1" && + + echo "Side version 2 of file 1" >src-revs/s/file.1.txt && + echo "Side version 2 of file 2" >src-revs/s/file.2.txt && + test_tick && + git -C src-revs add . && + git -C src-revs commit -m "Side commit 2" && + + # Merge side into main + git -C src-revs checkout main && + test_tick && + git -C src-revs merge side --no-ff -m "Merge side branch" && + + # Other branch (not merged) for --all testing + git -C src-revs checkout -b other main~1 && + mkdir -p src-revs/o && + echo "Other content 1" >src-revs/o/file.1.txt && + echo "Other content 2" >src-revs/o/file.2.txt && + test_tick && + git -C src-revs add . && + git -C src-revs commit -m "Other commit" && + + git -C src-revs checkout main && + + git clone --bare "file://$(pwd)/src-revs" srv-revs.bare && + git -C srv-revs.bare config --local uploadpack.allowfilter 1 && + git -C srv-revs.bare config --local uploadpack.allowanysha1inwant 1 +' + # do basic partial clone from "srv.bare" test_expect_success 'do partial clone 1, backfill gets all objects' ' git clone --no-checkout --filter=blob:none \ @@ -176,6 +232,157 @@ test_expect_success 'backfill --sparse without cone mode (negative)' ' test_line_count = 12 missing ' +test_expect_success 'backfill with revision range' ' + test_when_finished rm -rf backfill-revs && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-revs && + + # No blobs yet + git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + git -C backfill-revs backfill HEAD~2..HEAD && + + # 30 objects downloaded. + git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 18 missing +' + +test_expect_success 'backfill with revisions over stdin' ' + test_when_finished rm -rf backfill-revs && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-revs && + + # No blobs yet + git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + cat >in <<-EOF && + HEAD + ^HEAD~2 + EOF + + git -C backfill-revs backfill --stdin missing && + test_line_count = 18 missing +' + +test_expect_success 'backfill with prefix pathspec' ' + test_when_finished rm -rf backfill-path && + git clone --bare --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-path && + + # No blobs yet + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + git -C backfill-path backfill HEAD -- d/f 2>err && + test_must_be_empty err && + + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 40 missing +' + +test_expect_success 'backfill with multiple pathspecs' ' + test_when_finished rm -rf backfill-path && + git clone --bare --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-path && + + # No blobs yet + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + git -C backfill-path backfill HEAD -- d/f a 2>err && + test_must_be_empty err && + + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 16 missing +' + +test_expect_success 'backfill with wildcard pathspec' ' + test_when_finished rm -rf backfill-path && + git clone --bare --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-path && + + # No blobs yet + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 48 missing && + + git -C backfill-path backfill HEAD -- "d/file.*.txt" 2>err && + test_must_be_empty err && + + git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 40 missing +' + +test_expect_success 'backfill with --all' ' + test_when_finished rm -rf backfill-all && + git clone --no-checkout --filter=blob:none \ + "file://$(pwd)/srv-revs.bare" backfill-all && + + # All blobs from all refs are missing + git -C backfill-all rev-list --quiet --objects --all --missing=print >missing && + test_line_count = 54 missing && + + # Backfill from HEAD gets main blobs only + git -C backfill-all backfill HEAD && + + # Other branch blobs still missing + git -C backfill-all rev-list --quiet --objects --all --missing=print >missing && + test_line_count = 2 missing && + + # Backfill with --all gets everything + git -C backfill-all backfill --all && + + git -C backfill-all rev-list --quiet --objects --all --missing=print >missing && + test_line_count = 0 missing +' + +test_expect_success 'backfill with --first-parent' ' + test_when_finished rm -rf backfill-fp && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv-revs.bare" backfill-fp && + + git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 52 missing && + + # --first-parent skips the side branch commits, so + # s/file.{1,2}.txt v1 blobs (only in side commit 1) are missed. + git -C backfill-fp backfill --first-parent HEAD && + + git -C backfill-fp rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 2 missing +' + +test_expect_success 'backfill with --since' ' + test_when_finished rm -rf backfill-since && + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv-revs.bare" backfill-since && + + git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 52 missing && + + # Use a cutoff between commits 4 and 5 (between v1 and v2 + # iterations). Commits 5-8 still carry v1 of files 2-4 in + # their trees, but v1 of file.1.txt is only in commits 1-4. + SINCE=$(git -C backfill-since log --first-parent --reverse \ + --format=%ct HEAD~1 | sed -n 5p) && + git -C backfill-since backfill --since="@$((SINCE - 1))" HEAD && + + # 6 missing: v1 of file.1.txt in all 6 directories + git -C backfill-since rev-list --quiet --objects --missing=print HEAD >missing && + test_line_count = 6 missing +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd