From c407b2cb346b32cdf95c6017d16b93dd1483c627 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 5 Jul 2021 11:57:06 -0400 Subject: [PATCH 1/9] t7519: rewrite sparse index test The sparse index is tested with the FS Monitor hook and extension since f8fe49e (fsmonitor: integrate with sparse index, 2021-07-14). This test was very fragile because it shared an index across sparse and non-sparse behavior. Since that expansion and contraction could cause the index to lose its FS Monitor bitmap and token, behavior is fragile to changes in 'git sparse-checkout set'. Rewrite the test to use two clones of the original repo: full and sparse. This allows us to also keep the test files (actual, expect, trace2.txt) out of the repos we are testing with 'git status'. Signed-off-by: Derrick Stolee --- t/t7519-status-fsmonitor.sh | 38 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh index deea88d4431d23..f1463197b99c84 100755 --- a/t/t7519-status-fsmonitor.sh +++ b/t/t7519-status-fsmonitor.sh @@ -389,43 +389,47 @@ test_expect_success 'status succeeds after staging/unstaging' ' # If "!" is supplied, then we verify that we do not call ensure_full_index # during a call to 'git status'. Otherwise, we verify that we _do_ call it. check_sparse_index_behavior () { - git status --porcelain=v2 >expect && - git sparse-checkout init --cone --sparse-index && - git sparse-checkout set dir1 dir2 && + git -C full status --porcelain=v2 >expect && GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \ - git status --porcelain=v2 >actual && + git -C sparse status --porcelain=v2 >actual && test_region $1 index ensure_full_index trace2.txt && test_region fsm_hook query trace2.txt && test_cmp expect actual && - rm trace2.txt && - git sparse-checkout disable + rm trace2.txt } test_expect_success 'status succeeds with sparse index' ' - git reset --hard && + git clone . full && + git clone --sparse . sparse && + git -C sparse sparse-checkout init --cone --sparse-index && + git -C sparse sparse-checkout set dir1 dir2 && - test_config core.fsmonitor "$TEST_DIRECTORY/t7519/fsmonitor-all" && - check_sparse_index_behavior ! && - - write_script .git/hooks/fsmonitor-test<<-\EOF && + write_script .git/hooks/fsmonitor-test <<-\EOF && printf "last_update_token\0" EOF - git config core.fsmonitor .git/hooks/fsmonitor-test && + git -C full config core.fsmonitor ../.git/hooks/fsmonitor-test && + git -C sparse config core.fsmonitor ../.git/hooks/fsmonitor-test && check_sparse_index_behavior ! && - write_script .git/hooks/fsmonitor-test<<-\EOF && + write_script .git/hooks/fsmonitor-test <<-\EOF && printf "last_update_token\0" printf "dir1/modified\0" EOF check_sparse_index_behavior ! && - cp -r dir1 dir1a && - git add dir1a && - git commit -m "add dir1a" && + git -C sparse sparse-checkout add dir1a && + + for repo in full sparse + do + cp -r $repo/dir1 $repo/dir1a && + git -C $repo add dir1a && + git -C $repo commit -m "add dir1a" || return 1 + done && + git -C sparse sparse-checkout set dir1 dir2 && # This one modifies outside the sparse-checkout definition # and hence we expect to expand the sparse-index. - write_script .git/hooks/fsmonitor-test<<-\EOF && + write_script .git/hooks/fsmonitor-test <<-\EOF && printf "last_update_token\0" printf "dir1a/modified\0" EOF From 8660877ba7a06971bb5c443bbc1e07825e950a60 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 21 Jun 2021 13:09:06 -0400 Subject: [PATCH 2/9] sparse-index: silently return when not using cone-mode patterns While the sparse-index is only enabled when core.sparseCheckoutCone is also enabled, it is possible for the user to modify the sparse-checkout file manually in a way that does not match cone-mode patterns. In this case, we should refuse to convert an index into a sparse index, since the sparse_checkout_patterns will not be initialized with recursive and parent path hashsets. Also silently return if there are no cache entries, which is a simple case: there are no paths to make sparse! Signed-off-by: Derrick Stolee --- sparse-index.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sparse-index.c b/sparse-index.c index c6b4feec413a8f..cd6e0d5f40814c 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -130,7 +130,7 @@ static int index_has_unmerged_entries(struct index_state *istate) int convert_to_sparse(struct index_state *istate) { int test_env; - if (istate->split_index || istate->sparse_index || + if (istate->split_index || istate->sparse_index || !istate->cache_nr || !core_apply_sparse_checkout || !core_sparse_checkout_cone) return 0; @@ -158,10 +158,16 @@ int convert_to_sparse(struct index_state *istate) return 0; } - if (!istate->sparse_checkout_patterns->use_cone_patterns) { - warning(_("attempting to use sparse-index without cone mode")); - return -1; - } + /* + * We need cone-mode patterns to use sparse-index. If a user edits + * their sparse-checkout file manually, then we can detect during + * parsing that they are not actually using cone-mode patterns and + * hence we need to abort this conversion _without error_. Warnings + * already exist in the pattern parsing to inform the user of their + * bad patterns. + */ + if (!istate->sparse_checkout_patterns->use_cone_patterns) + return 0; /* * NEEDSWORK: If we have unmerged entries, then stay full. From edb00d3f9aa68f5021c50a72c8be4bc54dd64ea2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 5 Jul 2021 12:43:20 -0400 Subject: [PATCH 3/9] unpack-trees: fix nested sparse-dir search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The iterated search in find_cache_entry() was recently modified to include a loop that searches backwards for a sparse directory entry that matches the given traverse_info and name_entry. However, the string comparison failed to actually concatenate those two strings, so this failed to find a sparse directory when it was not a top-level directory. This caused some errors in rare cases where a 'git checkout' spanned a diff that modified files within the sparse directory entry, but we could not correctly find the entry. Helped-by: Johannes Schindelin Helped-by: René Scharfe Signed-off-by: Derrick Stolee --- unpack-trees.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/unpack-trees.c b/unpack-trees.c index 5786645f315d51..b78d7ee6c09081 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1255,7 +1255,7 @@ static int sparse_dir_matches_path(const struct cache_entry *ce, static struct cache_entry *find_cache_entry(struct traverse_info *info, const struct name_entry *p) { - struct cache_entry *ce; + const char *path; int pos = find_cache_pos(info, p->path, p->pathlen); struct unpack_trees_options *o = info->data; @@ -1281,9 +1281,11 @@ static struct cache_entry *find_cache_entry(struct traverse_info *info, * paths (e.g. "subdir-"). */ while (pos >= 0) { - ce = o->src_index->cache[pos]; + struct cache_entry *ce = o->src_index->cache[pos]; - if (strncmp(ce->name, p->path, p->pathlen)) + if (!skip_prefix(ce->name, info->traverse_path, &path) || + strncmp(path, p->path, p->pathlen) || + path[p->pathlen] != '/') return NULL; if (S_ISSPARSEDIR(ce->ce_mode) && From c8620de61eb459c48919fbd4d122bc82cf8ed50a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 24 Aug 2021 14:00:43 -0400 Subject: [PATCH 4/9] sparse-index: silently return when cache tree fails If cache_tree_update() returns a non-zero value, then it could not create the cache tree. This is likely due to a path having a merge conflict. Since we are already returning early, let's return silently to avoid making it seem like we failed to write the index at all. If we remove our dependence on the cache tree within convert_to_sparse(), then we could still recover from this scenario and have a sparse index. Signed-off-by: Derrick Stolee --- sparse-index.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sparse-index.c b/sparse-index.c index cd6e0d5f40814c..d9b076959532b4 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -178,10 +178,12 @@ int convert_to_sparse(struct index_state *istate) /* Clear and recompute the cache-tree */ cache_tree_free(&istate->cache_tree); - if (cache_tree_update(istate, 0)) { - warning(_("unable to update cache-tree, staying full")); - return -1; - } + /* + * Silently return if there is a problem with the cache tree update, + * which might just be due to a conflict state in some entry. + */ + if (cache_tree_update(istate, 0)) + return 0; remove_fsmonitor(istate); From 371716124247cf89069c1344e77c08853a1f76fb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 24 Aug 2021 14:01:33 -0400 Subject: [PATCH 5/9] sparse-index: use WRITE_TREE_MISSING_OK When updating the cache tree in convert_to_sparse(), the WRITE_TREE_MISSING_OK flag indicates that trees might be computed that do not already exist within the object database. This happens in cases such as 'git add' creating new trees that it wants to store in anticipation of a following 'git commit'. If this flag is not specified, then it might trigger a promisor fetch or a failure due to the object not existing locally. Use WRITE_TREE_MISSING_OK during convert_to_sparse() to avoid these possible reasons for the cache_tree_update() to fail. Signed-off-by: Derrick Stolee --- sparse-index.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sparse-index.c b/sparse-index.c index d9b076959532b4..880c5f72338505 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -181,8 +181,11 @@ int convert_to_sparse(struct index_state *istate) /* * Silently return if there is a problem with the cache tree update, * which might just be due to a conflict state in some entry. + * + * This might create new tree objects, so be sure to use + * WRITE_TREE_MISSING_OK. */ - if (cache_tree_update(istate, 0)) + if (cache_tree_update(istate, WRITE_TREE_MISSING_OK)) return 0; remove_fsmonitor(istate); From 98b4cae2297cc93abe83a573360859c16806a6fd Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 26 Jul 2021 15:44:05 -0400 Subject: [PATCH 6/9] sparse-checkout: create helper methods As we integrate the sparse index into more builtins, we occasionally need to check the sparse-checkout patterns to see if a path is within the sparse-checkout cone. Create some helper methods that help initialize the patterns and check for pattern matching to make this easier. The existing callers of commands like get_sparse_checkout_patterns() use a custom 'struct pattern_list' that is not necessarily the one in the 'struct index_state', so there are not many previous uses that could adopt these helpers. There are just two in builtin/add.c and sparse-index.c that can use path_in_sparse_checkout(). We add a path_in_cone_mode_sparse_checkout() as well that will only return false if the path is outside of the sparse-checkout definition _and_ the sparse-checkout patterns are in cone mode. Signed-off-by: Derrick Stolee --- builtin/add.c | 7 +------ dir.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ dir.h | 8 ++++++++ sparse-index.c | 14 +++----------- 4 files changed, 64 insertions(+), 17 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index 17528e8f922693..88a6c0c69fb43b 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -190,8 +190,6 @@ static int refresh(int verbose, const struct pathspec *pathspec) struct string_list only_match_skip_worktree = STRING_LIST_INIT_NODUP; int flags = REFRESH_IGNORE_SKIP_WORKTREE | (verbose ? REFRESH_IN_PORCELAIN : REFRESH_QUIET); - struct pattern_list pl = { 0 }; - int sparse_checkout_enabled = !get_sparse_checkout_patterns(&pl); seen = xcalloc(pathspec->nr, 1); refresh_index(&the_index, flags, pathspec, seen, @@ -199,12 +197,9 @@ static int refresh(int verbose, const struct pathspec *pathspec) for (i = 0; i < pathspec->nr; i++) { if (!seen[i]) { const char *path = pathspec->items[i].original; - int dtype = DT_REG; if (matches_skip_worktree(pathspec, i, &skip_worktree_seen) || - (sparse_checkout_enabled && - !path_matches_pattern_list(path, strlen(path), NULL, - &dtype, &pl, &the_index))) { + !path_in_sparse_checkout(path, &the_index)) { string_list_append(&only_match_skip_worktree, pathspec->items[i].original); } else { diff --git a/dir.c b/dir.c index 03c4d212672bc4..86afa2eae001ef 100644 --- a/dir.c +++ b/dir.c @@ -1439,6 +1439,58 @@ enum pattern_match_result path_matches_pattern_list( return result; } +int init_sparse_checkout_patterns(struct index_state *istate) +{ + if (!core_apply_sparse_checkout) + return 1; + if (istate->sparse_checkout_patterns) + return 0; + + CALLOC_ARRAY(istate->sparse_checkout_patterns, 1); + + if (get_sparse_checkout_patterns(istate->sparse_checkout_patterns) < 0) { + FREE_AND_NULL(istate->sparse_checkout_patterns); + return -1; + } + + return 0; +} + +static int path_in_sparse_checkout_1(const char *path, + struct index_state *istate, + int require_cone_mode) +{ + const char *base; + int dtype = DT_REG; + + /* + * We default to accepting a path if there are no patterns or + * they are of the wrong type. + */ + if (init_sparse_checkout_patterns(istate) || + (require_cone_mode && + !istate->sparse_checkout_patterns->use_cone_patterns)) + return 1; + + base = strrchr(path, '/'); + return path_matches_pattern_list(path, strlen(path), base ? base + 1 : path, + &dtype, + istate->sparse_checkout_patterns, + istate) > 0; +} + +int path_in_sparse_checkout(const char *path, + struct index_state *istate) +{ + return path_in_sparse_checkout_1(path, istate, 0); +} + +int path_in_cone_mode_sparse_checkout(const char *path, + struct index_state *istate) +{ + return path_in_sparse_checkout_1(path, istate, 1); +} + static struct path_pattern *last_matching_pattern_from_lists( struct dir_struct *dir, struct index_state *istate, const char *pathname, int pathlen, diff --git a/dir.h b/dir.h index b3e1a54a97145d..6823312521e813 100644 --- a/dir.h +++ b/dir.h @@ -394,6 +394,14 @@ enum pattern_match_result path_matches_pattern_list(const char *pathname, const char *basename, int *dtype, struct pattern_list *pl, struct index_state *istate); + +int init_sparse_checkout_patterns(struct index_state *state); + +int path_in_sparse_checkout(const char *path, + struct index_state *istate); +int path_in_cone_mode_sparse_checkout(const char *path, + struct index_state *istate); + struct dir_entry *dir_add_ignored(struct dir_struct *dir, struct index_state *istate, const char *pathname, int len); diff --git a/sparse-index.c b/sparse-index.c index 880c5f72338505..23f7c3bd361988 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -33,19 +33,14 @@ static int convert_to_sparse_rec(struct index_state *istate, { int i, can_convert = 1; int start_converted = num_converted; - enum pattern_match_result match; - int dtype = DT_UNKNOWN; struct strbuf child_path = STRBUF_INIT; - struct pattern_list *pl = istate->sparse_checkout_patterns; /* * Is the current path outside of the sparse cone? * Then check if the region can be replaced by a sparse * directory entry (everything is sparse and merged). */ - match = path_matches_pattern_list(ct_path, ct_pathlen, - NULL, &dtype, pl, istate); - if (match != NOT_MATCHED) + if (path_in_sparse_checkout(ct_path, istate)) can_convert = 0; for (i = start; can_convert && i < end; i++) { @@ -152,11 +147,8 @@ int convert_to_sparse(struct index_state *istate) if (!istate->repo->settings.sparse_index) return 0; - if (!istate->sparse_checkout_patterns) { - istate->sparse_checkout_patterns = xcalloc(1, sizeof(struct pattern_list)); - if (get_sparse_checkout_patterns(istate->sparse_checkout_patterns) < 0) - return 0; - } + if (init_sparse_checkout_patterns(istate)) + return 0; /* * We need cone-mode patterns to use sparse-index. If a user edits From 6ec3cb2042e1064387dba410c9494a6dc8be0e54 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 26 Jul 2021 15:15:44 -0400 Subject: [PATCH 7/9] attr: be careful about sparse directories Signed-off-by: Derrick Stolee --- attr.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/attr.c b/attr.c index d029e681f2880a..79adaa50ea1e09 100644 --- a/attr.c +++ b/attr.c @@ -14,6 +14,7 @@ #include "utf8.h" #include "quote.h" #include "thread-utils.h" +#include "dir.h" const char git_attr__true[] = "(builtin)true"; const char git_attr__false[] = "\0(builtin)false"; @@ -744,6 +745,20 @@ static struct attr_stack *read_attr_from_index(struct index_state *istate, if (!istate) return NULL; + /* + * The .gitattributes file only applies to files within its + * parent directory. In the case of cone-mode sparse-checkout, + * the .gitattributes file is sparse if and only if all paths + * within that directory are also sparse. Thus, don't load the + * .gitattributes file since it will not matter. + * + * In the case of a sparse index, it is critical that we don't go + * looking for a .gitattributes file, as doing so would cause the + * index to expand. + */ + if (!path_in_cone_mode_sparse_checkout(path, istate)) + return NULL; + buf = read_blob_data_from_index(istate, path, NULL); if (!buf) return NULL; From d57f48c445cc148a7070fca799a3e57aa3dace76 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 10 Aug 2021 09:14:43 -0400 Subject: [PATCH 8/9] sparse-index: add SPARSE_INDEX_MEMORY_ONLY flag The convert_to_sparse() method checks for the GIT_TEST_SPARSE_INDEX environment variable or the "index.sparse" config setting before converting the index to a sparse one. This is for ease of use since all current consumers are preparing to compress the index before writing it to disk. If these settings are not enabled, then convert_to_sparse() silently returns without doing anything. We will add a consumer in the next change that wants to use the sparse index as an in-memory data structure, regardless of whether the on-disk format should be sparse. To that end, create the SPARSE_INDEX_MEMORY_ONLY flag that will skip these config checks when enabled. All current consumers are modified to pass '0' in the new 'flags' parameter. Signed-off-by: Derrick Stolee --- read-cache.c | 4 ++-- sparse-index.c | 37 ++++++++++++++++++++++--------------- sparse-index.h | 3 ++- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/read-cache.c b/read-cache.c index 9048ef9e905251..f5d4385c4080a1 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3069,7 +3069,7 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l int ret; int was_full = !istate->sparse_index; - ret = convert_to_sparse(istate); + ret = convert_to_sparse(istate, 0); if (ret) { warning(_("failed to convert to a sparse-index")); @@ -3182,7 +3182,7 @@ static int write_shared_index(struct index_state *istate, int ret, was_full = !istate->sparse_index; move_cache_to_base_index(istate); - convert_to_sparse(istate); + convert_to_sparse(istate, 0); trace2_region_enter_printf("index", "shared/do_write_index", the_repository, "%s", get_tempfile_path(*temp)); diff --git a/sparse-index.c b/sparse-index.c index 23f7c3bd361988..0bc45f60ac5faf 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -122,30 +122,37 @@ static int index_has_unmerged_entries(struct index_state *istate) return 0; } -int convert_to_sparse(struct index_state *istate) +int convert_to_sparse(struct index_state *istate, int flags) { int test_env; - if (istate->split_index || istate->sparse_index || !istate->cache_nr || + if (istate->sparse_index || !istate->cache_nr || !core_apply_sparse_checkout || !core_sparse_checkout_cone) return 0; if (!istate->repo) istate->repo = the_repository; - /* - * The GIT_TEST_SPARSE_INDEX environment variable triggers the - * index.sparse config variable to be on. - */ - test_env = git_env_bool("GIT_TEST_SPARSE_INDEX", -1); - if (test_env >= 0) - set_sparse_index_config(istate->repo, test_env); + if (!(flags & SPARSE_INDEX_MEMORY_ONLY)) { + /* + * The sparse index is not (yet) integrated with a split index. + */ + if (istate->split_index) + return 0; + /* + * The GIT_TEST_SPARSE_INDEX environment variable triggers the + * index.sparse config variable to be on. + */ + test_env = git_env_bool("GIT_TEST_SPARSE_INDEX", -1); + if (test_env >= 0) + set_sparse_index_config(istate->repo, test_env); - /* - * Only convert to sparse if index.sparse is set. - */ - prepare_repo_settings(istate->repo); - if (!istate->repo->settings.sparse_index) - return 0; + /* + * Only convert to sparse if index.sparse is set. + */ + prepare_repo_settings(istate->repo); + if (!istate->repo->settings.sparse_index) + return 0; + } if (init_sparse_checkout_patterns(istate)) return 0; diff --git a/sparse-index.h b/sparse-index.h index 1115a0d7dd984b..9f3d7bc7fafce1 100644 --- a/sparse-index.h +++ b/sparse-index.h @@ -2,7 +2,8 @@ #define SPARSE_INDEX_H__ struct index_state; -int convert_to_sparse(struct index_state *istate); +#define SPARSE_INDEX_MEMORY_ONLY (1 << 0) +int convert_to_sparse(struct index_state *istate, int flags); /* * Some places in the codebase expect to search for a specific path. From 91b53f20109fe55635b1815f87afd5d5da68a182 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Sun, 4 Jul 2021 17:06:12 -0400 Subject: [PATCH 9/9] sparse-checkout: clear tracked sparse dirs When changing the scope of a sparse-checkout using cone mode, we might have some tracked directories go out of scope. The current logic removes the tracked files from within those directories, but leaves the ignored files within those directories. This is a bit unexpected to users who have given input to Git saying they don't need those directories anymore. This is something that is new to the cone mode pattern type: the user has explicitly said "I want these directories and _not_ those directories." The typical sparse-checkout patterns more generally apply to "I want files with with these patterns" so it is natural to leave ignored files as they are. This focus on directories in cone mode provides us an opportunity to change the behavior. Leaving these ignored files in the sparse directories makes it impossible to gain performance benefits in the sparse index. When we track into these directories, we need to know if the files are ignored or not, which might depend on the _tracked_ .gitignore file(s) within the sparse directory. This depends on the indexed version of the file, so the sparse directory must be expanded. We must take special care to look for untracked, non-ignored files in these directories before deleting them. We do not want to delete any meaningful work that the users were doing in those directories and perhaps forgot to add and commit before switching sparse-checkout definitions. Since those untracked files might be code files that generated ignored build output, also do not delete any ignored files from these directories in that case. The users can recover their state by resetting their sparse-checkout definition to include that directory and continue. Alternatively, they can see the warning that is presented and delete the directory themselves to regain the performance they expect. By deleting the sparse directories when changing scope (or running 'git sparse-checkout reapply') we regain these performance benefits as if the repository was in a clean state. Since these ignored files are frequently build output or helper files from IDEs, the users should not need the files now that the tracked files are removed. If the tracked files reappear, then they will have newer timestamps than the build artifacts, so the artifacts will need to be regenerated anyway. Use the sparse-index as a data structure in order to find the sparse directories that can be safely deleted. Re-expand the index to a full one if it was full before. Signed-off-by: Derrick Stolee --- Documentation/git-sparse-checkout.txt | 10 +++ builtin/sparse-checkout.c | 94 +++++++++++++++++++++++++++ t/t1091-sparse-checkout-builtin.sh | 59 +++++++++++++++++ 3 files changed, 163 insertions(+) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index fdcf43f87cb373..42056ee9ff99da 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -210,6 +210,16 @@ case-insensitive check. This corrects for case mismatched filenames in the 'git sparse-checkout set' command to reflect the expected cone in the working directory. +When changing the sparse-checkout patterns in cone mode, Git will inspect each +tracked directory that is not within the sparse-checkout cone to see if it +contains any untracked files. If all of those files are ignored due to the +`.gitignore` patterns, then the directory will be deleted. If any of the +untracked files within that directory is not ignored, then no deletions will +occur within that directory and a warning message will appear. If these files +are important, then reset your sparse-checkout definition so they are included, +use `git add` and `git commit` to store them, then remove any remaining files +manually to ensure Git can behave optimally. + SUBMODULES ---------- diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 8ba9f13787b058..d0f5c4702be69d 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -100,6 +100,98 @@ static int sparse_checkout_list(int argc, const char **argv) return 0; } +static void clean_tracked_sparse_directories(struct repository *r) +{ + int i, was_full = 0; + struct strbuf path = STRBUF_INIT; + size_t pathlen; + struct string_list_item *item; + struct string_list sparse_dirs = STRING_LIST_INIT_DUP; + + /* + * If we are not using cone mode patterns, then we cannot + * delete directories outside of the sparse cone. + */ + if (!r || !r->index || !r->worktree) + return; + if (init_sparse_checkout_patterns(r->index) || + !r->index->sparse_checkout_patterns->use_cone_patterns) + return; + + /* + * Use the sparse index as a data structure to assist finding + * directories that are safe to delete. This conversion to a + * sparse index will not delete directories that contain + * conflicted entries or submodules. + */ + if (!r->index->sparse_index) { + /* + * If something, such as a merge conflict or other concern, + * prevents us from converting to a sparse index, then do + * not try deleting files. + */ + if (convert_to_sparse(r->index, SPARSE_INDEX_MEMORY_ONLY)) + return; + was_full = 1; + } + + strbuf_addstr(&path, r->worktree); + strbuf_complete(&path, '/'); + pathlen = path.len; + + /* + * Collect directories that have gone out of scope but also + * exist on disk, so there is some work to be done. We need to + * store the entries in a list before exploring, since that might + * expand the sparse-index again. + */ + for (i = 0; i < r->index->cache_nr; i++) { + struct cache_entry *ce = r->index->cache[i]; + + if (S_ISSPARSEDIR(ce->ce_mode) && + repo_file_exists(r, ce->name)) + string_list_append(&sparse_dirs, ce->name); + } + + for_each_string_list_item(item, &sparse_dirs) { + struct dir_struct dir = DIR_INIT; + struct pathspec p = { 0 }; + struct strvec s = STRVEC_INIT; + + strbuf_setlen(&path, pathlen); + strbuf_addstr(&path, item->string); + + dir.flags |= DIR_SHOW_IGNORED_TOO; + + setup_standard_excludes(&dir); + strvec_push(&s, path.buf); + + parse_pathspec(&p, PATHSPEC_GLOB, 0, NULL, s.v); + fill_directory(&dir, r->index, &p); + + if (dir.nr) { + warning(_("directory '%s' contains untracked files," + " but is not in the sparse-checkout cone"), + item->string); + } else if (remove_dir_recursively(&path, 0)) { + /* + * Removal is "best effort". If something blocks + * the deletion, then continue with a warning. + */ + warning(_("failed to remove directory '%s'"), + item->string); + } + + dir_clear(&dir); + } + + string_list_clear(&sparse_dirs, 0); + strbuf_release(&path); + + if (was_full) + ensure_full_index(r->index); +} + static int update_working_directory(struct pattern_list *pl) { enum update_sparsity_result result; @@ -141,6 +233,8 @@ static int update_working_directory(struct pattern_list *pl) else rollback_lock_file(&lock_file); + clean_tracked_sparse_directories(r); + r->index->sparse_checkout_patterns = NULL; return result; } diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 38fc8340f5c9b7..71236981e6484f 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -642,4 +642,63 @@ test_expect_success MINGW 'cone mode replaces backslashes with slashes' ' check_files repo/deep a deeper1 ' +test_expect_success 'cone mode clears ignored subdirectories' ' + rm repo/.git/info/sparse-checkout && + + git -C repo sparse-checkout init --cone && + git -C repo sparse-checkout set deep/deeper1 && + + cat >repo/.gitignore <<-\EOF && + obj/ + *.o + EOF + + git -C repo add .gitignore && + git -C repo commit -m ".gitignore" && + + mkdir -p repo/obj repo/folder1/obj repo/deep/deeper2/obj && + for file in folder1/obj/a obj/a folder1/file.o folder1.o \ + deep/deeper2/obj/a deep/deeper2/file.o file.o + do + echo ignored >repo/$file || return 1 + done && + + git -C repo status --porcelain=v2 >out && + test_must_be_empty out && + + git -C repo sparse-checkout reapply && + test_path_is_missing repo/folder1 && + test_path_is_missing repo/deep/deeper2 && + test_path_is_dir repo/obj && + test_path_is_file repo/file.o && + + git -C repo status --porcelain=v2 >out && + test_must_be_empty out && + + git -C repo sparse-checkout set deep/deeper2 && + test_path_is_missing repo/deep/deeper1 && + test_path_is_dir repo/deep/deeper2 && + test_path_is_dir repo/obj && + test_path_is_file repo/file.o && + + >repo/deep/deeper2/ignored.o && + >repo/deep/deeper2/untracked && + + # When an untracked file is in the way, all untracked files + # (even ignored files) are preserved. + git -C repo sparse-checkout set folder1 2>err && + grep "contains untracked files" err && + test_path_is_file repo/deep/deeper2/ignored.o && + test_path_is_file repo/deep/deeper2/untracked && + + # The rest of the cone matches expectation + test_path_is_missing repo/deep/deeper1 && + test_path_is_dir repo/obj && + test_path_is_file repo/file.o && + + git -C repo status --porcelain=v2 >out && + echo "? deep/deeper2/untracked" >expect && + test_cmp expect out +' + test_done