diff options
author | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2024-02-15 16:57:10 +0100 |
---|---|---|
committer | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2024-02-27 14:35:44 +0100 |
commit | f78b3ce6b90ccb9b86b01b3cd6644b853960b7c3 (patch) | |
tree | 9a4a1fce19a21eb17ac04f24c4f33c813665fa2f /src/other_tools/root_maps/commit_git_map.cpp | |
parent | 0e913dd3eee248bccf2b26161ca0a715d039e716 (diff) | |
download | justbuild-f78b3ce6b90ccb9b86b01b3cd6644b853960b7c3.tar.gz |
commit_git_map: Proper handling of file association
This commit enforces the invariant that a file association between
a Git commit and its root tree is only stored if that tree is
found in our own Git cache. This ensures consistent behavior for
both present and absent roots, as well as in the interaction with
the serve endpoint.
Diffstat (limited to 'src/other_tools/root_maps/commit_git_map.cpp')
-rw-r--r-- | src/other_tools/root_maps/commit_git_map.cpp | 1079 |
1 files changed, 659 insertions, 420 deletions
diff --git a/src/other_tools/root_maps/commit_git_map.cpp b/src/other_tools/root_maps/commit_git_map.cpp index 3cf53cb4..04f14023 100644 --- a/src/other_tools/root_maps/commit_git_map.cpp +++ b/src/other_tools/root_maps/commit_git_map.cpp @@ -52,6 +52,12 @@ namespace { return std::nullopt; } +[[nodiscard]] auto IsCacheGitRoot( + std::filesystem::path const& repo_root) noexcept -> bool { + return std::filesystem::absolute(ToNormalPath(repo_root)) == + std::filesystem::absolute(ToNormalPath(StorageConfig::GitRoot())); +} + /// \brief Helper function for ensuring the serve endpoint, if given, has the /// root if it was marked absent. /// It guarantees the logger is called exactly once with fatal on failure, and @@ -137,7 +143,6 @@ void WriteIdFileAndSetWSRoot(std::string const& root_tree_id, std::string const& subdir, bool ignore_special, GitCASPtr const& git_cas, - std::filesystem::path const& repo_root, std::filesystem::path const& tree_id_file, CommitGitMap::SetterPtr const& ws_setter, CommitGitMap::LoggerPtr const& logger) { @@ -152,9 +157,9 @@ void WriteIdFileAndSetWSRoot(std::string const& root_tree_id, // extract the subdir tree auto git_repo = GitRepoRemote::Open(git_cas); // link fake repo to odb if (not git_repo) { - (*logger)( - fmt::format("Could not open repository {}", repo_root.string()), - /*fatal=*/true); + (*logger)(fmt::format("Could not open cache object database {}", + StorageConfig::GitRoot().string()), + /*fatal=*/true); return; } auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( @@ -180,6 +185,342 @@ void WriteIdFileAndSetWSRoot(std::string const& root_tree_id, false)); } +void NetworkFetchAndSetPresentRoot( + GitRepoInfo const& repo_info, + std::filesystem::path const& repo_root, + std::string const& fetch_repo, + MirrorsPtr const& additional_mirrors, + GitCASPtr const& git_cas, + gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, + std::string const& git_bin, + std::vector<std::string> const& launcher, + bool 
fetch_absent, + gsl::not_null<TaskSystem*> const& ts, + CommitGitMap::SetterPtr const& ws_setter, + CommitGitMap::LoggerPtr const& logger) { + // reaching here can only result in a root that is present + if (repo_info.absent and not fetch_absent) { + (*logger)( + fmt::format("Cannot create workspace root as absent for commit {}.", + repo_info.hash), + /*fatal=*/true); + return; + } + + auto git_repo = GitRepoRemote::Open(git_cas); // link fake repo to odb + if (not git_repo) { + (*logger)( + fmt::format("Could not open repository {}", repo_root.string()), + /*fatal=*/true); + return; + } + + // default to fetching from network + auto tmp_dir = StorageUtils::CreateTypedTmpDir("fetch"); + if (not tmp_dir) { + (*logger)("Failed to create fetch tmp directory!", + /*fatal=*/true); + return; + } + // store failed attempts for subsequent logging + bool fetched{false}; + std::string err_messages{}; + // keep all remotes checked to report them in case fetch fails + std::string remotes_buffer{}; + // try local mirrors first + auto local_mirrors = + MirrorsUtils::GetLocalMirrors(additional_mirrors, fetch_repo); + for (auto mirror : local_mirrors) { + auto mirror_path = GitURLIsPath(mirror); + if (mirror_path) { + mirror = std::filesystem::absolute(*mirror_path).string(); + } + auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [mirror, &err_messages](auto const& msg, bool /*fatal*/) { + err_messages += fmt::format( + "While attempting fetch from local mirror {}:\n{}\n", + mirror, + msg); + }); + if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), + mirror, + repo_info.branch, + repo_info.inherit_env, + git_bin, + launcher, + wrapped_logger)) { + fetched = true; + break; + } + // add local mirror to buffer + remotes_buffer.append(fmt::format("\n> {}", mirror)); + } + if (not fetched) { + // get preferred hostnames list + auto preferred_hostnames = + MirrorsUtils::GetPreferredHostnames(additional_mirrors); + // try first the main URL, but with each of the preferred 
hostnames, if + // URL is not a path + if (not GitURLIsPath(fetch_repo)) { + for (auto const& hostname : preferred_hostnames) { + if (auto preferred_url = + CurlURLHandle::ReplaceHostname(fetch_repo, hostname)) { + auto wrapped_logger = + std::make_shared<AsyncMapConsumerLogger>( + [preferred_url, &err_messages](auto const& msg, + bool /*fatal*/) { + err_messages += fmt::format( + "While attempting fetch from remote " + "{}:\n{}\n", + *preferred_url, + msg); + }); + if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), + *preferred_url, + repo_info.branch, + repo_info.inherit_env, + git_bin, + launcher, + wrapped_logger)) { + fetched = true; + break; + } + // add preferred to buffer + remotes_buffer.append( + fmt::format("\n> {}", *preferred_url)); + } + else { + // report failed hostname + remotes_buffer.append( + fmt::format("\n> {} (failed hostname replace: {})", + fetch_repo, + hostname)); + } + } + } + if (not fetched) { + // now try the original main fetch URL + auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [fetch_repo, &err_messages](auto const& msg, bool /*fatal*/) { + err_messages += fmt::format( + "While attempting fetch from remote {}:\n{}\n", + fetch_repo, + msg); + }); + if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), + fetch_repo, + repo_info.branch, + repo_info.inherit_env, + git_bin, + launcher, + wrapped_logger)) { + fetched = true; + } + else { + // add main fetch URL to buffer + remotes_buffer.append(fmt::format("\n> {}", fetch_repo)); + // now try to fetch from mirrors, in order, if given + for (auto mirror : repo_info.mirrors) { + auto mirror_path = GitURLIsPath(mirror); + if (mirror_path) { + mirror = + std::filesystem::absolute(*mirror_path).string(); + } + else { + // if non-path, try each of the preferred hostnames + for (auto const& hostname : preferred_hostnames) { + if (auto preferred_mirror = + CurlURLHandle::ReplaceHostname(mirror, + hostname)) { + wrapped_logger = + std::make_shared<AsyncMapConsumerLogger>( + 
[preferred_mirror, &err_messages]( + auto const& msg, bool /*fatal*/) { + err_messages += fmt::format( + "While attempting fetch from " + "mirror {}:\n{}\n", + *preferred_mirror, + msg); + }); + if (git_repo->FetchViaTmpRepo( + tmp_dir->GetPath(), + *preferred_mirror, + repo_info.branch, + repo_info.inherit_env, + git_bin, + launcher, + wrapped_logger)) { + fetched = true; + break; + } + // add preferred mirror to buffer + remotes_buffer.append( + fmt::format("\n> {}", *preferred_mirror)); + } + else { + // report failed hostname + remotes_buffer.append(fmt::format( + "\n> {} (failed hostname replace: {})", + mirror, + hostname)); + } + } + } + if (fetched) { + break; + } + wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [mirror, &err_messages](auto const& msg, + bool /*fatal*/) { + err_messages += fmt::format( + "While attempting fetch from mirror {}:\n{}\n", + mirror, + msg); + }); + if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), + mirror, + repo_info.branch, + repo_info.inherit_env, + git_bin, + launcher, + wrapped_logger)) { + fetched = true; + break; + } + // add mirror to buffer + remotes_buffer.append(fmt::format("\n> {}", mirror)); + } + } + } + } + if (not fetched) { + // log fetch failure and list the remotes tried + (*logger)( + fmt::format("While trying to fetch from provided remotes:{}Fetch " + "failed for the provided remotes{}", + err_messages, + remotes_buffer), + /*fatal=*/true); + return; + } + // setup wrapped logger + auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [logger](auto const& msg, bool fatal) { + (*logger)(fmt::format("While checking commit exists:\n{}", msg), + fatal); + }); + // check if commit exists now, after fetch + auto is_commit_present = + git_repo->CheckCommitExists(repo_info.hash, wrapped_logger); + if (not is_commit_present) { + return; + } + if (not *is_commit_present) { + // commit could not be fetched, so fail + (*logger)(fmt::format( + "Could not fetch commit {} from branch {} for 
remote {}", + repo_info.hash, + repo_info.branch, + fetch_repo), + /*fatal=*/true); + return; + } + // if witnessing repository is the Git cache, then also tag the commit + if (IsCacheGitRoot(repo_root)) { + GitOpKey op_key = {.params = + { + repo_root, // target_path + repo_info.hash, // git_hash + "", // branch + "Keep referenced tree alive" // message + }, + .op_type = GitOpType::KEEP_TAG}; + critical_git_op_map->ConsumeAfterKeysReady( + ts, + {std::move(op_key)}, + [git_cas, repo_info, repo_root, ws_setter, logger]( + auto const& values) { + GitOpValue op_result = *values[0]; + // check flag + if (not op_result.result) { + (*logger)("Keep tag failed", + /*fatal=*/true); + return; + } + auto git_repo = + GitRepoRemote::Open(git_cas); // link fake repo to odb + if (not git_repo) { + (*logger)(fmt::format("Could not open repository {}", + repo_root.string()), + /*fatal=*/true); + return; + } + // setup wrapped logger + auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [logger](auto const& msg, bool fatal) { + (*logger)( + fmt::format( + "While getting subtree from commit:\n{}", msg), + fatal); + }); + // get tree id and return workspace root + auto res = git_repo->GetSubtreeFromCommit( + repo_info.hash, repo_info.subdir, wrapped_logger); + if (not std::holds_alternative<std::string>(res)) { + return; + } + // set the workspace root as present + JustMRProgress::Instance().TaskTracker().Stop(repo_info.origin); + (*ws_setter)( + std::pair(nlohmann::json::array( + {repo_info.ignore_special + ? 
FileRoot::kGitTreeIgnoreSpecialMarker + : FileRoot::kGitTreeMarker, + std::get<std::string>(res), // subtree id + repo_root}), + /*is_cache_hit=*/false)); + }, + [logger, target_path = repo_root](auto const& msg, bool fatal) { + (*logger)(fmt::format("While running critical Git op KEEP_TAG " + "for target {}:\n{}", + target_path.string(), + msg), + fatal); + }); + } + else { + auto git_repo = GitRepoRemote::Open(git_cas); // link fake repo to odb + if (not git_repo) { + (*logger)( + fmt::format("Could not open repository {}", repo_root.string()), + /*fatal=*/true); + return; + } + // setup wrapped logger + auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [logger](auto const& msg, bool fatal) { + (*logger)( + fmt::format("While getting subtree from commit:\n{}", msg), + fatal); + }); + // get tree id and return workspace root + auto res = git_repo->GetSubtreeFromCommit( + repo_info.hash, repo_info.subdir, wrapped_logger); + if (not std::holds_alternative<std::string>(res)) { + return; + } + // set the workspace root as present + JustMRProgress::Instance().TaskTracker().Stop(repo_info.origin); + (*ws_setter)(std::pair( + nlohmann::json::array({repo_info.ignore_special + ? FileRoot::kGitTreeIgnoreSpecialMarker + : FileRoot::kGitTreeMarker, + std::get<std::string>(res), // subtree id + repo_root}), + /*is_cache_hit=*/false)); + } +} + /// \brief Contains the main logic for this async map. It ensures the commit is /// available for processing (including fetching for a present root) and setting /// the root. 
@@ -202,8 +543,8 @@ void EnsureCommit( gsl::not_null<TaskSystem*> const& ts, CommitGitMap::SetterPtr const& ws_setter, CommitGitMap::LoggerPtr const& logger) { - // ensure commit exists, and fetch if needed - auto git_repo = GitRepoRemote::Open(git_cas); // link fake repo to odb + // link fake repo to odb + auto git_repo = GitRepoRemote::Open(git_cas); if (not git_repo) { (*logger)( fmt::format("Could not open repository {}", repo_root.string()), @@ -223,6 +564,9 @@ void EnsureCommit( } if (not is_commit_present.value()) { auto tree_id_file = StorageUtils::GetCommitTreeIDFile(repo_info.hash); + // Check if we have stored a file association between commit and tree; + // if an association file exists, the respective tree MUST be in the + // Git cache if (FileSystemManager::Exists(tree_id_file)) { // read resolved tree id auto resolved_tree_id = FileSystemManager::ReadFile(tree_id_file); @@ -232,6 +576,20 @@ void EnsureCommit( /*fatal=*/true); return; } + auto just_git_cas = GitCAS::Open(StorageConfig::GitRoot()); + if (not just_git_cas) { + (*logger)(fmt::format("Could not open Git cache database {}", + StorageConfig::GitRoot().string()), + /*fatal=*/true); + return; + } + auto just_git_repo = GitRepo::Open(just_git_cas); + if (not just_git_repo) { + (*logger)(fmt::format("Could not open Git cache repository {}", + StorageConfig::GitRoot().string()), + /*fatal=*/true); + return; + } // extract the subdir tree wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( [logger, subdir = repo_info.subdir, tree = *resolved_tree_id]( @@ -243,7 +601,7 @@ void EnsureCommit( msg), fatal); }); - auto tree_id = git_repo->GetSubtreeFromTree( + auto tree_id = just_git_repo->GetSubtreeFromTree( *resolved_tree_id, repo_info.subdir, wrapped_logger); if (not tree_id) { return; @@ -260,7 +618,7 @@ void EnsureCommit( logger); } else { - // this root as present + // this root is present (*ws_setter)( std::pair(nlohmann::json::array( {repo_info.ignore_special @@ -318,444 +676,325 @@ 
void EnsureCommit( if (std::holds_alternative<std::string>(serve_result)) { auto const& root_tree_id = std::get<std::string>(serve_result); - // verify if we know the tree already locally - auto wrapped_logger = - std::make_shared<AsyncMapConsumerLogger>( - [logger, tree = root_tree_id](auto const& msg, - bool fatal) { + // verify if we know the tree already in the local Git cache + GitOpKey op_key = { + .params = + { + StorageConfig::GitRoot(), // target_path + "", // git_hash + "", // branch + std::nullopt, // message + true // init_bare + }, + .op_type = GitOpType::ENSURE_INIT}; + critical_git_op_map->ConsumeAfterKeysReady( + ts, + {std::move(op_key)}, + [root_tree_id, + tree_id_file, + repo_info, + repo_root, + fetch_repo, + additional_mirrors, + git_cas, + critical_git_op_map, + import_to_git_map, + git_bin, + launcher, + local_api, + remote_api, + fetch_absent, + ts, + ws_setter, + logger](auto const& values) { + GitOpValue op_result = *values[0]; + // check flag + if (not op_result.result) { + (*logger)("Git init failed", + /*fatal=*/true); + return; + } + auto just_git_repo = + GitRepoRemote::Open(op_result.git_cas); + if (not just_git_repo) { (*logger)( - fmt::format("While verifying presence of " - "tree {}:\n{}", + fmt::format( + "Could not open Git cache repository " + "{}", + StorageConfig::GitRoot().string()), + /*fatal=*/true); + return; + } + // check tree existence + auto wrapped_logger = + std::make_shared<AsyncMapConsumerLogger>( + [logger, tree = root_tree_id]( + auto const& msg, bool fatal) { + (*logger)( + fmt::format( + "While verifying presence of " + "tree {} in repository {}:\n{}", tree, + StorageConfig::GitRoot() + .string(), msg), - fatal); - }); - auto tree_present = - git_repo->CheckTreeExists(root_tree_id, wrapped_logger); - if (not tree_present) { - return; - } - if (*tree_present) { - JustMRProgress::Instance().TaskTracker().Stop( - repo_info.origin); - // write association to id file, get subdir tree, - // and set the workspace 
root as present - WriteIdFileAndSetWSRoot(root_tree_id, - repo_info.subdir, - repo_info.ignore_special, - git_cas, - repo_root, - tree_id_file, - ws_setter, - logger); - return; - } - // try to get root tree from remote execution endpoint - auto root_digest = - ArtifactDigest{root_tree_id, 0, /*is_tree=*/true}; - if (remote_api and - remote_api.value()->RetrieveToCas( - {Artifact::ObjectInfo{.digest = root_digest, - .type = ObjectType::Tree}}, - local_api)) { - JustMRProgress::Instance().TaskTracker().Stop( - repo_info.origin); - // Move tree from CAS to local git storage - auto tmp_dir = StorageUtils::CreateTypedTmpDir( - "fetch-absent-root"); - if (not tmp_dir) { - (*logger)( - fmt::format("Failed to create tmp " - "directory after fetching root " - "tree {} for absent commit {}", - root_tree_id, - repo_info.hash), - /*fatal=*/true); - return; - } - if (not local_api->RetrieveToPaths( - {Artifact::ObjectInfo{ - .digest = root_digest, - .type = ObjectType::Tree}}, - {tmp_dir->GetPath()})) { - (*logger)(fmt::format("Failed to copy fetched root " - "tree {} to {}", - root_tree_id, - tmp_dir->GetPath().string()), - /*fatal=*/true); - return; - } - CommitInfo c_info{ - tmp_dir->GetPath(), "tree", root_tree_id}; - import_to_git_map->ConsumeAfterKeysReady( - ts, - {std::move(c_info)}, - [tmp_dir, // keep tmp_dir alive - root_tree_id, - subdir = repo_info.subdir, - ignore_special = repo_info.ignore_special, - git_cas, - repo_root, - tree_id_file, - ws_setter, - logger](auto const& values) { - if (not values[0]->second) { - (*logger)("Importing to git failed", + fatal); + }); + auto tree_present = just_git_repo->CheckTreeExists( + root_tree_id, wrapped_logger); + if (not tree_present) { + return; + } + if (*tree_present) { + JustMRProgress::Instance().TaskTracker().Stop( + repo_info.origin); + // write association to id file, get subdir + // tree, and set the workspace root as present + WriteIdFileAndSetWSRoot( + root_tree_id, + repo_info.subdir, + 
repo_info.ignore_special, + op_result.git_cas, + tree_id_file, + ws_setter, + logger); + return; + } + + // now check if the tree is in the local checkout, + // if this checkout is not our Git cache; this can + // save an unnecessary remote CAS call + if (not IsCacheGitRoot(repo_root)) { + auto git_repo = GitRepoRemote::Open(git_cas); + if (not git_repo) { + (*logger)(fmt::format("Could not open Git " + "repository {}", + repo_root.string()), /*fatal=*/true); return; } - // sanity check: we should get the expected tree - if (values[0]->first != root_tree_id) { + // check tree existence + wrapped_logger = + std::make_shared<AsyncMapConsumerLogger>( + [logger, + tree = root_tree_id, + repo_root](auto const& msg, + bool fatal) { + (*logger)( + fmt::format( + "While verifying presence " + "of tree {} in repository " + "{}:\n{}", + tree, + repo_root.string(), + msg), + fatal); + }); + tree_present = git_repo->CheckTreeExists( + root_tree_id, wrapped_logger); + if (not tree_present) { + return; + } + if (*tree_present) { + JustMRProgress::Instance() + .TaskTracker() + .Stop(repo_info.origin); + // get subdir tree and set the workspace + // root as present; as this tree is not in + // our Git cache, no file association should + // be stored + wrapped_logger = std::make_shared< + AsyncMapConsumerLogger>( + [logger, + subdir = repo_info.subdir, + tree = root_tree_id](auto const& msg, + bool fatal) { + (*logger)( + fmt::format( + "While getting subdir {} " + "in tree {}:\n{}", + subdir, + tree, + msg), + fatal); + }); + auto tree_id = git_repo->GetSubtreeFromTree( + root_tree_id, + repo_info.subdir, + wrapped_logger); + if (not tree_id) { + return; + } + // set the workspace root as present + (*ws_setter)(std::pair( + nlohmann::json::array( + {repo_info.ignore_special + ? FileRoot:: + kGitTreeIgnoreSpecialMarker + : FileRoot::kGitTreeMarker, + *tree_id, + repo_root.string()}), + false)); + // done! 
+ return; + } + } + + // try to get root tree from remote CAS + auto root_digest = ArtifactDigest{ + root_tree_id, 0, /*is_tree=*/true}; + if (remote_api and + remote_api.value()->RetrieveToCas( + {Artifact::ObjectInfo{ + .digest = root_digest, + .type = ObjectType::Tree}}, + local_api)) { + JustMRProgress::Instance().TaskTracker().Stop( + repo_info.origin); + // Move tree from local CAS to local Git storage + auto tmp_dir = StorageUtils::CreateTypedTmpDir( + "fetch-absent-root"); + if (not tmp_dir) { (*logger)( fmt::format( - "Mismatch in imported git tree " - "id:\nexpected {}, but got {}", + "Failed to create tmp directory " + "after fetching root tree {} for " + "absent commit {}", root_tree_id, - values[0]->first), + repo_info.hash), /*fatal=*/true); return; } - // tree is now in Git cache - auto just_git_cas = - GitCAS::Open(StorageConfig::GitRoot()); - if (not just_git_cas) { - (*logger)( - "Could not open Git cache object " - "database!", - /*fatal=*/true); + if (not local_api->RetrieveToPaths( + {Artifact::ObjectInfo{ + .digest = root_digest, + .type = ObjectType::Tree}}, + {tmp_dir->GetPath()})) { + (*logger)(fmt::format( + "Failed to copy fetched root " + "tree {} to {}", + root_tree_id, + tmp_dir->GetPath().string()), + /*fatal=*/true); return; } - // write association to id file, get subdir - // tree, and set the workspace root as present - WriteIdFileAndSetWSRoot( - root_tree_id, - subdir, - ignore_special, - just_git_cas, - StorageConfig::GitRoot(), - tree_id_file, - ws_setter, - logger); - }, - [logger, tmp_dir, root_tree_id](auto const& msg, - bool fatal) { - (*logger)( - fmt::format("While moving root tree {} " + CommitInfo c_info{ + tmp_dir->GetPath(), "tree", root_tree_id}; + import_to_git_map->ConsumeAfterKeysReady( + ts, + {std::move(c_info)}, + [tmp_dir, // keep tmp_dir alive + root_tree_id, + subdir = repo_info.subdir, + ignore_special = repo_info.ignore_special, + just_git_cas = op_result.git_cas, + tree_id_file, + ws_setter, + logger](auto 
const& values) { + if (not values[0]->second) { + (*logger)("Importing to git failed", + /*fatal=*/true); + return; + } + // sanity check: we should get the + // expected tree + if (values[0]->first != root_tree_id) { + (*logger)( + fmt::format( + "Mismatch in imported git " + "tree id:\nexpected {}, " + "but got {}", + root_tree_id, + values[0]->first), + /*fatal=*/true); + return; + } + // tree is now in Git cache; + // write association to id file, get + // subdir tree, and set the workspace + // root as present + WriteIdFileAndSetWSRoot(root_tree_id, + subdir, + ignore_special, + just_git_cas, + tree_id_file, + ws_setter, + logger); + }, + [logger, tmp_dir, root_tree_id]( + auto const& msg, bool fatal) { + (*logger)( + fmt::format( + "While moving root tree {} " "from {} to local git:\n{}", root_tree_id, tmp_dir->GetPath().string(), msg), - fatal); - }); + fatal); + }); - return; - } - // just serve should have made the tree available in the - // remote CAS, so log this attempt and revert to network - (*logger)(fmt::format("Tree {} marked as served, but not " - "found on remote", - root_tree_id), - /*fatal=*/false); - } - else { - // check if serve failure was due to commit not being found - // or it is otherwise fatal - auto const& is_fatal = std::get<bool>(serve_result); - if (is_fatal) { - (*logger)(fmt::format("Serve endpoint failed to set up " - "root from known commit {}", - repo_info.hash), - /*fatal=*/true); - return; - } - } - } - } + return; + } + // just serve should have made the tree available in + // the remote CAS, so log this attempt and revert to + // network + (*logger)(fmt::format("Tree {} marked as served, " + "but not found on remote", + root_tree_id), + /*fatal=*/false); - // reaching here can only result in a root that is present - if (repo_info.absent and not fetch_absent) { - (*logger)(fmt::format("Cannot create workspace root as absent for " - "commit {}.", - repo_info.hash), - /*fatal=*/true); - return; - } + 
NetworkFetchAndSetPresentRoot(repo_info, + repo_root, + fetch_repo, + additional_mirrors, + git_cas, + critical_git_op_map, + git_bin, + launcher, + fetch_absent, + ts, + ws_setter, + logger); + }, + [logger, target_path = StorageConfig::GitRoot()]( + auto const& msg, bool fatal) { + (*logger)(fmt::format("While running critical Git " + "op ENSURE_INIT bare for " + "target {}:\n{}", + target_path.string(), + msg), + fatal); + }); - // default to fetching from network - auto tmp_dir = StorageUtils::CreateTypedTmpDir("fetch"); - if (not tmp_dir) { - (*logger)("Failed to create fetch tmp directory!", - /*fatal=*/true); - return; - } - // store failed attempts for subsequent logging - bool fetched{false}; - std::string err_messages{}; - // keep all remotes checked to report them in case fetch fails - std::string remotes_buffer{}; - // try local mirrors first - auto local_mirrors = - MirrorsUtils::GetLocalMirrors(additional_mirrors, fetch_repo); - for (auto mirror : local_mirrors) { - auto mirror_path = GitURLIsPath(mirror); - if (mirror_path) { - mirror = std::filesystem::absolute(*mirror_path).string(); - } - auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( - [mirror, &err_messages](auto const& msg, bool /*fatal*/) { - err_messages += fmt::format( - "\nWhile attempting fetch from local mirror {}:\n{}", - mirror, - msg); - }); - if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), - mirror, - repo_info.branch, - repo_info.inherit_env, - git_bin, - launcher, - wrapped_logger)) { - fetched = true; - break; - } - // add local mirror to buffer - remotes_buffer.append(fmt::format("\n> {}", mirror)); - } - if (not fetched) { - // get preferred hostnames list - auto preferred_hostnames = - MirrorsUtils::GetPreferredHostnames(additional_mirrors); - // try first the main URL, but with each of the preferred - // hostnames, if URL is not a path - if (not GitURLIsPath(fetch_repo)) { - for (auto const& hostname : preferred_hostnames) { - if (auto preferred_url = 
CurlURLHandle::ReplaceHostname( - fetch_repo, hostname)) { - auto wrapped_logger = - std::make_shared<AsyncMapConsumerLogger>( - [preferred_url, &err_messages](auto const& msg, - bool /*fatal*/) { - err_messages += fmt::format( - "\nWhile attempting fetch from remote " - "{}:\n{}", - *preferred_url, - msg); - }); - if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), - *preferred_url, - repo_info.branch, - repo_info.inherit_env, - git_bin, - launcher, - wrapped_logger)) { - fetched = true; - break; - } - // add preferred to buffer - remotes_buffer.append( - fmt::format("\n> {}", *preferred_url)); - } - else { - // report failed hostname - remotes_buffer.append( - fmt::format("\n> {} (failed hostname replace: {})", - fetch_repo, - hostname)); - } - } - } - if (not fetched) { - // now try the original main fetch URL - auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( - [fetch_repo, &err_messages](auto const& msg, - bool /*fatal*/) { - err_messages += fmt::format( - "\nWhile attempting fetch from remote {}:\n{}", - fetch_repo, - msg); - }); - if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), - fetch_repo, - repo_info.branch, - repo_info.inherit_env, - git_bin, - launcher, - wrapped_logger)) { - fetched = true; - } - else { - // add main fetch URL to buffer - remotes_buffer.append(fmt::format("\n> {}", fetch_repo)); - // now try to fetch from mirrors, in order, if given - for (auto mirror : repo_info.mirrors) { - auto mirror_path = GitURLIsPath(mirror); - if (mirror_path) { - mirror = std::filesystem::absolute(*mirror_path) - .string(); - } - else { - // if non-path, try each of the preferred hostnames - for (auto const& hostname : preferred_hostnames) { - if (auto preferred_mirror = - CurlURLHandle::ReplaceHostname( - mirror, hostname)) { - wrapped_logger = std::make_shared< - AsyncMapConsumerLogger>( - [preferred_mirror, &err_messages]( - auto const& msg, bool /*fatal*/) { - err_messages += fmt::format( - "\nWhile attempting fetch from " - "mirror 
{}:\n{}", - *preferred_mirror, - msg); - }); - if (git_repo->FetchViaTmpRepo( - tmp_dir->GetPath(), - *preferred_mirror, - repo_info.branch, - repo_info.inherit_env, - git_bin, - launcher, - wrapped_logger)) { - fetched = true; - break; - } - // add preferred mirror to buffer - remotes_buffer.append(fmt::format( - "\n> {}", *preferred_mirror)); - } - else { - // report failed hostname - remotes_buffer.append(fmt::format( - "\n> {} (failed hostname replace: {})", - mirror, - hostname)); - } - } - } - if (fetched) { - break; - } - wrapped_logger = - std::make_shared<AsyncMapConsumerLogger>( - [mirror, &err_messages](auto const& msg, - bool /*fatal*/) { - err_messages += fmt::format( - "\nWhile attempting fetch from mirror " - "{}:\n{}", - mirror, - msg); - }); - if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), - mirror, - repo_info.branch, - repo_info.inherit_env, - git_bin, - launcher, - wrapped_logger)) { - fetched = true; - break; - } - // add mirror to buffer - remotes_buffer.append(fmt::format("\n> {}", mirror)); - } - } - } - } - if (not fetched) { - // log fetch failure details separately to reduce verbosity - (*logger)( - fmt::format("While fetching via tmp repo:{}", err_messages), - /*fatal=*/false); - (*logger)(fmt::format("Failed to fetch from provided remotes:{}", - remotes_buffer), - /*fatal=*/true); - return; - } - // setup wrapped logger - wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( - [logger](auto const& msg, bool fatal) { - (*logger)(fmt::format("While checking commit exists:\n{}", msg), - fatal); - }); - // check if commit exists now, after fetch - auto is_commit_present = - git_repo->CheckCommitExists(repo_info.hash, wrapped_logger); - if (not is_commit_present) { - return; - } - if (not *is_commit_present) { - // commit could not be fetched, so fail - (*logger)(fmt::format("Could not fetch commit {} from branch " - "{} for remote {}", - repo_info.hash, - repo_info.branch, - fetch_repo), - /*fatal=*/true); - return; - } - // keep 
tag - GitOpKey op_key = {.params = - { - repo_root, // target_path - repo_info.hash, // git_hash - "", // branch - "Keep referenced tree alive" // message - }, - .op_type = GitOpType::KEEP_TAG}; - critical_git_op_map->ConsumeAfterKeysReady( - ts, - {std::move(op_key)}, - [git_cas, repo_info, repo_root, ws_setter, logger]( - auto const& values) { - GitOpValue op_result = *values[0]; - // check flag - if (not op_result.result) { - (*logger)("Keep tag failed", - /*fatal=*/true); + // done! return; } - // ensure commit exists, and fetch if needed - auto git_repo = - GitRepoRemote::Open(git_cas); // link fake repo to odb - if (not git_repo) { - (*logger)(fmt::format("Could not open repository {}", - repo_root.string()), + + // check if serve failure was due to commit not being found + // or it is otherwise fatal + auto const& is_fatal = std::get<bool>(serve_result); + if (is_fatal) { + (*logger)(fmt::format("Serve endpoint failed to set up " + "root from known commit {}", + repo_info.hash), /*fatal=*/true); return; } - // setup wrapped logger - auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( - [logger](auto const& msg, bool fatal) { - (*logger)(fmt::format("While getting subtree " - "from commit:\n{}", - msg), - fatal); - }); - // get tree id and return workspace root - auto res = git_repo->GetSubtreeFromCommit( - repo_info.hash, repo_info.subdir, wrapped_logger); - if (not std::holds_alternative<std::string>(res)) { - return; - } - // set the workspace root as present - JustMRProgress::Instance().TaskTracker().Stop(repo_info.origin); - (*ws_setter)( - std::pair(nlohmann::json::array( - {repo_info.ignore_special - ? 
FileRoot::kGitTreeIgnoreSpecialMarker - : FileRoot::kGitTreeMarker, - std::get<std::string>(res), // subtree id - repo_root}), - /*is_cache_hit=*/false)); - }, - [logger, target_path = repo_root](auto const& msg, bool fatal) { - (*logger)(fmt::format("While running critical Git op " - "KEEP_TAG for target {}:\n{}", - target_path.string(), - msg), - fatal); - }); + } + } + + NetworkFetchAndSetPresentRoot(repo_info, + repo_root, + fetch_repo, + additional_mirrors, + git_cas, + critical_git_op_map, + git_bin, + launcher, + fetch_absent, + ts, + ws_setter, + logger); } else { // commit is present in given repository |