diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/other_tools/root_maps/commit_git_map.cpp | 1079 |
1 files changed, 659 insertions, 420 deletions
diff --git a/src/other_tools/root_maps/commit_git_map.cpp b/src/other_tools/root_maps/commit_git_map.cpp index 3cf53cb4..04f14023 100644 --- a/src/other_tools/root_maps/commit_git_map.cpp +++ b/src/other_tools/root_maps/commit_git_map.cpp @@ -52,6 +52,12 @@ namespace { return std::nullopt; } +[[nodiscard]] auto IsCacheGitRoot( + std::filesystem::path const& repo_root) noexcept -> bool { + return std::filesystem::absolute(ToNormalPath(repo_root)) == + std::filesystem::absolute(ToNormalPath(StorageConfig::GitRoot())); +} + /// \brief Helper function for ensuring the serve endpoint, if given, has the /// root if it was marked absent. /// It guarantees the logger is called exactly once with fatal on failure, and @@ -137,7 +143,6 @@ void WriteIdFileAndSetWSRoot(std::string const& root_tree_id, std::string const& subdir, bool ignore_special, GitCASPtr const& git_cas, - std::filesystem::path const& repo_root, std::filesystem::path const& tree_id_file, CommitGitMap::SetterPtr const& ws_setter, CommitGitMap::LoggerPtr const& logger) { @@ -152,9 +157,9 @@ void WriteIdFileAndSetWSRoot(std::string const& root_tree_id, // extract the subdir tree auto git_repo = GitRepoRemote::Open(git_cas); // link fake repo to odb if (not git_repo) { - (*logger)( - fmt::format("Could not open repository {}", repo_root.string()), - /*fatal=*/true); + (*logger)(fmt::format("Could not open cache object database {}", + StorageConfig::GitRoot().string()), + /*fatal=*/true); return; } auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( @@ -180,6 +185,342 @@ void WriteIdFileAndSetWSRoot(std::string const& root_tree_id, false)); } +void NetworkFetchAndSetPresentRoot( + GitRepoInfo const& repo_info, + std::filesystem::path const& repo_root, + std::string const& fetch_repo, + MirrorsPtr const& additional_mirrors, + GitCASPtr const& git_cas, + gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, + std::string const& git_bin, + std::vector<std::string> const& launcher, + bool fetch_absent, + gsl::not_null<TaskSystem*> const& ts, + CommitGitMap::SetterPtr const& ws_setter, + CommitGitMap::LoggerPtr const& logger) { + // reaching here can only result in a root that is present + if (repo_info.absent and not fetch_absent) { + (*logger)( + fmt::format("Cannot create workspace root as absent for commit {}.", + repo_info.hash), + /*fatal=*/true); + return; + } + + auto git_repo = GitRepoRemote::Open(git_cas); // link fake repo to odb + if (not git_repo) { + (*logger)( + fmt::format("Could not open repository {}", repo_root.string()), + /*fatal=*/true); + return; + } + + // default to fetching from network + auto tmp_dir = StorageUtils::CreateTypedTmpDir("fetch"); + if (not tmp_dir) { + (*logger)("Failed to create fetch tmp directory!", + /*fatal=*/true); + return; + } + // store failed attempts for subsequent logging + bool fetched{false}; + std::string err_messages{}; + // keep all remotes checked to report them in case fetch fails + std::string remotes_buffer{}; + // try local mirrors first + auto local_mirrors = + MirrorsUtils::GetLocalMirrors(additional_mirrors, fetch_repo); + for (auto mirror : local_mirrors) { + auto mirror_path = GitURLIsPath(mirror); + if (mirror_path) { + mirror = std::filesystem::absolute(*mirror_path).string(); + } + auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [mirror, &err_messages](auto const& msg, bool /*fatal*/) { + err_messages += fmt::format( + "While attempting fetch from local mirror {}:\n{}\n", + mirror, + msg); + }); + if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), + mirror, + repo_info.branch, + repo_info.inherit_env, + git_bin, + launcher, + wrapped_logger)) { + fetched = true; + break; + } + // add local mirror to buffer + remotes_buffer.append(fmt::format("\n> {}", mirror)); + } + if (not fetched) { + // get preferred hostnames list + auto preferred_hostnames = + MirrorsUtils::GetPreferredHostnames(additional_mirrors); + // try first the main URL, but with each of the preferred hostnames, if + // URL is not a path + if (not GitURLIsPath(fetch_repo)) { + for (auto const& hostname : preferred_hostnames) { + if (auto preferred_url = + CurlURLHandle::ReplaceHostname(fetch_repo, hostname)) { + auto wrapped_logger = + std::make_shared<AsyncMapConsumerLogger>( + [preferred_url, &err_messages](auto const& msg, + bool /*fatal*/) { + err_messages += fmt::format( + "While attempting fetch from remote " + "{}:\n{}\n", + *preferred_url, + msg); + }); + if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), + *preferred_url, + repo_info.branch, + repo_info.inherit_env, + git_bin, + launcher, + wrapped_logger)) { + fetched = true; + break; + } + // add preferred to buffer + remotes_buffer.append( + fmt::format("\n> {}", *preferred_url)); + } + else { + // report failed hostname + remotes_buffer.append( + fmt::format("\n> {} (failed hostname replace: {})", + fetch_repo, + hostname)); + } + } + } + if (not fetched) { + // now try the original main fetch URL + auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [fetch_repo, &err_messages](auto const& msg, bool /*fatal*/) { + err_messages += fmt::format( + "While attempting fetch from remote {}:\n{}\n", + fetch_repo, + msg); + }); + if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), + fetch_repo, + repo_info.branch, + repo_info.inherit_env, + git_bin, + launcher, + wrapped_logger)) { + fetched = true; + } + else { + // add main fetch URL to buffer + remotes_buffer.append(fmt::format("\n> {}", fetch_repo)); + // now try to fetch from mirrors, in order, if given + for (auto mirror : repo_info.mirrors) { + auto mirror_path = GitURLIsPath(mirror); + if (mirror_path) { + mirror = + std::filesystem::absolute(*mirror_path).string(); + } + else { + // if non-path, try each of the preferred hostnames + for (auto const& hostname : preferred_hostnames) { + if (auto preferred_mirror = + CurlURLHandle::ReplaceHostname(mirror, + hostname)) { + wrapped_logger = + std::make_shared<AsyncMapConsumerLogger>( + [preferred_mirror, &err_messages]( + auto const& msg, bool /*fatal*/) { + err_messages += fmt::format( + "While attempting fetch from " + "mirror {}:\n{}\n", + *preferred_mirror, + msg); + }); + if (git_repo->FetchViaTmpRepo( + tmp_dir->GetPath(), + *preferred_mirror, + repo_info.branch, + repo_info.inherit_env, + git_bin, + launcher, + wrapped_logger)) { + fetched = true; + break; + } + // add preferred mirror to buffer + remotes_buffer.append( + fmt::format("\n> {}", *preferred_mirror)); + } + else { + // report failed hostname + remotes_buffer.append(fmt::format( + "\n> {} (failed hostname replace: {})", + mirror, + hostname)); + } + } + } + if (fetched) { + break; + } + wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [mirror, &err_messages](auto const& msg, + bool /*fatal*/) { + err_messages += fmt::format( + "While attempting fetch from mirror {}:\n{}\n", + mirror, + msg); + }); + if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), + mirror, + repo_info.branch, + repo_info.inherit_env, + git_bin, + launcher, + wrapped_logger)) { + fetched = true; + break; + } + // add mirror to buffer + remotes_buffer.append(fmt::format("\n> {}", mirror)); + } + } + } + } + if (not fetched) { + // log fetch failure and list the remotes tried + (*logger)( + fmt::format("While trying to fetch from provided remotes:{}Fetch " + "failed for the provided remotes{}", + err_messages, + remotes_buffer), + /*fatal=*/true); + return; + } + // setup wrapped logger + auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [logger](auto const& msg, bool fatal) { + (*logger)(fmt::format("While checking commit exists:\n{}", msg), + fatal); + }); + // check if commit exists now, after fetch + auto is_commit_present = + git_repo->CheckCommitExists(repo_info.hash, wrapped_logger); + if (not is_commit_present) { + return; + } + if (not *is_commit_present) { + // commit could not be fetched, so fail + (*logger)(fmt::format( + "Could not fetch commit {} from branch {} for remote {}", + repo_info.hash, + repo_info.branch, + fetch_repo), + /*fatal=*/true); + return; + } + // if witnessing repository is the Git cache, then also tag the commit + if (IsCacheGitRoot(repo_root)) { + GitOpKey op_key = {.params = + { + repo_root, // target_path + repo_info.hash, // git_hash + "", // branch + "Keep referenced tree alive" // message + }, + .op_type = GitOpType::KEEP_TAG}; + critical_git_op_map->ConsumeAfterKeysReady( + ts, + {std::move(op_key)}, + [git_cas, repo_info, repo_root, ws_setter, logger]( + auto const& values) { + GitOpValue op_result = *values[0]; + // check flag + if (not op_result.result) { + (*logger)("Keep tag failed", + /*fatal=*/true); + return; + } + auto git_repo = + GitRepoRemote::Open(git_cas); // link fake repo to odb + if (not git_repo) { + (*logger)(fmt::format("Could not open repository {}", + repo_root.string()), + /*fatal=*/true); + return; + } + // setup wrapped logger + auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [logger](auto const& msg, bool fatal) { + (*logger)( + fmt::format( + "While getting subtree from commit:\n{}", msg), + fatal); + }); + // get tree id and return workspace root + auto res = git_repo->GetSubtreeFromCommit( + repo_info.hash, repo_info.subdir, wrapped_logger); + if (not std::holds_alternative<std::string>(res)) { + return; + } + // set the workspace root as present + JustMRProgress::Instance().TaskTracker().Stop(repo_info.origin); + (*ws_setter)( + std::pair(nlohmann::json::array( + {repo_info.ignore_special + ? FileRoot::kGitTreeIgnoreSpecialMarker + : FileRoot::kGitTreeMarker, + std::get<std::string>(res), // subtree id + repo_root}), + /*is_cache_hit=*/false)); + }, + [logger, target_path = repo_root](auto const& msg, bool fatal) { + (*logger)(fmt::format("While running critical Git op KEEP_TAG " + "for target {}:\n{}", + target_path.string(), + msg), + fatal); + }); + } + else { + auto git_repo = GitRepoRemote::Open(git_cas); // link fake repo to odb + if (not git_repo) { + (*logger)( + fmt::format("Could not open repository {}", repo_root.string()), + /*fatal=*/true); + return; + } + // setup wrapped logger + auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [logger](auto const& msg, bool fatal) { + (*logger)( + fmt::format("While getting subtree from commit:\n{}", msg), + fatal); + }); + // get tree id and return workspace root + auto res = git_repo->GetSubtreeFromCommit( + repo_info.hash, repo_info.subdir, wrapped_logger); + if (not std::holds_alternative<std::string>(res)) { + return; + } + // set the workspace root as present + JustMRProgress::Instance().TaskTracker().Stop(repo_info.origin); + (*ws_setter)(std::pair( + nlohmann::json::array({repo_info.ignore_special + ? FileRoot::kGitTreeIgnoreSpecialMarker + : FileRoot::kGitTreeMarker, + std::get<std::string>(res), // subtree id + repo_root}), + /*is_cache_hit=*/false)); + } +} + /// \brief Contains the main logic for this async map. It ensures the commit is /// available for processing (including fetching for a present root) and setting /// the root. @@ -202,8 +543,8 @@ void EnsureCommit( gsl::not_null<TaskSystem*> const& ts, CommitGitMap::SetterPtr const& ws_setter, CommitGitMap::LoggerPtr const& logger) { - // ensure commit exists, and fetch if needed - auto git_repo = GitRepoRemote::Open(git_cas); // link fake repo to odb + // link fake repo to odb + auto git_repo = GitRepoRemote::Open(git_cas); if (not git_repo) { (*logger)( fmt::format("Could not open repository {}", repo_root.string()), @@ -223,6 +564,9 @@ void EnsureCommit( } if (not is_commit_present.value()) { auto tree_id_file = StorageUtils::GetCommitTreeIDFile(repo_info.hash); + // Check if we have stored a file association between commit and tree; + // if an association file exists, the respective tree MUST be in the + // Git cache if (FileSystemManager::Exists(tree_id_file)) { // read resolved tree id auto resolved_tree_id = FileSystemManager::ReadFile(tree_id_file); @@ -232,6 +576,20 @@ void EnsureCommit( /*fatal=*/true); return; } + auto just_git_cas = GitCAS::Open(StorageConfig::GitRoot()); + if (not just_git_cas) { + (*logger)(fmt::format("Could not open Git cache database {}", + StorageConfig::GitRoot().string()), + /*fatal=*/true); + return; + } + auto just_git_repo = GitRepo::Open(just_git_cas); + if (not just_git_repo) { + (*logger)(fmt::format("Could not open Git cache repository {}", + StorageConfig::GitRoot().string()), + /*fatal=*/true); + return; + } // extract the subdir tree wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( [logger, subdir = repo_info.subdir, tree = *resolved_tree_id]( @@ -243,7 +601,7 @@ void EnsureCommit( msg), fatal); }); - auto tree_id = git_repo->GetSubtreeFromTree( + auto tree_id = just_git_repo->GetSubtreeFromTree( *resolved_tree_id, repo_info.subdir, wrapped_logger); if (not tree_id) { return; @@ -260,7 +618,7 @@ void EnsureCommit( logger); } else { - // this root as present + // this root is present (*ws_setter)( std::pair(nlohmann::json::array( {repo_info.ignore_special @@ -318,444 +676,325 @@ void EnsureCommit( if (std::holds_alternative<std::string>(serve_result)) { auto const& root_tree_id = std::get<std::string>(serve_result); - // verify if we know the tree already locally - auto wrapped_logger = - std::make_shared<AsyncMapConsumerLogger>( - [logger, tree = root_tree_id](auto const& msg, - bool fatal) { + // verify if we know the tree already in the local Git cache + GitOpKey op_key = { + .params = + { + StorageConfig::GitRoot(), // target_path + "", // git_hash + "", // branch + std::nullopt, // message + true // init_bare + }, + .op_type = GitOpType::ENSURE_INIT}; + critical_git_op_map->ConsumeAfterKeysReady( + ts, + {std::move(op_key)}, + [root_tree_id, + tree_id_file, + repo_info, + repo_root, + fetch_repo, + additional_mirrors, + git_cas, + critical_git_op_map, + import_to_git_map, + git_bin, + launcher, + local_api, + remote_api, + fetch_absent, + ts, + ws_setter, + logger](auto const& values) { + GitOpValue op_result = *values[0]; + // check flag + if (not op_result.result) { + (*logger)("Git init failed", + /*fatal=*/true); + return; + } + auto just_git_repo = + GitRepoRemote::Open(op_result.git_cas); + if (not just_git_repo) { (*logger)( - fmt::format("While verifying presence of " - "tree {}:\n{}", + fmt::format( + "Could not open Git cache repository " + "{}", + StorageConfig::GitRoot().string()), + /*fatal=*/true); + return; + } + // check tree existence + auto wrapped_logger = + std::make_shared<AsyncMapConsumerLogger>( + [logger, tree = root_tree_id]( + auto const& msg, bool fatal) { + (*logger)( + fmt::format( + "While verifying presence of " + "tree {} in repository {}:\n{}", tree, + StorageConfig::GitRoot() + .string(), msg), - fatal); - }); - auto tree_present = - git_repo->CheckTreeExists(root_tree_id, wrapped_logger); - if (not tree_present) { - return; - } - if (*tree_present) { - JustMRProgress::Instance().TaskTracker().Stop( - repo_info.origin); - // write association to id file, get subdir tree, - // and set the workspace root as present - WriteIdFileAndSetWSRoot(root_tree_id, - repo_info.subdir, - repo_info.ignore_special, - git_cas, - repo_root, - tree_id_file, - ws_setter, - logger); - return; - } - // try to get root tree from remote execution endpoint - auto root_digest = - ArtifactDigest{root_tree_id, 0, /*is_tree=*/true}; - if (remote_api and - remote_api.value()->RetrieveToCas( - {Artifact::ObjectInfo{.digest = root_digest, - .type = ObjectType::Tree}}, - local_api)) { - JustMRProgress::Instance().TaskTracker().Stop( - repo_info.origin); - // Move tree from CAS to local git storage - auto tmp_dir = StorageUtils::CreateTypedTmpDir( - "fetch-absent-root"); - if (not tmp_dir) { - (*logger)( - fmt::format("Failed to create tmp " - "directory after fetching root " - "tree {} for absent commit {}", - root_tree_id, - repo_info.hash), - /*fatal=*/true); - return; - } - if (not local_api->RetrieveToPaths( - {Artifact::ObjectInfo{ - .digest = root_digest, - .type = ObjectType::Tree}}, - {tmp_dir->GetPath()})) { - (*logger)(fmt::format("Failed to copy fetched root " - "tree {} to {}", - root_tree_id, - tmp_dir->GetPath().string()), - /*fatal=*/true); - return; - } - CommitInfo c_info{ - tmp_dir->GetPath(), "tree", root_tree_id}; - import_to_git_map->ConsumeAfterKeysReady( - ts, - {std::move(c_info)}, - [tmp_dir, // keep tmp_dir alive - root_tree_id, - subdir = repo_info.subdir, - ignore_special = repo_info.ignore_special, - git_cas, - repo_root, - tree_id_file, - ws_setter, - logger](auto const& values) { - if (not values[0]->second) { - (*logger)("Importing to git failed", + fatal); + }); + auto tree_present = just_git_repo->CheckTreeExists( + root_tree_id, wrapped_logger); + if (not tree_present) { + return; + } + if (*tree_present) { + JustMRProgress::Instance().TaskTracker().Stop( + repo_info.origin); + // write association to id file, get subdir + // tree, and set the workspace root as present + WriteIdFileAndSetWSRoot( + root_tree_id, + repo_info.subdir, + repo_info.ignore_special, + op_result.git_cas, + tree_id_file, + ws_setter, + logger); + return; + } + + // now check if the tree is in the local checkout, + // if this checkout is not our Git cache; this can + // save an unnecessary remote CAS call + if (not IsCacheGitRoot(repo_root)) { + auto git_repo = GitRepoRemote::Open(git_cas); + if (not git_repo) { + (*logger)(fmt::format("Could not open Git " + "repository {}", + repo_root.string()), /*fatal=*/true); return; } - // sanity check: we should get the expected tree - if (values[0]->first != root_tree_id) { + // check tree existence + wrapped_logger = + std::make_shared<AsyncMapConsumerLogger>( + [logger, + tree = root_tree_id, + repo_root](auto const& msg, + bool fatal) { + (*logger)( + fmt::format( + "While verifying presence " + "of tree {} in repository " + "{}:\n{}", + tree, + repo_root.string(), + msg), + fatal); + }); + tree_present = git_repo->CheckTreeExists( + root_tree_id, wrapped_logger); + if (not tree_present) { + return; + } + if (*tree_present) { + JustMRProgress::Instance() + .TaskTracker() + .Stop(repo_info.origin); + // get subdir tree and set the workspace + // root as present; as this tree is not in + // our Git cache, no file association should + // be stored + wrapped_logger = std::make_shared< + AsyncMapConsumerLogger>( + [logger, + subdir = repo_info.subdir, + tree = root_tree_id](auto const& msg, + bool fatal) { + (*logger)( + fmt::format( + "While getting subdir {} " + "in tree {}:\n{}", + subdir, + tree, + msg), + fatal); + }); + auto tree_id = git_repo->GetSubtreeFromTree( + root_tree_id, + repo_info.subdir, + wrapped_logger); + if (not tree_id) { + return; + } + // set the workspace root as present + (*ws_setter)(std::pair( + nlohmann::json::array( + {repo_info.ignore_special + ? FileRoot:: + kGitTreeIgnoreSpecialMarker + : FileRoot::kGitTreeMarker, + *tree_id, + repo_root.string()}), + false)); + // done! + return; + } + } + + // try to get root tree from remote CAS + auto root_digest = ArtifactDigest{ + root_tree_id, 0, /*is_tree=*/true}; + if (remote_api and + remote_api.value()->RetrieveToCas( + {Artifact::ObjectInfo{ + .digest = root_digest, + .type = ObjectType::Tree}}, + local_api)) { + JustMRProgress::Instance().TaskTracker().Stop( + repo_info.origin); + // Move tree from local CAS to local Git storage + auto tmp_dir = StorageUtils::CreateTypedTmpDir( + "fetch-absent-root"); + if (not tmp_dir) { (*logger)( fmt::format( - "Mismatch in imported git tree " - "id:\nexpected {}, but got {}", + "Failed to create tmp directory " + "after fetching root tree {} for " + "absent commit {}", root_tree_id, - values[0]->first), + repo_info.hash), /*fatal=*/true); return; } - // tree is now in Git cache - auto just_git_cas = - GitCAS::Open(StorageConfig::GitRoot()); - if (not just_git_cas) { - (*logger)( - "Could not open Git cache object " - "database!", - /*fatal=*/true); + if (not local_api->RetrieveToPaths( + {Artifact::ObjectInfo{ + .digest = root_digest, + .type = ObjectType::Tree}}, + {tmp_dir->GetPath()})) { + (*logger)(fmt::format( + "Failed to copy fetched root " + "tree {} to {}", + root_tree_id, + tmp_dir->GetPath().string()), + /*fatal=*/true); return; } - // write association to id file, get subdir - // tree, and set the workspace root as present - WriteIdFileAndSetWSRoot( - root_tree_id, - subdir, - ignore_special, - just_git_cas, - StorageConfig::GitRoot(), - tree_id_file, - ws_setter, - logger); - }, - [logger, tmp_dir, root_tree_id](auto const& msg, - bool fatal) { - (*logger)( - fmt::format("While moving root tree {} " + CommitInfo c_info{ + tmp_dir->GetPath(), "tree", root_tree_id}; + import_to_git_map->ConsumeAfterKeysReady( + ts, + {std::move(c_info)}, + [tmp_dir, // keep tmp_dir alive + root_tree_id, + subdir = repo_info.subdir, + ignore_special = repo_info.ignore_special, + just_git_cas = op_result.git_cas, + tree_id_file, + ws_setter, + logger](auto const& values) { + if (not values[0]->second) { + (*logger)("Importing to git failed", + /*fatal=*/true); + return; + } + // sanity check: we should get the + // expected tree + if (values[0]->first != root_tree_id) { + (*logger)( + fmt::format( + "Mismatch in imported git " + "tree id:\nexpected {}, " + "but got {}", + root_tree_id, + values[0]->first), + /*fatal=*/true); + return; + } + // tree is now in Git cache; + // write association to id file, get + // subdir tree, and set the workspace + // root as present + WriteIdFileAndSetWSRoot(root_tree_id, + subdir, + ignore_special, + just_git_cas, + tree_id_file, + ws_setter, + logger); + }, + [logger, tmp_dir, root_tree_id]( + auto const& msg, bool fatal) { + (*logger)( + fmt::format( + "While moving root tree {} " "from {} to local git:\n{}", root_tree_id, tmp_dir->GetPath().string(), msg), - fatal); - }); + fatal); + }); - return; - } - // just serve should have made the tree available in the - // remote CAS, so log this attempt and revert to network - (*logger)(fmt::format("Tree {} marked as served, but not " - "found on remote", - root_tree_id), - /*fatal=*/false); - } - else { - // check if serve failure was due to commit not being found - // or it is otherwise fatal - auto const& is_fatal = std::get<bool>(serve_result); - if (is_fatal) { - (*logger)(fmt::format("Serve endpoint failed to set up " - "root from known commit {}", - repo_info.hash), - /*fatal=*/true); - return; - } - } - } - } + return; + } + // just serve should have made the tree available in + // the remote CAS, so log this attempt and revert to + // network + (*logger)(fmt::format("Tree {} marked as served, " + "but not found on remote", + root_tree_id), + /*fatal=*/false); - // reaching here can only result in a root that is present - if (repo_info.absent and not fetch_absent) { - (*logger)(fmt::format("Cannot create workspace root as absent for " - "commit {}.", - repo_info.hash), - /*fatal=*/true); - return; - } + NetworkFetchAndSetPresentRoot(repo_info, + repo_root, + fetch_repo, + additional_mirrors, + git_cas, + critical_git_op_map, + git_bin, + launcher, + fetch_absent, + ts, + ws_setter, + logger); + }, + [logger, target_path = StorageConfig::GitRoot()]( + auto const& msg, bool fatal) { + (*logger)(fmt::format("While running critical Git " + "op ENSURE_INIT bare for " + "target {}:\n{}", + target_path.string(), + msg), + fatal); + }); - // default to fetching from network - auto tmp_dir = StorageUtils::CreateTypedTmpDir("fetch"); - if (not tmp_dir) { - (*logger)("Failed to create fetch tmp directory!", - /*fatal=*/true); - return; - } - // store failed attempts for subsequent logging - bool fetched{false}; - std::string err_messages{}; - // keep all remotes checked to report them in case fetch fails - std::string remotes_buffer{}; - // try local mirrors first - auto local_mirrors = - MirrorsUtils::GetLocalMirrors(additional_mirrors, fetch_repo); - for (auto mirror : local_mirrors) { - auto mirror_path = GitURLIsPath(mirror); - if (mirror_path) { - mirror = std::filesystem::absolute(*mirror_path).string(); - } - auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( - [mirror, &err_messages](auto const& msg, bool /*fatal*/) { - err_messages += fmt::format( - "\nWhile attempting fetch from local mirror {}:\n{}", - mirror, - msg); - }); - if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), - mirror, - repo_info.branch, - repo_info.inherit_env, - git_bin, - launcher, - wrapped_logger)) { - fetched = true; - break; - } - // add local mirror to buffer - remotes_buffer.append(fmt::format("\n> {}", mirror)); - } - if (not fetched) { - // get preferred hostnames list - auto preferred_hostnames = - MirrorsUtils::GetPreferredHostnames(additional_mirrors); - // try first the main URL, but with each of the preferred - // hostnames, if URL is not a path - if (not GitURLIsPath(fetch_repo)) { - for (auto const& hostname : preferred_hostnames) { - if (auto preferred_url = CurlURLHandle::ReplaceHostname( - fetch_repo, hostname)) { - auto wrapped_logger = - std::make_shared<AsyncMapConsumerLogger>( - [preferred_url, &err_messages](auto const& msg, - bool /*fatal*/) { - err_messages += fmt::format( - "\nWhile attempting fetch from remote " - "{}:\n{}", - *preferred_url, - msg); - }); - if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), - *preferred_url, - repo_info.branch, - repo_info.inherit_env, - git_bin, - launcher, - wrapped_logger)) { - fetched = true; - break; - } - // add preferred to buffer - remotes_buffer.append( - fmt::format("\n> {}", *preferred_url)); - } - else { - // report failed hostname - remotes_buffer.append( - fmt::format("\n> {} (failed hostname replace: {})", - fetch_repo, - hostname)); - } - } - } - if (not fetched) { - // now try the original main fetch URL - auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( - [fetch_repo, &err_messages](auto const& msg, - bool /*fatal*/) { - err_messages += fmt::format( - "\nWhile attempting fetch from remote {}:\n{}", - fetch_repo, - msg); - }); - if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), - fetch_repo, - repo_info.branch, - repo_info.inherit_env, - git_bin, - launcher, - wrapped_logger)) { - fetched = true; - } - else { - // add main fetch URL to buffer - remotes_buffer.append(fmt::format("\n> {}", fetch_repo)); - // now try to fetch from mirrors, in order, if given - for (auto mirror : repo_info.mirrors) { - auto mirror_path = GitURLIsPath(mirror); - if (mirror_path) { - mirror = std::filesystem::absolute(*mirror_path) - .string(); - } - else { - // if non-path, try each of the preferred hostnames - for (auto const& hostname : preferred_hostnames) { - if (auto preferred_mirror = - CurlURLHandle::ReplaceHostname( - mirror, hostname)) { - wrapped_logger = std::make_shared< - AsyncMapConsumerLogger>( - [preferred_mirror, &err_messages]( - auto const& msg, bool /*fatal*/) { - err_messages += fmt::format( - "\nWhile attempting fetch from " - "mirror {}:\n{}", - *preferred_mirror, - msg); - }); - if (git_repo->FetchViaTmpRepo( - tmp_dir->GetPath(), - *preferred_mirror, - repo_info.branch, - repo_info.inherit_env, - git_bin, - launcher, - wrapped_logger)) { - fetched = true; - break; - } - // add preferred mirror to buffer - remotes_buffer.append(fmt::format( - "\n> {}", *preferred_mirror)); - } - else { - // report failed hostname - remotes_buffer.append(fmt::format( - "\n> {} (failed hostname replace: {})", - mirror, - hostname)); - } - } - } - if (fetched) { - break; - } - wrapped_logger = - std::make_shared<AsyncMapConsumerLogger>( - [mirror, &err_messages](auto const& msg, - bool /*fatal*/) { - err_messages += fmt::format( - "\nWhile attempting fetch from mirror " - "{}:\n{}", - mirror, - msg); - }); - if (git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), - mirror, - repo_info.branch, - repo_info.inherit_env, - git_bin, - launcher, - wrapped_logger)) { - fetched = true; - break; - } - // add mirror to buffer - remotes_buffer.append(fmt::format("\n> {}", mirror)); - } - } - } - } - if (not fetched) { - // log fetch failure details separately to reduce verbosity - (*logger)( - fmt::format("While fetching via tmp repo:{}", err_messages), - /*fatal=*/false); - (*logger)(fmt::format("Failed to fetch from provided remotes:{}", - remotes_buffer), - /*fatal=*/true); - return; - } - // setup wrapped logger - wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( - [logger](auto const& msg, bool fatal) { - (*logger)(fmt::format("While checking commit exists:\n{}", msg), - fatal); - }); - // check if commit exists now, after fetch - auto is_commit_present = - git_repo->CheckCommitExists(repo_info.hash, wrapped_logger); - if (not is_commit_present) { - return; - } - if (not *is_commit_present) { - // commit could not be fetched, so fail - (*logger)(fmt::format("Could not fetch commit {} from branch " - "{} for remote {}", - repo_info.hash, - repo_info.branch, - fetch_repo), - /*fatal=*/true); - return; - } - // keep tag - GitOpKey op_key = {.params = - { - repo_root, // target_path - repo_info.hash, // git_hash - "", // branch - "Keep referenced tree alive" // message - }, - .op_type = GitOpType::KEEP_TAG}; - critical_git_op_map->ConsumeAfterKeysReady( - ts, - {std::move(op_key)}, - [git_cas, repo_info, repo_root, ws_setter, logger]( - auto const& values) { - GitOpValue op_result = *values[0]; - // check flag - if (not op_result.result) { - (*logger)("Keep tag failed", - /*fatal=*/true); + // done! return; } - // ensure commit exists, and fetch if needed - auto git_repo = - GitRepoRemote::Open(git_cas); // link fake repo to odb - if (not git_repo) { - (*logger)(fmt::format("Could not open repository {}", - repo_root.string()), + + // check if serve failure was due to commit not being found + // or it is otherwise fatal + auto const& is_fatal = std::get<bool>(serve_result); + if (is_fatal) { + (*logger)(fmt::format("Serve endpoint failed to set up " + "root from known commit {}", + repo_info.hash), /*fatal=*/true); return; } - // setup wrapped logger - auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( - [logger](auto const& msg, bool fatal) { - (*logger)(fmt::format("While getting subtree " - "from commit:\n{}", - msg), - fatal); - }); - // get tree id and return workspace root - auto res = git_repo->GetSubtreeFromCommit( - repo_info.hash, repo_info.subdir, wrapped_logger); - if (not std::holds_alternative<std::string>(res)) { - return; - } - // set the workspace root as present - JustMRProgress::Instance().TaskTracker().Stop(repo_info.origin); - (*ws_setter)( - std::pair(nlohmann::json::array( - {repo_info.ignore_special - ? FileRoot::kGitTreeIgnoreSpecialMarker - : FileRoot::kGitTreeMarker, - std::get<std::string>(res), // subtree id - repo_root}), - /*is_cache_hit=*/false)); - }, - [logger, target_path = repo_root](auto const& msg, bool fatal) { - (*logger)(fmt::format("While running critical Git op " - "KEEP_TAG for target {}:\n{}", - target_path.string(), - msg), - fatal); - }); + } + } + + NetworkFetchAndSetPresentRoot(repo_info, + repo_root, + fetch_repo, + additional_mirrors, + git_cas, + critical_git_op_map, + git_bin, + launcher, + fetch_absent, + ts, + ws_setter, + logger); } else { // commit is present in given repository |