From 720963e2d3f4e7587398b315d42ad275378f860e Mon Sep 17 00:00:00 2001 From: Paul Cristian Sarbu Date: Thu, 4 Jan 2024 11:55:36 +0100 Subject: just-mr distdir: Improved handling of absent root If root is marked absent and we're not asked to fetch absent, we can compute the resulting distdir root tree in-memory, as we have all the information. Otherwise, i.e., if we actually need to have the archives locally, we first check if the remote serve can provide them in the remote CAS before continuing as before to fetch the listed archives one at a time. --- src/other_tools/root_maps/distdir_git_map.cpp | 262 ++++++++++++++++++-------- 1 file changed, 188 insertions(+), 74 deletions(-) (limited to 'src/other_tools/root_maps/distdir_git_map.cpp') diff --git a/src/other_tools/root_maps/distdir_git_map.cpp b/src/other_tools/root_maps/distdir_git_map.cpp index 677e3057..6fa9d2d2 100644 --- a/src/other_tools/root_maps/distdir_git_map.cpp +++ b/src/other_tools/root_maps/distdir_git_map.cpp @@ -17,9 +17,15 @@ #include #include "fmt/core.h" +#include "src/buildtool/common/artifact.hpp" +#include "src/buildtool/common/artifact_digest.hpp" #include "src/buildtool/execution_api/common/execution_common.hpp" #include "src/buildtool/file_system/file_root.hpp" #include "src/buildtool/file_system/file_storage.hpp" +#include "src/buildtool/file_system/git_repo.hpp" +#include "src/buildtool/file_system/object_type.hpp" +#include "src/buildtool/multithreading/task_system.hpp" +#include "src/buildtool/serve_api/remote/serve_api.hpp" #include "src/buildtool/storage/config.hpp" #include "src/buildtool/storage/fs_utils.hpp" #include "src/buildtool/storage/storage.hpp" @@ -52,20 +58,92 @@ namespace { }); } +void ImportFromCASAndSetRoot( + std::shared_ptr> const& + content_list, + std::string const& content_id, + std::filesystem::path const& distdir_tree_id_file, + gsl::not_null const& import_to_git_map, + gsl::not_null const& ts, + DistdirGitMap::SetterPtr const& setter, + DistdirGitMap::LoggerPtr const& logger) { + // create the links to CAS + auto tmp_dir = StorageUtils::CreateTypedTmpDir("distdir"); + if (not tmp_dir) { + (*logger)(fmt::format("Failed to create tmp path for " + "distdir target {}", + content_id), + /*fatal=*/true); + return; + } + // link content from CAS into tmp dir + if (not LinkToCAS(content_list, tmp_dir->GetPath())) { + (*logger)( + fmt::format("Failed to create links to CAS content!", content_id), + /*fatal=*/true); + return; + } + // do import to git + CommitInfo c_info{tmp_dir->GetPath(), "distdir", content_id}; + import_to_git_map->ConsumeAfterKeysReady( + ts, + {std::move(c_info)}, + [tmp_dir, // keep tmp_dir alive + distdir_tree_id_file, + setter, + logger](auto const& values) { + // check for errors + if (not values[0]->second) { + (*logger)("Importing to git failed", + /*fatal=*/true); + return; + } + // only the tree is of interest + std::string distdir_tree_id = values[0]->first; + // write to tree id file + if (not StorageUtils::WriteTreeIDFile(distdir_tree_id_file, + distdir_tree_id)) { + (*logger)(fmt::format("Failed to write tree id to file {}", + distdir_tree_id_file.string()), + /*fatal=*/true); + return; + } + // set the workspace root as present + (*setter)(std::pair( + nlohmann::json::array({FileRoot::kGitTreeMarker, + distdir_tree_id, + StorageConfig::GitRoot().string()}), + false /*no cache hit*/)); + }, + [logger, target_path = tmp_dir->GetPath()](auto const& msg, + bool fatal) { + (*logger)(fmt::format("While importing target {} to git:\n{}", + target_path.string(), + msg), + fatal); + }); +} + } // namespace auto CreateDistdirGitMap( gsl::not_null const& content_cas_map, gsl::not_null const& import_to_git_map, gsl::not_null const& critical_git_op_map, + bool serve_api_exists, + IExecutionApi* local_api, + IExecutionApi* remote_api, std::size_t jobs) -> DistdirGitMap { auto distdir_to_git = [content_cas_map, import_to_git_map, - critical_git_op_map](auto ts, - auto setter, - auto logger, - auto /* unused */, - auto const& key) { + critical_git_op_map, + serve_api_exists, + local_api, + remote_api](auto ts, + auto setter, + auto logger, + auto /* unused */, + auto const& key) { auto distdir_tree_id_file = StorageUtils::GetDistdirTreeIDFile(key.content_id); if (FileSystemManager::Exists(distdir_tree_id_file)) { @@ -110,95 +188,131 @@ auto CreateDistdirGitMap( if (not absent) { root.emplace_back(StorageConfig::GitRoot().string()); } - (*setter)(std::pair(std::move(root), true)); + (*setter)(std::pair(std::move(root), true /*cache hit*/)); }, [logger, target_path = StorageConfig::GitRoot()]( auto const& msg, bool fatal) { - (*logger)(fmt::format("While running critical Git " - "op ENSURE_INIT for " - "target {}:\n{}", + (*logger)(fmt::format("While running critical Git op " + "ENSURE_INIT for target {}:\n{}", target_path.string(), msg), fatal); }); } else { - // fetch the gathered distdir repos into CAS + // create in-memory Git tree of distdir content to get the tree id + GitRepo::tree_entries_t entries{}; + entries.reserve(key.content_list->size()); + for (auto const& kv : *key.content_list) { + // tree_entries_t type expects raw ids + auto raw_id = FromHexString(kv.second); + if (not raw_id) { + (*logger)(fmt::format("While processing distdir {}: " + "Unexpected failure in conversion to " + "raw id of distfile content {}", + key.content_id, + kv.second), + /*is_fatal=*/true); + return; + } + entries[*raw_id].emplace_back(kv.first, ObjectType::File); + } + auto tree = GitRepo::CreateShallowTree(entries); + if (not tree) { + (*logger)(fmt::format("Failed to construct in-memory tree for " + "distdir content {}", + key.content_id), + /*is_fatal=*/true); + return; + } + // get hash from raw_id + auto tree_id = ToHexString(tree->first); + // if pure absent, we simply set the root tree + if (key.absent) { + (*setter)(std::pair( + nlohmann::json::array({FileRoot::kGitTreeMarker, tree_id}), + false /*no cache hit*/)); + return; + } + // otherwise, we check first the serve endpoint + if (serve_api_exists) { + if (auto served_tree_id = ServeApi::RetrieveTreeFromDistdir( + key.content_list, /*sync_tree=*/true)) { + // sanity check + if (*served_tree_id != tree_id) { + (*logger)( + fmt::format("Unexpected served tree id for distdir " + "content {}:\nExpected {}, but got {}", + key.content_id, + tree_id, + *served_tree_id), + /*is_fatal=*/true); + return; + } + // get the ditfiles from remote CAS in bulk + std::vector objects{}; + objects.reserve(key.content_list->size()); + for (auto const& kv : *key.content_list) { + objects.emplace_back(Artifact::ObjectInfo{ + .digest = + ArtifactDigest{kv.second, 0, /*is_tree=*/false}, + .type = ObjectType::File}); + } + if (remote_api != nullptr and local_api != nullptr and + remote_api->RetrieveToCas(objects, local_api)) { + ImportFromCASAndSetRoot(key.content_list, + key.content_id, + distdir_tree_id_file, + import_to_git_map, + ts, + setter, + logger); + // done! + return; + } + // just serve should have made the blobs available in the + // remote CAS, so log this attempt and revert to default + // fetch each blob individually + (*logger)(fmt::format("Tree {} marked as served, but not " + "all distfile blobs found on remote", + *served_tree_id), + /*fatal=*/false); + } + else { + // give warning + (*logger)( + fmt::format("Distdir content {} could not be served", + key.content_id), + /*fatal=*/false); + } + } + else { + // give warning + (*logger)( + fmt::format("Missing serve endpoint for distdir {} marked " + "absent requires slower network fetch.", + key.content_id), + /*fatal=*/false); + } + // revert to fetching the gathered distdir repos into CAS content_cas_map->ConsumeAfterKeysReady( ts, *key.repos_to_fetch, [distdir_tree_id_file, content_id = key.content_id, content_list = key.content_list, - absent = key.absent, import_to_git_map, ts, setter, logger]([[maybe_unused]] auto const& values) mutable { // repos are in CAS - // create the links to CAS - auto tmp_dir = StorageUtils::CreateTypedTmpDir("distdir"); - if (not tmp_dir) { - (*logger)(fmt::format("Failed to create tmp path for " - "distdir target {}", - content_id), - /*fatal=*/true); - return; - } - // link content from CAS into tmp dir - if (not LinkToCAS(content_list, tmp_dir->GetPath())) { - (*logger)(fmt::format( - "Failed to create links to CAS content!", - content_id), - /*fatal=*/true); - return; - } - // do import to git - CommitInfo c_info{ - tmp_dir->GetPath(), "distdir", content_id}; - import_to_git_map->ConsumeAfterKeysReady( - ts, - {std::move(c_info)}, - [tmp_dir, // keep tmp_dir alive - distdir_tree_id_file, - absent, - setter, - logger](auto const& values) { - // check for errors - if (not values[0]->second) { - (*logger)("Importing to git failed", - /*fatal=*/true); - return; - } - // only the tree is of interest - std::string distdir_tree_id = values[0]->first; - // write to tree id file - if (not StorageUtils::WriteTreeIDFile( - distdir_tree_id_file, distdir_tree_id)) { - (*logger)( - fmt::format( - "Failed to write tree id to file {}", - distdir_tree_id_file.string()), - /*fatal=*/true); - return; - } - // set the workspace root - auto root = nlohmann::json::array( - {FileRoot::kGitTreeMarker, distdir_tree_id}); - if (not absent) { - root.emplace_back( - StorageConfig::GitRoot().string()); - } - (*setter)(std::pair(std::move(root), false)); - }, - [logger, target_path = tmp_dir->GetPath()]( - auto const& msg, bool fatal) { - (*logger)(fmt::format("While importing target {} " - "to git:\n{}", - target_path.string(), - msg), - fatal); - }); + ImportFromCASAndSetRoot(content_list, + content_id, + distdir_tree_id_file, + import_to_git_map, + ts, + setter, + logger); }, [logger, content_id = key.content_id](auto const& msg, bool fatal) { -- cgit v1.2.3