From 720963e2d3f4e7587398b315d42ad275378f860e Mon Sep 17 00:00:00 2001 From: Paul Cristian Sarbu Date: Thu, 4 Jan 2024 11:55:36 +0100 Subject: just-mr distdir: Improved handling of absent root If root is marked absent and we're not asked to fetch absent, we can compute the resulting distdir root tree in-memory, as we have all the information. Otherwise, i.e., if we actually need to have the archives locally, we first check if the remote serve can provide them in the remote CAS before continuing as before to fetch the listed archives one at a time. --- src/other_tools/just_mr/setup.cpp | 12 +- src/other_tools/root_maps/TARGETS | 13 +- src/other_tools/root_maps/distdir_git_map.cpp | 262 ++++++++++++++++++-------- src/other_tools/root_maps/distdir_git_map.hpp | 4 + 4 files changed, 209 insertions(+), 82 deletions(-) diff --git a/src/other_tools/just_mr/setup.cpp b/src/other_tools/just_mr/setup.cpp index dedf9ecb..96411363 100644 --- a/src/other_tools/just_mr/setup.cpp +++ b/src/other_tools/just_mr/setup.cpp @@ -160,10 +160,14 @@ auto MultiRepoSetup(std::shared_ptr const& config, &import_to_git_map, &resolve_symlinks_map, common_args.jobs); - auto distdir_git_map = CreateDistdirGitMap(&content_cas_map, - &import_to_git_map, - &critical_git_op_map, - common_args.jobs); + auto distdir_git_map = + CreateDistdirGitMap(&content_cas_map, + &import_to_git_map, + &critical_git_op_map, + serve_api_exists, + local_api ? &(*local_api) : nullptr, + remote_api ? &(*remote_api) : nullptr, + common_args.jobs); auto tree_id_git_map = CreateTreeIdGitMap( &git_tree_fetch_map, common_args.fetch_absent, common_args.jobs); auto repos_to_setup_map = CreateReposToSetupMap(config, diff --git a/src/other_tools/root_maps/TARGETS b/src/other_tools/root_maps/TARGETS index 7eab68a5..f9d5ed8d 100644 --- a/src/other_tools/root_maps/TARGETS +++ b/src/other_tools/root_maps/TARGETS @@ -12,18 +12,23 @@ , "stage": ["src", "other_tools", "root_maps"] , "private-deps": [ ["@", "fmt", "", "fmt"] - , ["src/other_tools/ops_maps", "critical_git_op_map"] + , ["src/buildtool/common", "common"] , ["src/buildtool/execution_api/common", "common"] , ["src/buildtool/execution_api/local", "config"] , ["src/buildtool/execution_api/local", "local"] + , ["src/buildtool/file_system", "file_root"] + , ["src/buildtool/file_system", "file_storage"] + , ["src/buildtool/file_system", "git_repo"] + , ["src/buildtool/file_system", "object_type"] + , ["src/buildtool/multithreading", "task_system"] + , ["src/buildtool/serve_api/remote", "serve_api"] , ["src/buildtool/storage", "config"] , ["src/buildtool/storage", "fs_utils"] , ["src/buildtool/storage", "storage"] - , ["src/utils/cpp", "tmp_dir"] - , ["src/buildtool/file_system", "file_storage"] + , ["src/other_tools/ops_maps", "critical_git_op_map"] , ["src/other_tools/just_mr/progress_reporting", "progress"] , ["src/other_tools/just_mr/progress_reporting", "statistics"] - , ["src/buildtool/file_system", "file_root"] + , ["src/utils/cpp", "tmp_dir"] ] } , "commit_git_map": diff --git a/src/other_tools/root_maps/distdir_git_map.cpp b/src/other_tools/root_maps/distdir_git_map.cpp index 677e3057..6fa9d2d2 100644 --- a/src/other_tools/root_maps/distdir_git_map.cpp +++ b/src/other_tools/root_maps/distdir_git_map.cpp @@ -17,9 +17,15 @@ #include #include "fmt/core.h" +#include "src/buildtool/common/artifact.hpp" +#include "src/buildtool/common/artifact_digest.hpp" #include "src/buildtool/execution_api/common/execution_common.hpp" #include "src/buildtool/file_system/file_root.hpp" #include "src/buildtool/file_system/file_storage.hpp" +#include "src/buildtool/file_system/git_repo.hpp" +#include "src/buildtool/file_system/object_type.hpp" +#include "src/buildtool/multithreading/task_system.hpp" +#include "src/buildtool/serve_api/remote/serve_api.hpp" #include "src/buildtool/storage/config.hpp" #include "src/buildtool/storage/fs_utils.hpp" #include "src/buildtool/storage/storage.hpp" @@ -52,20 +58,92 @@ namespace { }); } +void ImportFromCASAndSetRoot( + std::shared_ptr> const& + content_list, + std::string const& content_id, + std::filesystem::path const& distdir_tree_id_file, + gsl::not_null const& import_to_git_map, + gsl::not_null const& ts, + DistdirGitMap::SetterPtr const& setter, + DistdirGitMap::LoggerPtr const& logger) { + // create the links to CAS + auto tmp_dir = StorageUtils::CreateTypedTmpDir("distdir"); + if (not tmp_dir) { + (*logger)(fmt::format("Failed to create tmp path for " + "distdir target {}", + content_id), + /*fatal=*/true); + return; + } + // link content from CAS into tmp dir + if (not LinkToCAS(content_list, tmp_dir->GetPath())) { + (*logger)( + fmt::format("Failed to create links to CAS content!", content_id), + /*fatal=*/true); + return; + } + // do import to git + CommitInfo c_info{tmp_dir->GetPath(), "distdir", content_id}; + import_to_git_map->ConsumeAfterKeysReady( + ts, + {std::move(c_info)}, + [tmp_dir, // keep tmp_dir alive + distdir_tree_id_file, + setter, + logger](auto const& values) { + // check for errors + if (not values[0]->second) { + (*logger)("Importing to git failed", + /*fatal=*/true); + return; + } + // only the tree is of interest + std::string distdir_tree_id = values[0]->first; + // write to tree id file + if (not StorageUtils::WriteTreeIDFile(distdir_tree_id_file, + distdir_tree_id)) { + (*logger)(fmt::format("Failed to write tree id to file {}", + distdir_tree_id_file.string()), + /*fatal=*/true); + return; + } + // set the workspace root as present + (*setter)(std::pair( + nlohmann::json::array({FileRoot::kGitTreeMarker, + distdir_tree_id, + StorageConfig::GitRoot().string()}), + false /*no cache hit*/)); + }, + [logger, target_path = tmp_dir->GetPath()](auto const& msg, + bool fatal) { + (*logger)(fmt::format("While importing target {} to git:\n{}", + target_path.string(), + msg), + fatal); + }); +} + } // namespace auto CreateDistdirGitMap( gsl::not_null const& content_cas_map, gsl::not_null const& import_to_git_map, gsl::not_null const& critical_git_op_map, + bool serve_api_exists, + IExecutionApi* local_api, + IExecutionApi* remote_api, std::size_t jobs) -> DistdirGitMap { auto distdir_to_git = [content_cas_map, import_to_git_map, - critical_git_op_map](auto ts, - auto setter, - auto logger, - auto /* unused */, - auto const& key) { + critical_git_op_map, + serve_api_exists, + local_api, + remote_api](auto ts, + auto setter, + auto logger, + auto /* unused */, + auto const& key) { auto distdir_tree_id_file = StorageUtils::GetDistdirTreeIDFile(key.content_id); if (FileSystemManager::Exists(distdir_tree_id_file)) { @@ -110,95 +188,131 @@ auto CreateDistdirGitMap( if (not absent) { root.emplace_back(StorageConfig::GitRoot().string()); } - (*setter)(std::pair(std::move(root), true)); + (*setter)(std::pair(std::move(root), true /*cache hit*/)); }, [logger, target_path = StorageConfig::GitRoot()]( auto const& msg, bool fatal) { - (*logger)(fmt::format("While running critical Git " - "op ENSURE_INIT for " - "target {}:\n{}", + (*logger)(fmt::format("While running critical Git op " + "ENSURE_INIT for target {}:\n{}", target_path.string(), msg), fatal); }); } else { - // fetch the gathered distdir repos into CAS + // create in-memory Git tree of distdir content to get the tree id + GitRepo::tree_entries_t entries{}; + entries.reserve(key.content_list->size()); + for (auto const& kv : *key.content_list) { + // tree_entries_t type expects raw ids + auto raw_id = FromHexString(kv.second); + if (not raw_id) { + (*logger)(fmt::format("While processing distdir {}: " + "Unexpected failure in conversion to " + "raw id of distfile content {}", + key.content_id, + kv.second), + /*is_fatal=*/true); + return; + } + entries[*raw_id].emplace_back(kv.first, ObjectType::File); + } + auto tree = GitRepo::CreateShallowTree(entries); + if (not tree) { + (*logger)(fmt::format("Failed to construct in-memory tree for " + "distdir content {}", + key.content_id), + /*is_fatal=*/true); + return; + } + // get hash from raw_id + auto tree_id = ToHexString(tree->first); + // if pure absent, we simply set the root tree + if (key.absent) { + (*setter)(std::pair( + nlohmann::json::array({FileRoot::kGitTreeMarker, tree_id}), + false /*no cache hit*/)); + return; + } + // otherwise, we check first the serve endpoint + if (serve_api_exists) { + if (auto served_tree_id = ServeApi::RetrieveTreeFromDistdir( + key.content_list, /*sync_tree=*/true)) { + // sanity check + if (*served_tree_id != tree_id) { + (*logger)( + fmt::format("Unexpected served tree id for distdir " + "content {}:\nExpected {}, but got {}", + key.content_id, + tree_id, + *served_tree_id), + /*is_fatal=*/true); + return; + } + // get the ditfiles from remote CAS in bulk + std::vector objects{}; + objects.reserve(key.content_list->size()); + for (auto const& kv : *key.content_list) { + objects.emplace_back(Artifact::ObjectInfo{ + .digest = + ArtifactDigest{kv.second, 0, /*is_tree=*/false}, + .type = ObjectType::File}); + } + if (remote_api != nullptr and local_api != nullptr and + remote_api->RetrieveToCas(objects, local_api)) { + ImportFromCASAndSetRoot(key.content_list, + key.content_id, + distdir_tree_id_file, + import_to_git_map, + ts, + setter, + logger); + // done! + return; + } + // just serve should have made the blobs available in the + // remote CAS, so log this attempt and revert to default + // fetch each blob individually + (*logger)(fmt::format("Tree {} marked as served, but not " + "all distfile blobs found on remote", + *served_tree_id), + /*fatal=*/false); + } + else { + // give warning + (*logger)( + fmt::format("Distdir content {} could not be served", + key.content_id), + /*fatal=*/false); + } + } + else { + // give warning + (*logger)( + fmt::format("Missing serve endpoint for distdir {} marked " + "absent requires slower network fetch.", + key.content_id), + /*fatal=*/false); + } + // revert to fetching the gathered distdir repos into CAS content_cas_map->ConsumeAfterKeysReady( ts, *key.repos_to_fetch, [distdir_tree_id_file, content_id = key.content_id, content_list = key.content_list, - absent = key.absent, import_to_git_map, ts, setter, logger]([[maybe_unused]] auto const& values) mutable { // repos are in CAS - // create the links to CAS - auto tmp_dir = StorageUtils::CreateTypedTmpDir("distdir"); - if (not tmp_dir) { - (*logger)(fmt::format("Failed to create tmp path for " - "distdir target {}", - content_id), - /*fatal=*/true); - return; - } - // link content from CAS into tmp dir - if (not LinkToCAS(content_list, tmp_dir->GetPath())) { - (*logger)(fmt::format( - "Failed to create links to CAS content!", - content_id), - /*fatal=*/true); - return; - } - // do import to git - CommitInfo c_info{ - tmp_dir->GetPath(), "distdir", content_id}; - import_to_git_map->ConsumeAfterKeysReady( - ts, - {std::move(c_info)}, - [tmp_dir, // keep tmp_dir alive - distdir_tree_id_file, - absent, - setter, - logger](auto const& values) { - // check for errors - if (not values[0]->second) { - (*logger)("Importing to git failed", - /*fatal=*/true); - return; - } - // only the tree is of interest - std::string distdir_tree_id = values[0]->first; - // write to tree id file - if (not StorageUtils::WriteTreeIDFile( - distdir_tree_id_file, distdir_tree_id)) { - (*logger)( - fmt::format( - "Failed to write tree id to file {}", - distdir_tree_id_file.string()), - /*fatal=*/true); - return; - } - // set the workspace root - auto root = nlohmann::json::array( - {FileRoot::kGitTreeMarker, distdir_tree_id}); - if (not absent) { - root.emplace_back( - StorageConfig::GitRoot().string()); - } - (*setter)(std::pair(std::move(root), false)); - }, - [logger, target_path = tmp_dir->GetPath()]( - auto const& msg, bool fatal) { - (*logger)(fmt::format("While importing target {} " - "to git:\n{}", - target_path.string(), - msg), - fatal); - }); + ImportFromCASAndSetRoot(content_list, + content_id, + distdir_tree_id_file, + import_to_git_map, + ts, + setter, + logger); }, [logger, content_id = key.content_id](auto const& msg, bool fatal) { diff --git a/src/other_tools/root_maps/distdir_git_map.hpp b/src/other_tools/root_maps/distdir_git_map.hpp index c2f03e08..df2b6064 100644 --- a/src/other_tools/root_maps/distdir_git_map.hpp +++ b/src/other_tools/root_maps/distdir_git_map.hpp @@ -22,6 +22,7 @@ #include "gsl/gsl" #include "nlohmann/json.hpp" +#include "src/buildtool/execution_api/common/execution_api.hpp" #include "src/other_tools/ops_maps/content_cas_map.hpp" #include "src/other_tools/ops_maps/import_to_git_map.hpp" @@ -50,6 +51,9 @@ using DistdirGitMap = gsl::not_null const& content_cas_map, gsl::not_null const& import_to_git_map, gsl::not_null const& critical_git_op_map, + bool serve_api_exists, + IExecutionApi* local_api, + IExecutionApi* remote_api, std::size_t jobs) -> DistdirGitMap; namespace std { -- cgit v1.2.3