diff options
-rw-r--r-- | src/other_tools/just_mr/setup.cpp | 12 | ||||
-rw-r--r-- | src/other_tools/root_maps/TARGETS | 13 | ||||
-rw-r--r-- | src/other_tools/root_maps/distdir_git_map.cpp | 262 | ||||
-rw-r--r-- | src/other_tools/root_maps/distdir_git_map.hpp | 4 |
4 files changed, 209 insertions, 82 deletions
diff --git a/src/other_tools/just_mr/setup.cpp b/src/other_tools/just_mr/setup.cpp index dedf9ecb..96411363 100644 --- a/src/other_tools/just_mr/setup.cpp +++ b/src/other_tools/just_mr/setup.cpp @@ -160,10 +160,14 @@ auto MultiRepoSetup(std::shared_ptr<Configuration> const& config, &import_to_git_map, &resolve_symlinks_map, common_args.jobs); - auto distdir_git_map = CreateDistdirGitMap(&content_cas_map, - &import_to_git_map, - &critical_git_op_map, - common_args.jobs); + auto distdir_git_map = + CreateDistdirGitMap(&content_cas_map, + &import_to_git_map, + &critical_git_op_map, + serve_api_exists, + local_api ? &(*local_api) : nullptr, + remote_api ? &(*remote_api) : nullptr, + common_args.jobs); auto tree_id_git_map = CreateTreeIdGitMap( &git_tree_fetch_map, common_args.fetch_absent, common_args.jobs); auto repos_to_setup_map = CreateReposToSetupMap(config, diff --git a/src/other_tools/root_maps/TARGETS b/src/other_tools/root_maps/TARGETS index 7eab68a5..f9d5ed8d 100644 --- a/src/other_tools/root_maps/TARGETS +++ b/src/other_tools/root_maps/TARGETS @@ -12,18 +12,23 @@ , "stage": ["src", "other_tools", "root_maps"] , "private-deps": [ ["@", "fmt", "", "fmt"] - , ["src/other_tools/ops_maps", "critical_git_op_map"] + , ["src/buildtool/common", "common"] , ["src/buildtool/execution_api/common", "common"] , ["src/buildtool/execution_api/local", "config"] , ["src/buildtool/execution_api/local", "local"] + , ["src/buildtool/file_system", "file_root"] + , ["src/buildtool/file_system", "file_storage"] + , ["src/buildtool/file_system", "git_repo"] + , ["src/buildtool/file_system", "object_type"] + , ["src/buildtool/multithreading", "task_system"] + , ["src/buildtool/serve_api/remote", "serve_api"] , ["src/buildtool/storage", "config"] , ["src/buildtool/storage", "fs_utils"] , ["src/buildtool/storage", "storage"] - , ["src/utils/cpp", "tmp_dir"] - , ["src/buildtool/file_system", "file_storage"] + , ["src/other_tools/ops_maps", "critical_git_op_map"] , ["src/other_tools/just_mr/progress_reporting", "progress"] , ["src/other_tools/just_mr/progress_reporting", "statistics"] - , ["src/buildtool/file_system", "file_root"] + , ["src/utils/cpp", "tmp_dir"] ] } , "commit_git_map": diff --git a/src/other_tools/root_maps/distdir_git_map.cpp b/src/other_tools/root_maps/distdir_git_map.cpp index 677e3057..6fa9d2d2 100644 --- a/src/other_tools/root_maps/distdir_git_map.cpp +++ b/src/other_tools/root_maps/distdir_git_map.cpp @@ -17,9 +17,15 @@ #include <algorithm> #include "fmt/core.h" +#include "src/buildtool/common/artifact.hpp" +#include "src/buildtool/common/artifact_digest.hpp" #include "src/buildtool/execution_api/common/execution_common.hpp" #include "src/buildtool/file_system/file_root.hpp" #include "src/buildtool/file_system/file_storage.hpp" +#include "src/buildtool/file_system/git_repo.hpp" +#include "src/buildtool/file_system/object_type.hpp" +#include "src/buildtool/multithreading/task_system.hpp" +#include "src/buildtool/serve_api/remote/serve_api.hpp" #include "src/buildtool/storage/config.hpp" #include "src/buildtool/storage/fs_utils.hpp" #include "src/buildtool/storage/storage.hpp" @@ -52,20 +58,92 @@ namespace { }); } +void ImportFromCASAndSetRoot( + std::shared_ptr<std::unordered_map<std::string, std::string>> const& + content_list, + std::string const& content_id, + std::filesystem::path const& distdir_tree_id_file, + gsl::not_null<ImportToGitMap*> const& import_to_git_map, + gsl::not_null<TaskSystem*> const& ts, + DistdirGitMap::SetterPtr const& setter, + DistdirGitMap::LoggerPtr const& logger) { + // create the links to CAS + auto tmp_dir = StorageUtils::CreateTypedTmpDir("distdir"); + if (not tmp_dir) { + (*logger)(fmt::format("Failed to create tmp path for " + "distdir target {}", + content_id), + /*fatal=*/true); + return; + } + // link content from CAS into tmp dir + if (not LinkToCAS(content_list, tmp_dir->GetPath())) { + (*logger)( + fmt::format("Failed to create links to CAS content!", content_id), + /*fatal=*/true); + return; + } + // do import to git + CommitInfo c_info{tmp_dir->GetPath(), "distdir", content_id}; + import_to_git_map->ConsumeAfterKeysReady( + ts, + {std::move(c_info)}, + [tmp_dir, // keep tmp_dir alive + distdir_tree_id_file, + setter, + logger](auto const& values) { + // check for errors + if (not values[0]->second) { + (*logger)("Importing to git failed", + /*fatal=*/true); + return; + } + // only the tree is of interest + std::string distdir_tree_id = values[0]->first; + // write to tree id file + if (not StorageUtils::WriteTreeIDFile(distdir_tree_id_file, + distdir_tree_id)) { + (*logger)(fmt::format("Failed to write tree id to file {}", + distdir_tree_id_file.string()), + /*fatal=*/true); + return; + } + // set the workspace root as present + (*setter)(std::pair( + nlohmann::json::array({FileRoot::kGitTreeMarker, + distdir_tree_id, + StorageConfig::GitRoot().string()}), + false /*no cache hit*/)); + }, + [logger, target_path = tmp_dir->GetPath()](auto const& msg, + bool fatal) { + (*logger)(fmt::format("While importing target {} to git:\n{}", + target_path.string(), + msg), + fatal); + }); +} + } // namespace auto CreateDistdirGitMap( gsl::not_null<ContentCASMap*> const& content_cas_map, gsl::not_null<ImportToGitMap*> const& import_to_git_map, gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, + bool serve_api_exists, + IExecutionApi* local_api, + IExecutionApi* remote_api, std::size_t jobs) -> DistdirGitMap { auto distdir_to_git = [content_cas_map, import_to_git_map, - critical_git_op_map](auto ts, - auto setter, - auto logger, - auto /* unused */, - auto const& key) { + critical_git_op_map, + serve_api_exists, + local_api, + remote_api](auto ts, + auto setter, + auto logger, + auto /* unused */, + auto const& key) { auto distdir_tree_id_file = StorageUtils::GetDistdirTreeIDFile(key.content_id); if (FileSystemManager::Exists(distdir_tree_id_file)) { @@ -110,95 +188,131 @@ auto CreateDistdirGitMap( if (not absent) { root.emplace_back(StorageConfig::GitRoot().string()); } - (*setter)(std::pair(std::move(root), true)); + (*setter)(std::pair(std::move(root), true /*cache hit*/)); }, [logger, target_path = StorageConfig::GitRoot()]( auto const& msg, bool fatal) { - (*logger)(fmt::format("While running critical Git " - "op ENSURE_INIT for " - "target {}:\n{}", + (*logger)(fmt::format("While running critical Git op " + "ENSURE_INIT for target {}:\n{}", target_path.string(), msg), fatal); }); } else { - // fetch the gathered distdir repos into CAS + // create in-memory Git tree of distdir content to get the tree id + GitRepo::tree_entries_t entries{}; + entries.reserve(key.content_list->size()); + for (auto const& kv : *key.content_list) { + // tree_entries_t type expects raw ids + auto raw_id = FromHexString(kv.second); + if (not raw_id) { + (*logger)(fmt::format("While processing distdir {}: " + "Unexpected failure in conversion to " + "raw id of distfile content {}", + key.content_id, + kv.second), + /*is_fatal=*/true); + return; + } + entries[*raw_id].emplace_back(kv.first, ObjectType::File); + } + auto tree = GitRepo::CreateShallowTree(entries); + if (not tree) { + (*logger)(fmt::format("Failed to construct in-memory tree for " + "distdir content {}", + key.content_id), + /*is_fatal=*/true); + return; + } + // get hash from raw_id + auto tree_id = ToHexString(tree->first); + // if pure absent, we simply set the root tree + if (key.absent) { + (*setter)(std::pair( + nlohmann::json::array({FileRoot::kGitTreeMarker, tree_id}), + false /*no cache hit*/)); + return; + } + // otherwise, we check first the serve endpoint + if (serve_api_exists) { + if (auto served_tree_id = ServeApi::RetrieveTreeFromDistdir( + key.content_list, /*sync_tree=*/true)) { + // sanity check + if (*served_tree_id != tree_id) { + (*logger)( + fmt::format("Unexpected served tree id for distdir " + "content {}:\nExpected {}, but got {}", + key.content_id, + tree_id, + *served_tree_id), + /*is_fatal=*/true); + return; + } + // get the ditfiles from remote CAS in bulk + std::vector<Artifact::ObjectInfo> objects{}; + objects.reserve(key.content_list->size()); + for (auto const& kv : *key.content_list) { + objects.emplace_back(Artifact::ObjectInfo{ + .digest = + ArtifactDigest{kv.second, 0, /*is_tree=*/false}, + .type = ObjectType::File}); + } + if (remote_api != nullptr and local_api != nullptr and + remote_api->RetrieveToCas(objects, local_api)) { + ImportFromCASAndSetRoot(key.content_list, + key.content_id, + distdir_tree_id_file, + import_to_git_map, + ts, + setter, + logger); + // done! + return; + } + // just serve should have made the blobs available in the + // remote CAS, so log this attempt and revert to default + // fetch each blob individually + (*logger)(fmt::format("Tree {} marked as served, but not " + "all distfile blobs found on remote", + *served_tree_id), + /*fatal=*/false); + } + else { + // give warning + (*logger)( + fmt::format("Distdir content {} could not be served", + key.content_id), + /*fatal=*/false); + } + } + else { + // give warning + (*logger)( + fmt::format("Missing serve endpoint for distdir {} marked " + "absent requires slower network fetch.", + key.content_id), + /*fatal=*/false); + } + // revert to fetching the gathered distdir repos into CAS content_cas_map->ConsumeAfterKeysReady( ts, *key.repos_to_fetch, [distdir_tree_id_file, content_id = key.content_id, content_list = key.content_list, - absent = key.absent, import_to_git_map, ts, setter, logger]([[maybe_unused]] auto const& values) mutable { // repos are in CAS - // create the links to CAS - auto tmp_dir = StorageUtils::CreateTypedTmpDir("distdir"); - if (not tmp_dir) { - (*logger)(fmt::format("Failed to create tmp path for " - "distdir target {}", - content_id), - /*fatal=*/true); - return; - } - // link content from CAS into tmp dir - if (not LinkToCAS(content_list, tmp_dir->GetPath())) { - (*logger)(fmt::format( - "Failed to create links to CAS content!", - content_id), - /*fatal=*/true); - return; - } - // do import to git - CommitInfo c_info{ - tmp_dir->GetPath(), "distdir", content_id}; - import_to_git_map->ConsumeAfterKeysReady( - ts, - {std::move(c_info)}, - [tmp_dir, // keep tmp_dir alive - distdir_tree_id_file, - absent, - setter, - logger](auto const& values) { - // check for errors - if (not values[0]->second) { - (*logger)("Importing to git failed", - /*fatal=*/true); - return; - } - // only the tree is of interest - std::string distdir_tree_id = values[0]->first; - // write to tree id file - if (not StorageUtils::WriteTreeIDFile( - distdir_tree_id_file, distdir_tree_id)) { - (*logger)( - fmt::format( - "Failed to write tree id to file {}", - distdir_tree_id_file.string()), - /*fatal=*/true); - return; - } - // set the workspace root - auto root = nlohmann::json::array( - {FileRoot::kGitTreeMarker, distdir_tree_id}); - if (not absent) { - root.emplace_back( - StorageConfig::GitRoot().string()); - } - (*setter)(std::pair(std::move(root), false)); - }, - [logger, target_path = tmp_dir->GetPath()]( - auto const& msg, bool fatal) { - (*logger)(fmt::format("While importing target {} " - "to git:\n{}", - target_path.string(), - msg), - fatal); - }); + ImportFromCASAndSetRoot(content_list, + content_id, + distdir_tree_id_file, + import_to_git_map, + ts, + setter, + logger); }, [logger, content_id = key.content_id](auto const& msg, bool fatal) { diff --git a/src/other_tools/root_maps/distdir_git_map.hpp b/src/other_tools/root_maps/distdir_git_map.hpp index c2f03e08..df2b6064 100644 --- a/src/other_tools/root_maps/distdir_git_map.hpp +++ b/src/other_tools/root_maps/distdir_git_map.hpp @@ -22,6 +22,7 @@ #include "gsl/gsl" #include "nlohmann/json.hpp" +#include "src/buildtool/execution_api/common/execution_api.hpp" #include "src/other_tools/ops_maps/content_cas_map.hpp" #include "src/other_tools/ops_maps/import_to_git_map.hpp" @@ -50,6 +51,9 @@ using DistdirGitMap = gsl::not_null<ContentCASMap*> const& content_cas_map, gsl::not_null<ImportToGitMap*> const& import_to_git_map, gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, + bool serve_api_exists, + IExecutionApi* local_api, + IExecutionApi* remote_api, std::size_t jobs) -> DistdirGitMap; namespace std { |