diff options
-rw-r--r-- | src/other_tools/just_mr/setup.cpp | 9 | ||||
-rw-r--r-- | src/other_tools/ops_maps/git_tree_fetch_map.cpp | 12 | ||||
-rw-r--r-- | src/other_tools/root_maps/TARGETS | 8 | ||||
-rw-r--r-- | src/other_tools/root_maps/tree_id_git_map.cpp | 317 | ||||
-rw-r--r-- | src/other_tools/root_maps/tree_id_git_map.hpp | 12 |
5 files changed, 344 insertions, 14 deletions
diff --git a/src/other_tools/just_mr/setup.cpp b/src/other_tools/just_mr/setup.cpp index e5078592..f1a65157 100644 --- a/src/other_tools/just_mr/setup.cpp +++ b/src/other_tools/just_mr/setup.cpp @@ -195,7 +195,14 @@ auto MultiRepoSetup(std::shared_ptr<Configuration> const& config, common_args.jobs); auto tree_id_git_map = CreateTreeIdGitMap( - &git_tree_fetch_map, common_args.fetch_absent, common_args.jobs); + &git_tree_fetch_map, + &critical_git_op_map, + &import_to_git_map, + common_args.fetch_absent, + serve_api_exists, + &(*local_api), + remote_api ? std::make_optional(&(*remote_api)) : std::nullopt, + common_args.jobs); auto repos_to_setup_map = CreateReposToSetupMap(config, main, diff --git a/src/other_tools/ops_maps/git_tree_fetch_map.cpp b/src/other_tools/ops_maps/git_tree_fetch_map.cpp index 00e0b1b4..e70ded2b 100644 --- a/src/other_tools/ops_maps/git_tree_fetch_map.cpp +++ b/src/other_tools/ops_maps/git_tree_fetch_map.cpp @@ -258,15 +258,19 @@ auto CreateGitTreeFetchMap( // create temporary location for command execution root auto tmp_dir = StorageUtils::CreateTypedTmpDir("git-tree"); if (not tmp_dir) { - (*logger)("Failed to create tmp directory for tree id map!", - /*fatal=*/true); + (*logger)( + "Failed to create execution root tmp directory for " + "tree id map!", + /*fatal=*/true); return; } // create temporary location for storing command result files auto out_dir = StorageUtils::CreateTypedTmpDir("git-tree"); if (not out_dir) { - (*logger)("Failed to create tmp directory for tree id map!", - /*fatal=*/true); + (*logger)( + "Failed to create results tmp directory for tree id " + "map!", + /*fatal=*/true); return; } // execute command in temporary location diff --git a/src/other_tools/root_maps/TARGETS b/src/other_tools/root_maps/TARGETS index f67c2474..94fa6b7a 100644 --- a/src/other_tools/root_maps/TARGETS +++ b/src/other_tools/root_maps/TARGETS @@ -134,14 +134,22 @@ , "deps": [ ["@", "gsl", "", "gsl"] , ["@", "json", "", "json"] + , ["src/buildtool/execution_api/common", "common"] + , ["src/other_tools/ops_maps", "critical_git_op_map"] , ["src/other_tools/ops_maps", "git_tree_fetch_map"] + , ["src/other_tools/ops_maps", "import_to_git_map"] , ["src/utils/cpp", "hash_combine"] ] , "stage": ["src", "other_tools", "root_maps"] , "private-deps": [ ["@", "fmt", "", "fmt"] + , "root_utils" + , ["src/buildtool/common", "config"] + , ["src/buildtool/execution_api/git", "git"] , ["src/buildtool/file_system", "file_root"] , ["src/buildtool/storage", "config"] + , ["src/buildtool/storage", "fs_utils"] + , ["src/buildtool/storage", "storage"] ] } , "root_utils": diff --git a/src/other_tools/root_maps/tree_id_git_map.cpp b/src/other_tools/root_maps/tree_id_git_map.cpp index 3d5ccf1e..c8e55d9c 100644 --- a/src/other_tools/root_maps/tree_id_git_map.cpp +++ b/src/other_tools/root_maps/tree_id_git_map.cpp @@ -15,20 +15,320 @@ #include "src/other_tools/root_maps/tree_id_git_map.hpp" #include "fmt/core.h" +#include "src/buildtool/common/repository_config.hpp" +#include "src/buildtool/execution_api/git/git_api.hpp" #include "src/buildtool/file_system/file_root.hpp" #include "src/buildtool/storage/config.hpp" +#include "src/buildtool/storage/fs_utils.hpp" +#include "src/buildtool/storage/storage.hpp" +#include "src/other_tools/root_maps/root_utils.hpp" + +namespace { + +/// \brief Guarantees it terminates by either calling the setter or calling the +/// logger with fatal. +void UploadToServeAndSetRoot(std::string const& tree_id, + ArtifactDigest const& digest, + gsl::not_null<IExecutionApi*> const& remote_api, + bool ignore_special, + TreeIdGitMap::SetterPtr const& setter, + TreeIdGitMap::LoggerPtr const& logger) { + // upload to remote CAS + auto repo_config = RepositoryConfig{}; + if (repo_config.SetGitCAS(StorageConfig::GitRoot())) { + auto git_api = GitApi{&repo_config}; + if (not git_api.RetrieveToCas( + {Artifact::ObjectInfo{.digest = digest, + .type = ObjectType::Tree}}, + remote_api)) { + (*logger)(fmt::format("Failed to sync tree {} from local Git cache " + "to remote CAS", + tree_id), + /*fatal=*/true); + return; + } + } + else { + (*logger)(fmt::format("Failed to SetGitCAS at {}", + StorageConfig::GitRoot().string()), + /*fatal=*/true); + return; + } + // tell serve to set up the root from the remote CAS tree; + // upload can be skipped + if (EnsureAbsentRootOnServe(tree_id, + /*repo_path=*/"", + /*remote_api=*/std::nullopt, + logger, + /*no_sync_is_fatal=*/true)) { + // set workspace root as absent + auto root = nlohmann::json::array( + {ignore_special ? FileRoot::kGitTreeIgnoreSpecialMarker + : FileRoot::kGitTreeMarker, + tree_id}); + (*setter)(std::pair(std::move(root), /*is_cache_hit=*/false)); + return; + } +} + +/// \brief Guarantees it terminates by either calling the setter or calling the +/// logger with fatal. +void MoveCASTreeToGitAndProcess( + std::string const& tree_id, + ArtifactDigest const& digest, + gsl::not_null<ImportToGitMap*> const& import_to_git_map, + gsl::not_null<IExecutionApi*> const& local_api, + gsl::not_null<IExecutionApi*> const& remote_api, + bool ignore_special, + gsl::not_null<TaskSystem*> const& ts, + TreeIdGitMap::SetterPtr const& setter, + TreeIdGitMap::LoggerPtr const& logger) { + // Move tree from CAS to local Git storage + auto tmp_dir = StorageUtils::CreateTypedTmpDir("fetch-remote-git-tree"); + if (not tmp_dir) { + (*logger)(fmt::format("Failed to create tmp directory for copying " + "git-tree {} from remote CAS", + digest.hash()), + true); + return; + } + if (not local_api->RetrieveToPaths( + {Artifact::ObjectInfo{.digest = digest, .type = ObjectType::Tree}}, + {tmp_dir->GetPath()})) { + (*logger)(fmt::format("Failed to copy git-tree {} to {}", + tree_id, + tmp_dir->GetPath().string()), + true); + return; + } + CommitInfo c_info{tmp_dir->GetPath(), "tree", tree_id}; + import_to_git_map->ConsumeAfterKeysReady( + ts, + {std::move(c_info)}, + [tmp_dir, // keep tmp_dir alive + tree_id, + digest, + remote_api, + ignore_special, + setter, + logger](auto const& values) { + if (not values[0]->second) { + (*logger)("Importing to git failed", + /*fatal=*/true); + return; + } + // upload tree from Git cache to remote CAS and tell serve to set up + // the root from the remote CAS tree; set root as absent on success + UploadToServeAndSetRoot( + tree_id, digest, remote_api, ignore_special, setter, logger); + }, + [logger, tmp_dir, tree_id](auto const& msg, bool fatal) { + (*logger)(fmt::format( + "While moving git-tree {} from {} to local git:\n{}", + tree_id, + tmp_dir->GetPath().string(), + msg), + fatal); + }); +} + +} // namespace auto CreateTreeIdGitMap( gsl::not_null<GitTreeFetchMap*> const& git_tree_fetch_map, + gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, + gsl::not_null<ImportToGitMap*> const& import_to_git_map, bool fetch_absent, + bool serve_api_exists, + gsl::not_null<IExecutionApi*> const& local_api, + std::optional<gsl::not_null<IExecutionApi*>> const& remote_api, std::size_t jobs) -> TreeIdGitMap { - auto tree_to_git = [git_tree_fetch_map, fetch_absent](auto ts, - auto setter, - auto logger, - auto /*unused*/, - auto const& key) { - // if root is actually absent, no work needs to be done + auto tree_to_git = [git_tree_fetch_map, + critical_git_op_map, + import_to_git_map, + fetch_absent, + serve_api_exists, + local_api, + remote_api](auto ts, + auto setter, + auto logger, + auto /*unused*/, + auto const& key) { + // if root is actually absent, check if serve endpoint knows the tree + // for building against it and only set the workspace root if tree is + // found on the serve endpoint or it can be made available to it; + // otherwise, error out if (key.absent and not fetch_absent) { + if (serve_api_exists) { + // check serve endpoint + auto has_tree = + CheckServeHasAbsentRoot(key.tree_info.hash, logger); + if (not has_tree) { + return; + } + if (*has_tree) { + // set workspace root as absent + auto root = nlohmann::json::array( + {key.ignore_special + ? FileRoot::kGitTreeIgnoreSpecialMarker + : FileRoot::kGitTreeMarker, + key.tree_info.hash}); + (*setter)( + std::pair(std::move(root), /*is_cache_hit=*/false)); + return; + } + // at this point we cannot proceed without the remote api + if (not remote_api) { + (*logger)(fmt::format("Missing remote-execution endpoint " + "needed to sync workspace root {} " + "with the serve endpoint.", + key.tree_info.hash), + /*fatal=*/true); + return; + } + // check if tree in already in remote CAS + auto digest = + ArtifactDigest{key.tree_info.hash, 0, /*is_tree=*/true}; + if (remote_api.value()->IsAvailable({digest})) { + // tell serve to set up the root from the remote CAS tree; + // upload can be skipped + if (EnsureAbsentRootOnServe(key.tree_info.hash, + /*repo_path=*/"", + /*remote_api=*/std::nullopt, + logger, + /*no_sync_is_fatal=*/true)) { + // set workspace root as absent + auto root = nlohmann::json::array( + {key.ignore_special + ? FileRoot::kGitTreeIgnoreSpecialMarker + : FileRoot::kGitTreeMarker, + key.tree_info.hash}); + (*setter)( + std::pair(std::move(root), /*is_cache_hit=*/false)); + return; + } + (*logger)( + fmt::format("Serve endpoint failed to create workspace " + "root {} that locally was marked absent.", + key.tree_info.hash), + /*fatal=*/true); + return; + } + // check if tree is in Git cache; + // ensure Git cache exists + GitOpKey op_key = { + .params = + { + StorageConfig::GitRoot(), // target_path + "", // git_hash + "", // branch + std::nullopt, // message + true // init_bare + }, + .op_type = GitOpType::ENSURE_INIT}; + critical_git_op_map->ConsumeAfterKeysReady( + ts, + {std::move(op_key)}, + [digest, + import_to_git_map, + local_api, + remote_api, + key, + ts, + setter, + logger](auto const& values) { + GitOpValue op_result = *values[0]; + // check flag + if (not op_result.result) { + (*logger)("Git cache init failed", + /*fatal=*/true); + return; + } + // Open fake tmp repo to check if tree is known to Git + // cache + auto git_repo = GitRepoRemote::Open( + op_result.git_cas); // link fake repo to odb + if (not git_repo) { + (*logger)( + fmt::format("Could not open repository {}", + StorageConfig::GitRoot().string()), + /*fatal=*/true); + return; + } + // setup wrapped logger + auto wrapped_logger = + std::make_shared<AsyncMapConsumerLogger>( + [logger](auto const& msg, bool fatal) { + (*logger)( + fmt::format("While checking tree " + "exists in Git cache:\n{}", + msg), + fatal); + }); + // check if the desired tree ID is in Git cache + auto tree_found = git_repo->CheckTreeExists( + key.tree_info.hash, wrapped_logger); + if (not tree_found) { + // errors encountered + return; + } + if (*tree_found) { + // upload tree from Git cache to remote CAS and tell + // serve to set up the root from the remote CAS + // tree, then set root as absent + UploadToServeAndSetRoot(key.tree_info.hash, + digest, + *remote_api, + key.ignore_special, + setter, + logger); + // done! + return; + } + // check if tree is known to local CAS + auto const& cas = Storage::Instance().CAS(); + if (auto path = cas.TreePath(digest)) { + // Move tree locally from CAS to Git cache, then + // continue processing it by UploadToServeAndSetRoot + MoveCASTreeToGitAndProcess(key.tree_info.hash, + digest, + import_to_git_map, + local_api, + *remote_api, + key.ignore_special, + ts, + setter, + logger); + // done! + return; + } + // tree is not know locally, so we cannot + // provide it to the serve endpoint and thus we + // cannot create the absent root + (*logger)(fmt::format("Cannot create workspace root " + "{} as absent for the provided " + "serve endpoint.", + key.tree_info.hash), + /*fatal=*/true); + }, + [logger, target_path = StorageConfig::GitRoot()]( + auto const& msg, bool fatal) { + (*logger)( + fmt::format("While running critical Git op " + "ENSURE_INIT bare for target {}:\n{}", + target_path.string(), + msg), + fatal); + }); + // done! + return; + } + // give warning that serve endpoint is missing + (*logger)(fmt::format("Workspace root {} marked absent but no " + "serve endpoint provided.", + key.tree_info.hash), + /*fatal=*/false); + // set workspace root as absent auto root = nlohmann::json::array( {key.ignore_special ? FileRoot::kGitTreeIgnoreSpecialMarker : FileRoot::kGitTreeMarker, @@ -36,8 +336,9 @@ auto CreateTreeIdGitMap( (*setter)(std::pair(std::move(root), false)); return; } - // otherwise, one must fetch; - // make sure the required tree is in Git cache + // if root is not absent, proceed with usual fetch logic: check locally, + // check serve endpoint, check remote-execution endpoint, and lastly + // default to network git_tree_fetch_map->ConsumeAfterKeysReady( ts, {key.tree_info}, diff --git a/src/other_tools/root_maps/tree_id_git_map.hpp b/src/other_tools/root_maps/tree_id_git_map.hpp index da4d9328..6a4e93b0 100644 --- a/src/other_tools/root_maps/tree_id_git_map.hpp +++ b/src/other_tools/root_maps/tree_id_git_map.hpp @@ -15,13 +15,17 @@ #ifndef INCLUDED_SRC_OTHER_TOOLS_ROOT_MAPS_TREE_ID_GIT_MAP_HPP #define INCLUDED_SRC_OTHER_TOOLS_ROOT_MAPS_TREE_ID_GIT_MAP_HPP +#include <optional> #include <string> #include <utility> #include <vector> #include "gsl/gsl" #include "nlohmann/json.hpp" +#include "src/buildtool/execution_api/common/execution_api.hpp" +#include "src/other_tools/ops_maps/critical_git_op_map.hpp" #include "src/other_tools/ops_maps/git_tree_fetch_map.hpp" +#include "src/other_tools/ops_maps/import_to_git_map.hpp" #include "src/utils/cpp/hash_combine.hpp" struct TreeIdInfo { @@ -55,11 +59,17 @@ struct hash<TreeIdInfo> { /// \brief Maps a known tree provided through a generic command to its /// workspace root and the information whether it was a cache hit. using TreeIdGitMap = - AsyncMapConsumer<TreeIdInfo, std::pair<nlohmann::json, bool>>; + AsyncMapConsumer<TreeIdInfo, + std::pair<nlohmann::json /*root*/, bool /*is_cache_hit*/>>; [[nodiscard]] auto CreateTreeIdGitMap( gsl::not_null<GitTreeFetchMap*> const& git_tree_fetch_map, + gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, + gsl::not_null<ImportToGitMap*> const& import_to_git_map, bool fetch_absent, + bool serve_api_exists, + gsl::not_null<IExecutionApi*> const& local_api, + std::optional<gsl::not_null<IExecutionApi*>> const& remote_api, std::size_t jobs) -> TreeIdGitMap; #endif // INCLUDED_SRC_OTHER_TOOLS_ROOT_MAPS_TREE_ID_GIT_MAP_HPP |