diff options
author | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2023-12-04 18:34:37 +0100 |
---|---|---|
committer | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2023-12-07 10:00:12 +0100 |
commit | 8d1c18466e597c110b1a48198b38df6df697508d (patch) | |
tree | 77bbdf10b1a5ac085c14070a63683e916276e92f | |
parent | 1a2b1958e3570964f1e9666750f9652c63f4c7f8 (diff) | |
download | justbuild-8d1c18466e597c110b1a48198b38df6df697508d.tar.gz |
just-mr fetch: Extend to include 'git tree' repositories
This includes implementing the logic for the --backup-to-remote
and --fetch-absent options.
-rw-r--r-- | src/other_tools/just_mr/TARGETS | 3 | ||||
-rw-r--r-- | src/other_tools/just_mr/fetch.cpp | 350 | ||||
-rw-r--r-- | src/other_tools/just_mr/setup.cpp | 1 | ||||
-rw-r--r-- | src/other_tools/ops_maps/TARGETS | 1 | ||||
-rw-r--r-- | src/other_tools/ops_maps/git_tree_fetch_map.cpp | 62 | ||||
-rw-r--r-- | src/other_tools/ops_maps/git_tree_fetch_map.hpp | 1 |
6 files changed, 321 insertions, 97 deletions
diff --git a/src/other_tools/just_mr/TARGETS b/src/other_tools/just_mr/TARGETS index 8973026a..f0792698 100644 --- a/src/other_tools/just_mr/TARGETS +++ b/src/other_tools/just_mr/TARGETS @@ -112,6 +112,9 @@ , ["src/other_tools/just_mr/progress_reporting", "progress_reporter"] , ["src/other_tools/ops_maps", "archive_fetch_map"] , ["src/other_tools/ops_maps", "content_cas_map"] + , ["src/other_tools/ops_maps", "critical_git_op_map"] + , ["src/other_tools/ops_maps", "git_tree_fetch_map"] + , ["src/other_tools/ops_maps", "import_to_git_map"] , "setup_utils" , ["src/buildtool/execution_api/common", "common"] , ["src/buildtool/execution_api/local", "local"] diff --git a/src/other_tools/just_mr/fetch.cpp b/src/other_tools/just_mr/fetch.cpp index bfddf5ce..77773f69 100644 --- a/src/other_tools/just_mr/fetch.cpp +++ b/src/other_tools/just_mr/fetch.cpp @@ -28,6 +28,9 @@ #include "src/other_tools/just_mr/setup_utils.hpp" #include "src/other_tools/ops_maps/archive_fetch_map.hpp" #include "src/other_tools/ops_maps/content_cas_map.hpp" +#include "src/other_tools/ops_maps/critical_git_op_map.hpp" +#include "src/other_tools/ops_maps/git_tree_fetch_map.hpp" +#include "src/other_tools/ops_maps/import_to_git_map.hpp" auto MultiRepoFetch(std::shared_ptr<Configuration> const& config, MultiRepoCommonArguments const& common_args, @@ -121,8 +124,11 @@ auto MultiRepoFetch(std::shared_ptr<Configuration> const& config, // gather all repos to be fetched std::vector<ArchiveRepoInfo> archives_to_fetch{}; + std::vector<GitTreeInfo> git_trees_to_fetch{}; archives_to_fetch.reserve( fetch_repos->to_include.size()); // pre-reserve a maximum size + git_trees_to_fetch.reserve( + fetch_repos->to_include.size()); // pre-reserve a maximum size for (auto const& repo_name : fetch_repos->to_include) { auto repo_desc = repos->At(repo_name); if (not repo_desc) { @@ -161,96 +167,220 @@ auto MultiRepoFetch(std::shared_ptr<Configuration> const& config, auto repo_type_str = repo_type->get()->String(); if (not kCheckoutTypeMap.contains(repo_type_str)) { Logger::Log(LogLevel::Error, - "Unknown repository type {} for {}", + "Config: Unknown repository type {} for {}", nlohmann::json(repo_type_str).dump(), nlohmann::json(repo_name).dump()); return kExitFetchError; } - // only do work if repo is archive type - if (kCheckoutTypeMap.at(repo_type_str) == CheckoutType::Archive) { - // check "absent" pragma - auto repo_desc_pragma = (*resolved_repo_desc)->At("pragma"); - auto pragma_absent = repo_desc_pragma - ? repo_desc_pragma->get()->At("absent") + // only do work if repo is archive or git tree type + switch (kCheckoutTypeMap.at(repo_type_str)) { + case CheckoutType::Archive: { + // check "absent" pragma + auto repo_desc_pragma = (*resolved_repo_desc)->At("pragma"); + auto pragma_absent = + repo_desc_pragma ? repo_desc_pragma->get()->At("absent") : std::nullopt; - auto pragma_absent_value = pragma_absent and - pragma_absent->get()->IsBool() and - pragma_absent->get()->Bool(); - // only fetch if either archive is not marked absent, or if - // explicitly told to fetch absent archives - if (not pragma_absent_value or common_args.fetch_absent) { - // check mandatory fields - auto repo_desc_content = - (*resolved_repo_desc)->At("content"); - if (not repo_desc_content) { - Logger::Log(LogLevel::Error, - "Mandatory field \"content\" is missing"); - return kExitFetchError; - } - if (not repo_desc_content->get()->IsString()) { - Logger::Log(LogLevel::Error, - "Unsupported value {} for mandatory field " - "\"content\"", - repo_desc_content->get()->ToString()); - return kExitFetchError; - } - auto repo_desc_fetch = (*resolved_repo_desc)->At("fetch"); - if (not repo_desc_fetch) { - Logger::Log(LogLevel::Error, - "Mandatory field \"fetch\" is missing"); - return kExitFetchError; + auto pragma_absent_value = + pragma_absent and pragma_absent->get()->IsBool() and + pragma_absent->get()->Bool(); + // only fetch if either archive is not marked absent, or if + // explicitly told to fetch absent archives + if (not pragma_absent_value or common_args.fetch_absent) { + // check mandatory fields + auto repo_desc_content = + (*resolved_repo_desc)->At("content"); + if (not repo_desc_content) { + Logger::Log(LogLevel::Error, + "Config: Mandatory field \"content\" " + "is missing"); + return kExitFetchError; + } + if (not repo_desc_content->get()->IsString()) { + Logger::Log(LogLevel::Error, + "Config: Unsupported value {} for " + "mandatory field \"content\"", + repo_desc_content->get()->ToString()); + return kExitFetchError; + } + auto repo_desc_fetch = + (*resolved_repo_desc)->At("fetch"); + if (not repo_desc_fetch) { + Logger::Log( + LogLevel::Error, + "Config: Mandatory field \"fetch\" is missing"); + return kExitFetchError; + } + if (not repo_desc_fetch->get()->IsString()) { + Logger::Log(LogLevel::Error, + "Config: Unsupported value {} for " + "mandatory field \"fetch\"", + repo_desc_fetch->get()->ToString()); + return kExitFetchError; + } + auto repo_desc_subdir = + (*resolved_repo_desc) + ->Get("subdir", Expression::none_t{}); + auto subdir = std::filesystem::path( + repo_desc_subdir->IsString() + ? repo_desc_subdir->String() + : "") + .lexically_normal(); + auto repo_desc_distfile = + (*resolved_repo_desc) + ->Get("distfile", Expression::none_t{}); + auto repo_desc_sha256 = + (*resolved_repo_desc) + ->Get("sha256", Expression::none_t{}); + auto repo_desc_sha512 = + (*resolved_repo_desc) + ->Get("sha512", Expression::none_t{}); + + ArchiveRepoInfo archive_info = { + .archive = + {.content = repo_desc_content->get()->String(), + .distfile = + repo_desc_distfile->IsString() + ? std::make_optional( + repo_desc_distfile->String()) + : std::nullopt, + .fetch_url = repo_desc_fetch->get()->String(), + .sha256 = repo_desc_sha256->IsString() + ? std::make_optional( + repo_desc_sha256->String()) + : std::nullopt, + .sha512 = repo_desc_sha512->IsString() + ? std::make_optional( + repo_desc_sha512->String()) + : std::nullopt, + .origin = repo_name, + .fetch_only = true}, + .repo_type = repo_type_str, + .subdir = subdir.empty() ? "." : subdir.string(), + .pragma_special = std::nullopt, // not used + .absent = false // not used + }; + // add to list + archives_to_fetch.emplace_back(std::move(archive_info)); } - if (not repo_desc_fetch->get()->IsString()) { - Logger::Log(LogLevel::Error, - "ArchiveCheckout: Unsupported value {} for " - "mandatory field \"fetch\"", - repo_desc_fetch->get()->ToString()); - return kExitFetchError; + } break; + case CheckoutType::GitTree: { + // check "absent" pragma + auto repo_desc_pragma = (*resolved_repo_desc)->At("pragma"); + auto pragma_absent = + repo_desc_pragma ? repo_desc_pragma->get()->At("absent") + : std::nullopt; + auto pragma_absent_value = + pragma_absent and pragma_absent->get()->IsBool() and + pragma_absent->get()->Bool(); + // only fetch if either archive is not marked absent, or if + // explicitly told to fetch absent archives + if (not pragma_absent_value or common_args.fetch_absent) { + // enforce mandatory fields + auto repo_desc_hash = (*resolved_repo_desc)->At("id"); + if (not repo_desc_hash) { + Logger::Log( + LogLevel::Error, + "Config: Mandatory field \"id\" is missing"); + return kExitFetchError; + } + if (not repo_desc_hash->get()->IsString()) { + Logger::Log( + LogLevel::Error, + fmt::format("Config: Unsupported value {} for " + "mandatory field \"id\"", + repo_desc_hash->get()->ToString())); + return kExitFetchError; + } + auto repo_desc_cmd = (*resolved_repo_desc)->At("cmd"); + if (not repo_desc_cmd) { + Logger::Log( + LogLevel::Error, + "Config: Mandatory field \"cmd\" is missing"); + return kExitFetchError; + } + if (not repo_desc_cmd->get()->IsList()) { + Logger::Log( + LogLevel::Error, + fmt::format("Config: Unsupported value {} for " + "mandatory field \"cmd\"", + repo_desc_cmd->get()->ToString())); + return kExitFetchError; + } + std::vector<std::string> cmd{}; + for (auto const& token : repo_desc_cmd->get()->List()) { + if (token.IsNotNull() and token->IsString()) { + cmd.emplace_back(token->String()); + } + else { + Logger::Log( + LogLevel::Error, + fmt::format("Config: Unsupported entry {} " + "in mandatory field \"cmd\"", + token->ToString())); + return kExitFetchError; + } + } + std::map<std::string, std::string> env{}; + auto repo_desc_env = + (*resolved_repo_desc) + ->Get("env", Expression::none_t{}); + if (repo_desc_env.IsNotNull() and + repo_desc_env->IsMap()) { + for (auto const& envar : + repo_desc_env->Map().Items()) { + if (envar.second.IsNotNull() and + envar.second->IsString()) { + env.insert( + {envar.first, envar.second->String()}); + } + else { + Logger::Log( + LogLevel::Error, + fmt::format( + "Config: Unsupported value {} for " + "key {} in optional field \"envs\"", + envar.second->ToString(), + nlohmann::json(envar.first) + .dump())); + return kExitFetchError; + } + } + } + std::vector<std::string> inherit_env{}; + auto repo_desc_inherit_env = + (*resolved_repo_desc) + ->Get("inherit env", Expression::none_t{}); + if (repo_desc_inherit_env.IsNotNull() and + repo_desc_inherit_env->IsList()) { + for (auto const& envvar : + repo_desc_inherit_env->List()) { + if (envvar->IsString()) { + inherit_env.emplace_back(envvar->String()); + } + else { + Logger::Log( + LogLevel::Error, + fmt::format("Config: Not a variable " + "name in the specification " + "of \"inherit env\": {}", + envvar->ToString())); + return kExitFetchError; + } + } + } + // populate struct + GitTreeInfo tree_info = { + .hash = repo_desc_hash->get()->String(), + .env_vars = std::move(env), + .inherit_env = std::move(inherit_env), + .command = std::move(cmd), + .origin = repo_name}; + // add to list + git_trees_to_fetch.emplace_back(std::move(tree_info)); } - auto repo_desc_subdir = - (*resolved_repo_desc) - ->Get("subdir", Expression::none_t{}); - auto subdir = - std::filesystem::path(repo_desc_subdir->IsString() - ? repo_desc_subdir->String() - : "") - .lexically_normal(); - auto repo_desc_distfile = - (*resolved_repo_desc) - ->Get("distfile", Expression::none_t{}); - auto repo_desc_sha256 = - (*resolved_repo_desc) - ->Get("sha256", Expression::none_t{}); - auto repo_desc_sha512 = - (*resolved_repo_desc) - ->Get("sha512", Expression::none_t{}); - - ArchiveRepoInfo archive_info = { - .archive = - {.content = repo_desc_content->get()->String(), - .distfile = repo_desc_distfile->IsString() - ? std::make_optional( - repo_desc_distfile->String()) - : std::nullopt, - .fetch_url = repo_desc_fetch->get()->String(), - .sha256 = repo_desc_sha256->IsString() - ? std::make_optional( - repo_desc_sha256->String()) - : std::nullopt, - .sha512 = repo_desc_sha512->IsString() - ? std::make_optional( - repo_desc_sha512->String()) - : std::nullopt, - .origin = repo_name, - .fetch_only = true}, - .repo_type = repo_type_str, - .subdir = subdir.empty() ? "." : subdir.string(), - .pragma_special = std::nullopt, // not used - .absent = false // not used - }; - // add to list - archives_to_fetch.emplace_back(std::move(archive_info)); - } + } break; + default: + continue; // ignore all other repository types } } else { @@ -262,11 +392,16 @@ auto MultiRepoFetch(std::shared_ptr<Configuration> const& config, } // report progress - auto nr = archives_to_fetch.size(); + auto nr_a = archives_to_fetch.size(); + auto nr_gt = git_trees_to_fetch.size(); + auto str_a = fmt::format("{} {}", nr_a, nr_a == 1 ? "archive" : "archives"); + auto str_gt = + fmt::format("{} git {}", nr_gt, nr_gt == 1 ? "tree" : "trees"); Logger::Log(LogLevel::Info, - "Found {} {} to fetch", - nr, - nr == 1 ? "archive" : "archives"); + "Found {}{}{} to fetch", + nr_a != 0 ? str_a : std::string(), + nr_a != 0 and nr_gt != 0 ? " and " : "", + nr_gt != 0 ? str_gt : std::string()); // setup the APIs for archive fetches auto remote_api = JustMR::Utils::GetRemoteApi( @@ -293,9 +428,26 @@ auto MultiRepoFetch(std::shared_ptr<Configuration> const& config, (fetch_args.backup_to_remote and local_api) ? &(*local_api) : nullptr, (fetch_args.backup_to_remote and remote_api) ? &(*remote_api) : nullptr, common_args.jobs); + auto crit_git_op_ptr = std::make_shared<CriticalGitOpGuard>(); + auto critical_git_op_map = CreateCriticalGitOpMap(crit_git_op_ptr); + auto import_to_git_map = + CreateImportToGitMap(&critical_git_op_map, + common_args.git_path->string(), + *common_args.local_launcher, + common_args.jobs); + auto git_tree_fetch_map = + CreateGitTreeFetchMap(&critical_git_op_map, + &import_to_git_map, + common_args.git_path->string(), + *common_args.local_launcher, + serve_api_exists, + local_api ? &(*local_api) : nullptr, + remote_api ? &(*remote_api) : nullptr, + fetch_args.backup_to_remote, + common_args.jobs); // set up progress observer - JustMRProgress::Instance().SetTotal(archives_to_fetch.size()); + JustMRProgress::Instance().SetTotal(static_cast<int>(nr_a + nr_gt)); std::atomic<bool> done{false}; std::condition_variable cv{}; auto reporter = JustMRProgressReporter::Reporter(); @@ -303,18 +455,32 @@ auto MultiRepoFetch(std::shared_ptr<Configuration> const& config, std::thread([reporter, &done, &cv]() { reporter(&done, &cv); }); // do the fetch - bool failed{false}; + bool failed_archives{false}; { TaskSystem ts{common_args.jobs}; archive_fetch_map.ConsumeAfterKeysReady( &ts, archives_to_fetch, []([[maybe_unused]] auto const& values) {}, - [&failed](auto const& msg, bool fatal) { + [&failed_archives](auto const& msg, bool fatal) { + Logger::Log(fatal ? LogLevel::Error : LogLevel::Warning, + "While performing just-mr fetch:\n{}", + msg); + failed_archives = failed_archives or fatal; + }); + } + bool failed_git_trees{false}; + { + TaskSystem ts{common_args.jobs}; + git_tree_fetch_map.ConsumeAfterKeysReady( + &ts, + git_trees_to_fetch, + []([[maybe_unused]] auto const& values) {}, + [&failed_git_trees](auto const& msg, bool fatal) { Logger::Log(fatal ? LogLevel::Error : LogLevel::Warning, "While performing just-mr fetch:\n{}", msg); - failed = failed or fatal; + failed_git_trees = failed_git_trees or fatal; }); } @@ -323,7 +489,7 @@ auto MultiRepoFetch(std::shared_ptr<Configuration> const& config, cv.notify_all(); observer.join(); - if (failed) { + if (failed_archives or failed_git_trees) { return kExitFetchError; } // report success diff --git a/src/other_tools/just_mr/setup.cpp b/src/other_tools/just_mr/setup.cpp index d09f699f..814aa1e4 100644 --- a/src/other_tools/just_mr/setup.cpp +++ b/src/other_tools/just_mr/setup.cpp @@ -125,6 +125,7 @@ auto MultiRepoSetup(std::shared_ptr<Configuration> const& config, serve_api_exists, local_api ? &(*local_api) : nullptr, remote_api ? &(*remote_api) : nullptr, + false, /* backup_to_remote */ common_args.jobs); auto resolve_symlinks_map = CreateResolveSymlinksMap(); diff --git a/src/other_tools/ops_maps/TARGETS b/src/other_tools/ops_maps/TARGETS index 8a6a905e..95febb30 100644 --- a/src/other_tools/ops_maps/TARGETS +++ b/src/other_tools/ops_maps/TARGETS @@ -106,6 +106,7 @@ , "stage": ["src", "other_tools", "ops_maps"] , "private-deps": [ ["src/other_tools/ops_maps", "critical_git_op_map"] + , ["src/buildtool/common", "config"] , ["src/buildtool/execution_api/common", "common"] , ["src/buildtool/file_system", "file_system_manager"] , ["src/buildtool/serve_api/remote", "serve_api"] diff --git a/src/other_tools/ops_maps/git_tree_fetch_map.cpp b/src/other_tools/ops_maps/git_tree_fetch_map.cpp index bd3e32bd..8176c0b2 100644 --- a/src/other_tools/ops_maps/git_tree_fetch_map.cpp +++ b/src/other_tools/ops_maps/git_tree_fetch_map.cpp @@ -17,6 +17,7 @@ #include <cstdlib> #include "fmt/core.h" +#include "src/buildtool/common/repository_config.hpp" #include "src/buildtool/execution_api/common/execution_common.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/serve_api/remote/serve_api.hpp" @@ -27,6 +28,37 @@ #include "src/other_tools/just_mr/progress_reporting/progress.hpp" #include "src/other_tools/just_mr/progress_reporting/statistics.hpp" +namespace { + +void BackupToRemote(std::string const& tree_id, + IExecutionApi* remote_api, + GitTreeFetchMap::LoggerPtr const& logger) { + // try to back up to remote CAS + auto repo = RepositoryConfig{}; + if (repo.SetGitCAS(StorageConfig::GitRoot())) { + auto git_api = GitApi{&repo}; + if (not git_api.RetrieveToCas( + {Artifact::ObjectInfo{ + .digest = ArtifactDigest{tree_id, 0, /*is_tree=*/true}, + .type = ObjectType::Tree}}, + remote_api)) { + // give a warning + (*logger)(fmt::format( + "Failed to back up tree {} from local CAS to remote", + tree_id), + /*fatal=*/false); + } + } + else { + // give a warning + (*logger)(fmt::format("Failed to SetGitCAS at {}", + StorageConfig::GitRoot().string()), + /*fatal=*/false); + } +} + +} // namespace + auto CreateGitTreeFetchMap( gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, gsl::not_null<ImportToGitMap*> const& import_to_git_map, @@ -35,6 +67,7 @@ auto CreateGitTreeFetchMap( bool serve_api_exists, IExecutionApi* local_api, IExecutionApi* remote_api, + bool backup_to_remote, std::size_t jobs) -> GitTreeFetchMap { auto tree_to_cache = [critical_git_op_map, import_to_git_map, @@ -42,11 +75,12 @@ auto CreateGitTreeFetchMap( launcher, serve_api_exists, local_api, - remote_api](auto ts, - auto setter, - auto logger, - auto /*unused*/, - auto const& key) { + remote_api, + backup_to_remote](auto ts, + auto setter, + auto logger, + auto /*unused*/, + auto const& key) { // check whether tree exists already in Git cache; // ensure Git cache exists GitOpKey op_key = {.params = @@ -68,6 +102,7 @@ auto CreateGitTreeFetchMap( serve_api_exists, local_api, remote_api, + backup_to_remote, key, ts, setter, @@ -104,6 +139,11 @@ auto CreateGitTreeFetchMap( return; } if (*tree_found) { + // backup to remote, if needed + if (backup_to_remote and remote_api != nullptr) { + BackupToRemote(key.hash, remote_api, logger); + } + // success (*setter)(true /*cache hit*/); return; } @@ -160,6 +200,8 @@ auto CreateGitTreeFetchMap( /*fatal=*/true); return; } + // remote CAS already has the tree, so no need to + // even check backup_to_remote flag; // success (*setter)(false /*no cache hit*/); }, @@ -233,6 +275,8 @@ auto CreateGitTreeFetchMap( key, git_bin, launcher, + remote_api, + backup_to_remote, ts, setter, logger](auto const& values) { @@ -366,6 +410,8 @@ auto CreateGitTreeFetchMap( ts, {std::move(op_key)}, [tmp_dir, // keep tmp_dir alive + remote_api, + backup_to_remote, key, setter, logger](auto const& values) { @@ -378,6 +424,12 @@ auto CreateGitTreeFetchMap( } JustMRProgress::Instance().TaskTracker().Stop( key.origin); + // backup to remote, if needed + if (backup_to_remote and + remote_api != nullptr) { + BackupToRemote( + key.hash, remote_api, logger); + } // success (*setter)(false /*no cache hit*/); }, diff --git a/src/other_tools/ops_maps/git_tree_fetch_map.hpp b/src/other_tools/ops_maps/git_tree_fetch_map.hpp index b32b074f..85906e8c 100644 --- a/src/other_tools/ops_maps/git_tree_fetch_map.hpp +++ b/src/other_tools/ops_maps/git_tree_fetch_map.hpp @@ -60,6 +60,7 @@ using GitTreeFetchMap = AsyncMapConsumer<GitTreeInfo, bool>; bool serve_api_exists, IExecutionApi* local_api, IExecutionApi* remote_api, + bool backup_to_remote, std::size_t jobs) -> GitTreeFetchMap; #endif // INCLUDED_SRC_OTHER_TOOLS_OPS_MAPS_GIT_TREE_FETCH_MAP_HPP |