summary refs log tree commit diff
diff options
context:
space:
mode:
author Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> 2024-01-19 18:27:49 +0100
committer Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> 2024-01-26 14:51:43 +0100
commit d59d5b8fc36b79c973bd525c182dbf0ba8b99251 (patch)
tree 8781d13328c1b35c257c9aa0dc1482647fb72561
parent 98875552f6afa593ffd5d115c2379e47edfddfbf (diff)
download justbuild-d59d5b8fc36b79c973bd525c182dbf0ba8b99251.tar.gz
just-mr 'git tree' repository: Absent roots sync with serve endpoint
To take advantage of absent roots, we need to ensure that a given serve endpoint can build against the tree of this generated root. To this end, for a 'git tree' repository we set the root as absent only if the given serve endpoint has this root, or if the tree is known locally and can be provided via the remote CAS. While generating an absent root, the fetch command is never called. Generating an absent root without a serve endpoint being provided is still allowed, but results in a warning.
-rw-r--r-- src/other_tools/just_mr/setup.cpp 9
-rw-r--r-- src/other_tools/ops_maps/git_tree_fetch_map.cpp 12
-rw-r--r-- src/other_tools/root_maps/TARGETS 8
-rw-r--r-- src/other_tools/root_maps/tree_id_git_map.cpp 317
-rw-r--r-- src/other_tools/root_maps/tree_id_git_map.hpp 12
5 files changed, 344 insertions, 14 deletions
diff --git a/src/other_tools/just_mr/setup.cpp b/src/other_tools/just_mr/setup.cpp
index e5078592..f1a65157 100644
--- a/src/other_tools/just_mr/setup.cpp
+++ b/src/other_tools/just_mr/setup.cpp
@@ -195,7 +195,14 @@ auto MultiRepoSetup(std::shared_ptr<Configuration> const& config,
common_args.jobs);
auto tree_id_git_map = CreateTreeIdGitMap(
- &git_tree_fetch_map, common_args.fetch_absent, common_args.jobs);
+ &git_tree_fetch_map,
+ &critical_git_op_map,
+ &import_to_git_map,
+ common_args.fetch_absent,
+ serve_api_exists,
+ &(*local_api),
+ remote_api ? std::make_optional(&(*remote_api)) : std::nullopt,
+ common_args.jobs);
auto repos_to_setup_map = CreateReposToSetupMap(config,
main,
diff --git a/src/other_tools/ops_maps/git_tree_fetch_map.cpp b/src/other_tools/ops_maps/git_tree_fetch_map.cpp
index 00e0b1b4..e70ded2b 100644
--- a/src/other_tools/ops_maps/git_tree_fetch_map.cpp
+++ b/src/other_tools/ops_maps/git_tree_fetch_map.cpp
@@ -258,15 +258,19 @@ auto CreateGitTreeFetchMap(
// create temporary location for command execution root
auto tmp_dir = StorageUtils::CreateTypedTmpDir("git-tree");
if (not tmp_dir) {
- (*logger)("Failed to create tmp directory for tree id map!",
- /*fatal=*/true);
+ (*logger)(
+ "Failed to create execution root tmp directory for "
+ "tree id map!",
+ /*fatal=*/true);
return;
}
// create temporary location for storing command result files
auto out_dir = StorageUtils::CreateTypedTmpDir("git-tree");
if (not out_dir) {
- (*logger)("Failed to create tmp directory for tree id map!",
- /*fatal=*/true);
+ (*logger)(
+ "Failed to create results tmp directory for tree id "
+ "map!",
+ /*fatal=*/true);
return;
}
// execute command in temporary location
diff --git a/src/other_tools/root_maps/TARGETS b/src/other_tools/root_maps/TARGETS
index f67c2474..94fa6b7a 100644
--- a/src/other_tools/root_maps/TARGETS
+++ b/src/other_tools/root_maps/TARGETS
@@ -134,14 +134,22 @@
, "deps":
[ ["@", "gsl", "", "gsl"]
, ["@", "json", "", "json"]
+ , ["src/buildtool/execution_api/common", "common"]
+ , ["src/other_tools/ops_maps", "critical_git_op_map"]
, ["src/other_tools/ops_maps", "git_tree_fetch_map"]
+ , ["src/other_tools/ops_maps", "import_to_git_map"]
, ["src/utils/cpp", "hash_combine"]
]
, "stage": ["src", "other_tools", "root_maps"]
, "private-deps":
[ ["@", "fmt", "", "fmt"]
+ , "root_utils"
+ , ["src/buildtool/common", "config"]
+ , ["src/buildtool/execution_api/git", "git"]
, ["src/buildtool/file_system", "file_root"]
, ["src/buildtool/storage", "config"]
+ , ["src/buildtool/storage", "fs_utils"]
+ , ["src/buildtool/storage", "storage"]
]
}
, "root_utils":
diff --git a/src/other_tools/root_maps/tree_id_git_map.cpp b/src/other_tools/root_maps/tree_id_git_map.cpp
index 3d5ccf1e..c8e55d9c 100644
--- a/src/other_tools/root_maps/tree_id_git_map.cpp
+++ b/src/other_tools/root_maps/tree_id_git_map.cpp
@@ -15,20 +15,320 @@
#include "src/other_tools/root_maps/tree_id_git_map.hpp"
#include "fmt/core.h"
+#include "src/buildtool/common/repository_config.hpp"
+#include "src/buildtool/execution_api/git/git_api.hpp"
#include "src/buildtool/file_system/file_root.hpp"
#include "src/buildtool/storage/config.hpp"
+#include "src/buildtool/storage/fs_utils.hpp"
+#include "src/buildtool/storage/storage.hpp"
+#include "src/other_tools/root_maps/root_utils.hpp"
+
+namespace {
+
+/// \brief Guarantees it terminates by either calling the setter or calling the
+/// logger with fatal.
+void UploadToServeAndSetRoot(std::string const& tree_id,
+ ArtifactDigest const& digest,
+ gsl::not_null<IExecutionApi*> const& remote_api,
+ bool ignore_special,
+ TreeIdGitMap::SetterPtr const& setter,
+ TreeIdGitMap::LoggerPtr const& logger) {
+ // upload to remote CAS
+ auto repo_config = RepositoryConfig{};
+ if (repo_config.SetGitCAS(StorageConfig::GitRoot())) {
+ auto git_api = GitApi{&repo_config};
+ if (not git_api.RetrieveToCas(
+ {Artifact::ObjectInfo{.digest = digest,
+ .type = ObjectType::Tree}},
+ remote_api)) {
+ (*logger)(fmt::format("Failed to sync tree {} from local Git cache "
+ "to remote CAS",
+ tree_id),
+ /*fatal=*/true);
+ return;
+ }
+ }
+ else {
+ (*logger)(fmt::format("Failed to SetGitCAS at {}",
+ StorageConfig::GitRoot().string()),
+ /*fatal=*/true);
+ return;
+ }
+ // tell serve to set up the root from the remote CAS tree;
+ // upload can be skipped
+ if (EnsureAbsentRootOnServe(tree_id,
+ /*repo_path=*/"",
+ /*remote_api=*/std::nullopt,
+ logger,
+ /*no_sync_is_fatal=*/true)) {
+ // set workspace root as absent
+ auto root = nlohmann::json::array(
+ {ignore_special ? FileRoot::kGitTreeIgnoreSpecialMarker
+ : FileRoot::kGitTreeMarker,
+ tree_id});
+ (*setter)(std::pair(std::move(root), /*is_cache_hit=*/false));
+ return;
+ }
+}
+
+/// \brief Guarantees it terminates by either calling the setter or calling the
+/// logger with fatal.
+void MoveCASTreeToGitAndProcess(
+ std::string const& tree_id,
+ ArtifactDigest const& digest,
+ gsl::not_null<ImportToGitMap*> const& import_to_git_map,
+ gsl::not_null<IExecutionApi*> const& local_api,
+ gsl::not_null<IExecutionApi*> const& remote_api,
+ bool ignore_special,
+ gsl::not_null<TaskSystem*> const& ts,
+ TreeIdGitMap::SetterPtr const& setter,
+ TreeIdGitMap::LoggerPtr const& logger) {
+ // Move tree from CAS to local Git storage
+ auto tmp_dir = StorageUtils::CreateTypedTmpDir("fetch-remote-git-tree");
+ if (not tmp_dir) {
+ (*logger)(fmt::format("Failed to create tmp directory for copying "
+ "git-tree {} from remote CAS",
+ digest.hash()),
+ true);
+ return;
+ }
+ if (not local_api->RetrieveToPaths(
+ {Artifact::ObjectInfo{.digest = digest, .type = ObjectType::Tree}},
+ {tmp_dir->GetPath()})) {
+ (*logger)(fmt::format("Failed to copy git-tree {} to {}",
+ tree_id,
+ tmp_dir->GetPath().string()),
+ true);
+ return;
+ }
+ CommitInfo c_info{tmp_dir->GetPath(), "tree", tree_id};
+ import_to_git_map->ConsumeAfterKeysReady(
+ ts,
+ {std::move(c_info)},
+ [tmp_dir, // keep tmp_dir alive
+ tree_id,
+ digest,
+ remote_api,
+ ignore_special,
+ setter,
+ logger](auto const& values) {
+ if (not values[0]->second) {
+ (*logger)("Importing to git failed",
+ /*fatal=*/true);
+ return;
+ }
+ // upload tree from Git cache to remote CAS and tell serve to set up
+ // the root from the remote CAS tree; set root as absent on success
+ UploadToServeAndSetRoot(
+ tree_id, digest, remote_api, ignore_special, setter, logger);
+ },
+ [logger, tmp_dir, tree_id](auto const& msg, bool fatal) {
+ (*logger)(fmt::format(
+ "While moving git-tree {} from {} to local git:\n{}",
+ tree_id,
+ tmp_dir->GetPath().string(),
+ msg),
+ fatal);
+ });
+}
+
+} // namespace
auto CreateTreeIdGitMap(
gsl::not_null<GitTreeFetchMap*> const& git_tree_fetch_map,
+ gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map,
+ gsl::not_null<ImportToGitMap*> const& import_to_git_map,
bool fetch_absent,
+ bool serve_api_exists,
+ gsl::not_null<IExecutionApi*> const& local_api,
+ std::optional<gsl::not_null<IExecutionApi*>> const& remote_api,
std::size_t jobs) -> TreeIdGitMap {
- auto tree_to_git = [git_tree_fetch_map, fetch_absent](auto ts,
- auto setter,
- auto logger,
- auto /*unused*/,
- auto const& key) {
- // if root is actually absent, no work needs to be done
+ auto tree_to_git = [git_tree_fetch_map,
+ critical_git_op_map,
+ import_to_git_map,
+ fetch_absent,
+ serve_api_exists,
+ local_api,
+ remote_api](auto ts,
+ auto setter,
+ auto logger,
+ auto /*unused*/,
+ auto const& key) {
+ // if root is actually absent, check if serve endpoint knows the tree
+ // for building against it and only set the workspace root if tree is
+ // found on the serve endpoint or it can be made available to it;
+ // otherwise, error out
if (key.absent and not fetch_absent) {
+ if (serve_api_exists) {
+ // check serve endpoint
+ auto has_tree =
+ CheckServeHasAbsentRoot(key.tree_info.hash, logger);
+ if (not has_tree) {
+ return;
+ }
+ if (*has_tree) {
+ // set workspace root as absent
+ auto root = nlohmann::json::array(
+ {key.ignore_special
+ ? FileRoot::kGitTreeIgnoreSpecialMarker
+ : FileRoot::kGitTreeMarker,
+ key.tree_info.hash});
+ (*setter)(
+ std::pair(std::move(root), /*is_cache_hit=*/false));
+ return;
+ }
+ // at this point we cannot proceed without the remote api
+ if (not remote_api) {
+ (*logger)(fmt::format("Missing remote-execution endpoint "
+ "needed to sync workspace root {} "
+ "with the serve endpoint.",
+ key.tree_info.hash),
+ /*fatal=*/true);
+ return;
+ }
+ // check if tree is already in remote CAS
+ auto digest =
+ ArtifactDigest{key.tree_info.hash, 0, /*is_tree=*/true};
+ if (remote_api.value()->IsAvailable({digest})) {
+ // tell serve to set up the root from the remote CAS tree;
+ // upload can be skipped
+ if (EnsureAbsentRootOnServe(key.tree_info.hash,
+ /*repo_path=*/"",
+ /*remote_api=*/std::nullopt,
+ logger,
+ /*no_sync_is_fatal=*/true)) {
+ // set workspace root as absent
+ auto root = nlohmann::json::array(
+ {key.ignore_special
+ ? FileRoot::kGitTreeIgnoreSpecialMarker
+ : FileRoot::kGitTreeMarker,
+ key.tree_info.hash});
+ (*setter)(
+ std::pair(std::move(root), /*is_cache_hit=*/false));
+ return;
+ }
+ (*logger)(
+ fmt::format("Serve endpoint failed to create workspace "
+ "root {} that locally was marked absent.",
+ key.tree_info.hash),
+ /*fatal=*/true);
+ return;
+ }
+ // check if tree is in Git cache;
+ // ensure Git cache exists
+ GitOpKey op_key = {
+ .params =
+ {
+ StorageConfig::GitRoot(), // target_path
+ "", // git_hash
+ "", // branch
+ std::nullopt, // message
+ true // init_bare
+ },
+ .op_type = GitOpType::ENSURE_INIT};
+ critical_git_op_map->ConsumeAfterKeysReady(
+ ts,
+ {std::move(op_key)},
+ [digest,
+ import_to_git_map,
+ local_api,
+ remote_api,
+ key,
+ ts,
+ setter,
+ logger](auto const& values) {
+ GitOpValue op_result = *values[0];
+ // check flag
+ if (not op_result.result) {
+ (*logger)("Git cache init failed",
+ /*fatal=*/true);
+ return;
+ }
+ // Open fake tmp repo to check if tree is known to Git
+ // cache
+ auto git_repo = GitRepoRemote::Open(
+ op_result.git_cas); // link fake repo to odb
+ if (not git_repo) {
+ (*logger)(
+ fmt::format("Could not open repository {}",
+ StorageConfig::GitRoot().string()),
+ /*fatal=*/true);
+ return;
+ }
+ // setup wrapped logger
+ auto wrapped_logger =
+ std::make_shared<AsyncMapConsumerLogger>(
+ [logger](auto const& msg, bool fatal) {
+ (*logger)(
+ fmt::format("While checking tree "
+ "exists in Git cache:\n{}",
+ msg),
+ fatal);
+ });
+ // check if the desired tree ID is in Git cache
+ auto tree_found = git_repo->CheckTreeExists(
+ key.tree_info.hash, wrapped_logger);
+ if (not tree_found) {
+ // errors encountered
+ return;
+ }
+ if (*tree_found) {
+ // upload tree from Git cache to remote CAS and tell
+ // serve to set up the root from the remote CAS
+ // tree, then set root as absent
+ UploadToServeAndSetRoot(key.tree_info.hash,
+ digest,
+ *remote_api,
+ key.ignore_special,
+ setter,
+ logger);
+ // done!
+ return;
+ }
+ // check if tree is known to local CAS
+ auto const& cas = Storage::Instance().CAS();
+ if (auto path = cas.TreePath(digest)) {
+ // Move tree locally from CAS to Git cache, then
+ // continue processing it by UploadToServeAndSetRoot
+ MoveCASTreeToGitAndProcess(key.tree_info.hash,
+ digest,
+ import_to_git_map,
+ local_api,
+ *remote_api,
+ key.ignore_special,
+ ts,
+ setter,
+ logger);
+ // done!
+ return;
+ }
+ // tree is not known locally, so we cannot
+ // provide it to the serve endpoint and thus we
+ // cannot create the absent root
+ (*logger)(fmt::format("Cannot create workspace root "
+ "{} as absent for the provided "
+ "serve endpoint.",
+ key.tree_info.hash),
+ /*fatal=*/true);
+ },
+ [logger, target_path = StorageConfig::GitRoot()](
+ auto const& msg, bool fatal) {
+ (*logger)(
+ fmt::format("While running critical Git op "
+ "ENSURE_INIT bare for target {}:\n{}",
+ target_path.string(),
+ msg),
+ fatal);
+ });
+ // done!
+ return;
+ }
+ // give warning that serve endpoint is missing
+ (*logger)(fmt::format("Workspace root {} marked absent but no "
+ "serve endpoint provided.",
+ key.tree_info.hash),
+ /*fatal=*/false);
+ // set workspace root as absent
auto root = nlohmann::json::array(
{key.ignore_special ? FileRoot::kGitTreeIgnoreSpecialMarker
: FileRoot::kGitTreeMarker,
@@ -36,8 +336,9 @@ auto CreateTreeIdGitMap(
(*setter)(std::pair(std::move(root), false));
return;
}
- // otherwise, one must fetch;
- // make sure the required tree is in Git cache
+ // if root is not absent, proceed with usual fetch logic: check locally,
+ // check serve endpoint, check remote-execution endpoint, and lastly
+ // default to network
git_tree_fetch_map->ConsumeAfterKeysReady(
ts,
{key.tree_info},
diff --git a/src/other_tools/root_maps/tree_id_git_map.hpp b/src/other_tools/root_maps/tree_id_git_map.hpp
index da4d9328..6a4e93b0 100644
--- a/src/other_tools/root_maps/tree_id_git_map.hpp
+++ b/src/other_tools/root_maps/tree_id_git_map.hpp
@@ -15,13 +15,17 @@
#ifndef INCLUDED_SRC_OTHER_TOOLS_ROOT_MAPS_TREE_ID_GIT_MAP_HPP
#define INCLUDED_SRC_OTHER_TOOLS_ROOT_MAPS_TREE_ID_GIT_MAP_HPP
+#include <optional>
#include <string>
#include <utility>
#include <vector>
#include "gsl/gsl"
#include "nlohmann/json.hpp"
+#include "src/buildtool/execution_api/common/execution_api.hpp"
+#include "src/other_tools/ops_maps/critical_git_op_map.hpp"
#include "src/other_tools/ops_maps/git_tree_fetch_map.hpp"
+#include "src/other_tools/ops_maps/import_to_git_map.hpp"
#include "src/utils/cpp/hash_combine.hpp"
struct TreeIdInfo {
@@ -55,11 +59,17 @@ struct hash<TreeIdInfo> {
/// \brief Maps a known tree provided through a generic command to its
/// workspace root and the information whether it was a cache hit.
using TreeIdGitMap =
- AsyncMapConsumer<TreeIdInfo, std::pair<nlohmann::json, bool>>;
+ AsyncMapConsumer<TreeIdInfo,
+ std::pair<nlohmann::json /*root*/, bool /*is_cache_hit*/>>;
[[nodiscard]] auto CreateTreeIdGitMap(
gsl::not_null<GitTreeFetchMap*> const& git_tree_fetch_map,
+ gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map,
+ gsl::not_null<ImportToGitMap*> const& import_to_git_map,
bool fetch_absent,
+ bool serve_api_exists,
+ gsl::not_null<IExecutionApi*> const& local_api,
+ std::optional<gsl::not_null<IExecutionApi*>> const& remote_api,
std::size_t jobs) -> TreeIdGitMap;
#endif // INCLUDED_SRC_OTHER_TOOLS_ROOT_MAPS_TREE_ID_GIT_MAP_HPP