summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/other_tools/just_mr/setup.cpp12
-rw-r--r--src/other_tools/root_maps/TARGETS13
-rw-r--r--src/other_tools/root_maps/distdir_git_map.cpp262
-rw-r--r--src/other_tools/root_maps/distdir_git_map.hpp4
4 files changed, 209 insertions, 82 deletions
diff --git a/src/other_tools/just_mr/setup.cpp b/src/other_tools/just_mr/setup.cpp
index dedf9ecb..96411363 100644
--- a/src/other_tools/just_mr/setup.cpp
+++ b/src/other_tools/just_mr/setup.cpp
@@ -160,10 +160,14 @@ auto MultiRepoSetup(std::shared_ptr<Configuration> const& config,
&import_to_git_map,
&resolve_symlinks_map,
common_args.jobs);
- auto distdir_git_map = CreateDistdirGitMap(&content_cas_map,
- &import_to_git_map,
- &critical_git_op_map,
- common_args.jobs);
+ auto distdir_git_map =
+ CreateDistdirGitMap(&content_cas_map,
+ &import_to_git_map,
+ &critical_git_op_map,
+ serve_api_exists,
+ local_api ? &(*local_api) : nullptr,
+ remote_api ? &(*remote_api) : nullptr,
+ common_args.jobs);
auto tree_id_git_map = CreateTreeIdGitMap(
&git_tree_fetch_map, common_args.fetch_absent, common_args.jobs);
auto repos_to_setup_map = CreateReposToSetupMap(config,
diff --git a/src/other_tools/root_maps/TARGETS b/src/other_tools/root_maps/TARGETS
index 7eab68a5..f9d5ed8d 100644
--- a/src/other_tools/root_maps/TARGETS
+++ b/src/other_tools/root_maps/TARGETS
@@ -12,18 +12,23 @@
, "stage": ["src", "other_tools", "root_maps"]
, "private-deps":
[ ["@", "fmt", "", "fmt"]
- , ["src/other_tools/ops_maps", "critical_git_op_map"]
+ , ["src/buildtool/common", "common"]
, ["src/buildtool/execution_api/common", "common"]
, ["src/buildtool/execution_api/local", "config"]
, ["src/buildtool/execution_api/local", "local"]
+ , ["src/buildtool/file_system", "file_root"]
+ , ["src/buildtool/file_system", "file_storage"]
+ , ["src/buildtool/file_system", "git_repo"]
+ , ["src/buildtool/file_system", "object_type"]
+ , ["src/buildtool/multithreading", "task_system"]
+ , ["src/buildtool/serve_api/remote", "serve_api"]
, ["src/buildtool/storage", "config"]
, ["src/buildtool/storage", "fs_utils"]
, ["src/buildtool/storage", "storage"]
- , ["src/utils/cpp", "tmp_dir"]
- , ["src/buildtool/file_system", "file_storage"]
+ , ["src/other_tools/ops_maps", "critical_git_op_map"]
, ["src/other_tools/just_mr/progress_reporting", "progress"]
, ["src/other_tools/just_mr/progress_reporting", "statistics"]
- , ["src/buildtool/file_system", "file_root"]
+ , ["src/utils/cpp", "tmp_dir"]
]
}
, "commit_git_map":
diff --git a/src/other_tools/root_maps/distdir_git_map.cpp b/src/other_tools/root_maps/distdir_git_map.cpp
index 677e3057..6fa9d2d2 100644
--- a/src/other_tools/root_maps/distdir_git_map.cpp
+++ b/src/other_tools/root_maps/distdir_git_map.cpp
@@ -17,9 +17,15 @@
#include <algorithm>
#include "fmt/core.h"
+#include "src/buildtool/common/artifact.hpp"
+#include "src/buildtool/common/artifact_digest.hpp"
#include "src/buildtool/execution_api/common/execution_common.hpp"
#include "src/buildtool/file_system/file_root.hpp"
#include "src/buildtool/file_system/file_storage.hpp"
+#include "src/buildtool/file_system/git_repo.hpp"
+#include "src/buildtool/file_system/object_type.hpp"
+#include "src/buildtool/multithreading/task_system.hpp"
+#include "src/buildtool/serve_api/remote/serve_api.hpp"
#include "src/buildtool/storage/config.hpp"
#include "src/buildtool/storage/fs_utils.hpp"
#include "src/buildtool/storage/storage.hpp"
@@ -52,20 +58,92 @@ namespace {
});
}
+void ImportFromCASAndSetRoot(
+ std::shared_ptr<std::unordered_map<std::string, std::string>> const&
+ content_list,
+ std::string const& content_id,
+ std::filesystem::path const& distdir_tree_id_file,
+ gsl::not_null<ImportToGitMap*> const& import_to_git_map,
+ gsl::not_null<TaskSystem*> const& ts,
+ DistdirGitMap::SetterPtr const& setter,
+ DistdirGitMap::LoggerPtr const& logger) {
+ // create the links to CAS
+ auto tmp_dir = StorageUtils::CreateTypedTmpDir("distdir");
+ if (not tmp_dir) {
+ (*logger)(fmt::format("Failed to create tmp path for "
+ "distdir target {}",
+ content_id),
+ /*fatal=*/true);
+ return;
+ }
+ // link content from CAS into tmp dir
+ if (not LinkToCAS(content_list, tmp_dir->GetPath())) {
+ (*logger)(
+ fmt::format("Failed to create links to CAS content!", content_id),
+ /*fatal=*/true);
+ return;
+ }
+ // do import to git
+ CommitInfo c_info{tmp_dir->GetPath(), "distdir", content_id};
+ import_to_git_map->ConsumeAfterKeysReady(
+ ts,
+ {std::move(c_info)},
+ [tmp_dir, // keep tmp_dir alive
+ distdir_tree_id_file,
+ setter,
+ logger](auto const& values) {
+ // check for errors
+ if (not values[0]->second) {
+ (*logger)("Importing to git failed",
+ /*fatal=*/true);
+ return;
+ }
+ // only the tree is of interest
+ std::string distdir_tree_id = values[0]->first;
+ // write to tree id file
+ if (not StorageUtils::WriteTreeIDFile(distdir_tree_id_file,
+ distdir_tree_id)) {
+ (*logger)(fmt::format("Failed to write tree id to file {}",
+ distdir_tree_id_file.string()),
+ /*fatal=*/true);
+ return;
+ }
+ // set the workspace root as present
+ (*setter)(std::pair(
+ nlohmann::json::array({FileRoot::kGitTreeMarker,
+ distdir_tree_id,
+ StorageConfig::GitRoot().string()}),
+ false /*no cache hit*/));
+ },
+ [logger, target_path = tmp_dir->GetPath()](auto const& msg,
+ bool fatal) {
+ (*logger)(fmt::format("While importing target {} to git:\n{}",
+ target_path.string(),
+ msg),
+ fatal);
+ });
+}
+
} // namespace
auto CreateDistdirGitMap(
gsl::not_null<ContentCASMap*> const& content_cas_map,
gsl::not_null<ImportToGitMap*> const& import_to_git_map,
gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map,
+ bool serve_api_exists,
+ IExecutionApi* local_api,
+ IExecutionApi* remote_api,
std::size_t jobs) -> DistdirGitMap {
auto distdir_to_git = [content_cas_map,
import_to_git_map,
- critical_git_op_map](auto ts,
- auto setter,
- auto logger,
- auto /* unused */,
- auto const& key) {
+ critical_git_op_map,
+ serve_api_exists,
+ local_api,
+ remote_api](auto ts,
+ auto setter,
+ auto logger,
+ auto /* unused */,
+ auto const& key) {
auto distdir_tree_id_file =
StorageUtils::GetDistdirTreeIDFile(key.content_id);
if (FileSystemManager::Exists(distdir_tree_id_file)) {
@@ -110,95 +188,131 @@ auto CreateDistdirGitMap(
if (not absent) {
root.emplace_back(StorageConfig::GitRoot().string());
}
- (*setter)(std::pair(std::move(root), true));
+ (*setter)(std::pair(std::move(root), true /*cache hit*/));
},
[logger, target_path = StorageConfig::GitRoot()](
auto const& msg, bool fatal) {
- (*logger)(fmt::format("While running critical Git "
- "op ENSURE_INIT for "
- "target {}:\n{}",
+ (*logger)(fmt::format("While running critical Git op "
+ "ENSURE_INIT for target {}:\n{}",
target_path.string(),
msg),
fatal);
});
}
else {
- // fetch the gathered distdir repos into CAS
+ // create in-memory Git tree of distdir content to get the tree id
+ GitRepo::tree_entries_t entries{};
+ entries.reserve(key.content_list->size());
+ for (auto const& kv : *key.content_list) {
+ // tree_entries_t type expects raw ids
+ auto raw_id = FromHexString(kv.second);
+ if (not raw_id) {
+ (*logger)(fmt::format("While processing distdir {}: "
+ "Unexpected failure in conversion to "
+ "raw id of distfile content {}",
+ key.content_id,
+ kv.second),
+ /*is_fatal=*/true);
+ return;
+ }
+ entries[*raw_id].emplace_back(kv.first, ObjectType::File);
+ }
+ auto tree = GitRepo::CreateShallowTree(entries);
+ if (not tree) {
+ (*logger)(fmt::format("Failed to construct in-memory tree for "
+ "distdir content {}",
+ key.content_id),
+ /*is_fatal=*/true);
+ return;
+ }
+ // get hash from raw_id
+ auto tree_id = ToHexString(tree->first);
+ // if pure absent, we simply set the root tree
+ if (key.absent) {
+ (*setter)(std::pair(
+ nlohmann::json::array({FileRoot::kGitTreeMarker, tree_id}),
+ false /*no cache hit*/));
+ return;
+ }
+ // otherwise, we check first the serve endpoint
+ if (serve_api_exists) {
+ if (auto served_tree_id = ServeApi::RetrieveTreeFromDistdir(
+ key.content_list, /*sync_tree=*/true)) {
+ // sanity check
+ if (*served_tree_id != tree_id) {
+ (*logger)(
+ fmt::format("Unexpected served tree id for distdir "
+ "content {}:\nExpected {}, but got {}",
+ key.content_id,
+ tree_id,
+ *served_tree_id),
+ /*is_fatal=*/true);
+ return;
+ }
+ // get the ditfiles from remote CAS in bulk
+ std::vector<Artifact::ObjectInfo> objects{};
+ objects.reserve(key.content_list->size());
+ for (auto const& kv : *key.content_list) {
+ objects.emplace_back(Artifact::ObjectInfo{
+ .digest =
+ ArtifactDigest{kv.second, 0, /*is_tree=*/false},
+ .type = ObjectType::File});
+ }
+ if (remote_api != nullptr and local_api != nullptr and
+ remote_api->RetrieveToCas(objects, local_api)) {
+ ImportFromCASAndSetRoot(key.content_list,
+ key.content_id,
+ distdir_tree_id_file,
+ import_to_git_map,
+ ts,
+ setter,
+ logger);
+ // done!
+ return;
+ }
+ // just serve should have made the blobs available in the
+ // remote CAS, so log this attempt and revert to default
+ // fetch each blob individually
+ (*logger)(fmt::format("Tree {} marked as served, but not "
+ "all distfile blobs found on remote",
+ *served_tree_id),
+ /*fatal=*/false);
+ }
+ else {
+ // give warning
+ (*logger)(
+ fmt::format("Distdir content {} could not be served",
+ key.content_id),
+ /*fatal=*/false);
+ }
+ }
+ else {
+ // give warning
+ (*logger)(
+ fmt::format("Missing serve endpoint for distdir {} marked "
+ "absent requires slower network fetch.",
+ key.content_id),
+ /*fatal=*/false);
+ }
+ // revert to fetching the gathered distdir repos into CAS
content_cas_map->ConsumeAfterKeysReady(
ts,
*key.repos_to_fetch,
[distdir_tree_id_file,
content_id = key.content_id,
content_list = key.content_list,
- absent = key.absent,
import_to_git_map,
ts,
setter,
logger]([[maybe_unused]] auto const& values) mutable {
// repos are in CAS
- // create the links to CAS
- auto tmp_dir = StorageUtils::CreateTypedTmpDir("distdir");
- if (not tmp_dir) {
- (*logger)(fmt::format("Failed to create tmp path for "
- "distdir target {}",
- content_id),
- /*fatal=*/true);
- return;
- }
- // link content from CAS into tmp dir
- if (not LinkToCAS(content_list, tmp_dir->GetPath())) {
- (*logger)(fmt::format(
- "Failed to create links to CAS content!",
- content_id),
- /*fatal=*/true);
- return;
- }
- // do import to git
- CommitInfo c_info{
- tmp_dir->GetPath(), "distdir", content_id};
- import_to_git_map->ConsumeAfterKeysReady(
- ts,
- {std::move(c_info)},
- [tmp_dir, // keep tmp_dir alive
- distdir_tree_id_file,
- absent,
- setter,
- logger](auto const& values) {
- // check for errors
- if (not values[0]->second) {
- (*logger)("Importing to git failed",
- /*fatal=*/true);
- return;
- }
- // only the tree is of interest
- std::string distdir_tree_id = values[0]->first;
- // write to tree id file
- if (not StorageUtils::WriteTreeIDFile(
- distdir_tree_id_file, distdir_tree_id)) {
- (*logger)(
- fmt::format(
- "Failed to write tree id to file {}",
- distdir_tree_id_file.string()),
- /*fatal=*/true);
- return;
- }
- // set the workspace root
- auto root = nlohmann::json::array(
- {FileRoot::kGitTreeMarker, distdir_tree_id});
- if (not absent) {
- root.emplace_back(
- StorageConfig::GitRoot().string());
- }
- (*setter)(std::pair(std::move(root), false));
- },
- [logger, target_path = tmp_dir->GetPath()](
- auto const& msg, bool fatal) {
- (*logger)(fmt::format("While importing target {} "
- "to git:\n{}",
- target_path.string(),
- msg),
- fatal);
- });
+ ImportFromCASAndSetRoot(content_list,
+ content_id,
+ distdir_tree_id_file,
+ import_to_git_map,
+ ts,
+ setter,
+ logger);
},
[logger, content_id = key.content_id](auto const& msg,
bool fatal) {
diff --git a/src/other_tools/root_maps/distdir_git_map.hpp b/src/other_tools/root_maps/distdir_git_map.hpp
index c2f03e08..df2b6064 100644
--- a/src/other_tools/root_maps/distdir_git_map.hpp
+++ b/src/other_tools/root_maps/distdir_git_map.hpp
@@ -22,6 +22,7 @@
#include "gsl/gsl"
#include "nlohmann/json.hpp"
+#include "src/buildtool/execution_api/common/execution_api.hpp"
#include "src/other_tools/ops_maps/content_cas_map.hpp"
#include "src/other_tools/ops_maps/import_to_git_map.hpp"
@@ -50,6 +51,9 @@ using DistdirGitMap =
gsl::not_null<ContentCASMap*> const& content_cas_map,
gsl::not_null<ImportToGitMap*> const& import_to_git_map,
gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map,
+ bool serve_api_exists,
+ IExecutionApi* local_api,
+ IExecutionApi* remote_api,
std::size_t jobs) -> DistdirGitMap;
namespace std {