diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/other_tools/ops_maps/content_cas_map.hpp | 3 | ||||
-rw-r--r-- | src/other_tools/root_maps/TARGETS | 1 | ||||
-rw-r--r-- | src/other_tools/root_maps/content_git_map.cpp | 342 | ||||
-rw-r--r-- | src/other_tools/root_maps/content_git_map.hpp | 3 |
4 files changed, 258 insertions, 91 deletions
diff --git a/src/other_tools/ops_maps/content_cas_map.hpp b/src/other_tools/ops_maps/content_cas_map.hpp index 206970fb..9ee70c39 100644 --- a/src/other_tools/ops_maps/content_cas_map.hpp +++ b/src/other_tools/ops_maps/content_cas_map.hpp @@ -65,6 +65,9 @@ struct ArchiveRepoInfo { /// \brief Maps the content hash of an archive to nullptr, as we only care if /// the map fails or not. +/// For pure fetches (fetch_only == true), all possible locations are checked to +/// obtain the content blob before reverting to the network fetch. Otherwise, +/// only the remote CAS is checked before going to the network. using ContentCASMap = AsyncMapConsumer<ArchiveContent, std::nullptr_t>; [[nodiscard]] auto CreateContentCASMap( diff --git a/src/other_tools/root_maps/TARGETS b/src/other_tools/root_maps/TARGETS index a77590a7..bdbff649 100644 --- a/src/other_tools/root_maps/TARGETS +++ b/src/other_tools/root_maps/TARGETS @@ -110,6 +110,7 @@ , "stage": ["src", "other_tools", "root_maps"] , "private-deps": [ ["@", "fmt", "", "fmt"] + , "root_utils" , ["src/buildtool/execution_api/local", "local"] , ["src/buildtool/file_system", "file_root"] , ["src/buildtool/file_system", "file_storage"] diff --git a/src/other_tools/root_maps/content_git_map.cpp b/src/other_tools/root_maps/content_git_map.cpp index 2a50eb14..5dafef06 100644 --- a/src/other_tools/root_maps/content_git_map.cpp +++ b/src/other_tools/root_maps/content_git_map.cpp @@ -25,6 +25,7 @@ #include "src/buildtool/storage/storage.hpp" #include "src/other_tools/just_mr/progress_reporting/progress.hpp" #include "src/other_tools/just_mr/progress_reporting/statistics.hpp" +#include "src/other_tools/root_maps/root_utils.hpp" #include "src/other_tools/utils/content.hpp" #include "src/utils/archive/archive_ops.hpp" @@ -47,11 +48,92 @@ namespace { return "unrecognized repository type"; } +/// \brief Helper function for ensuring the serve endpoint, if given, has the +/// root if it was marked absent. +/// It guarantees the logger is called exactly once with fatal on failure, and +/// the setter on success. +void EnsureRootAsAbsent( + std::string const& tree_id, + ArchiveRepoInfo const& key, + bool serve_api_exists, + std::optional<gsl::not_null<IExecutionApi*>> const& remote_api, + bool is_on_remote, + bool is_cache_hit, + ContentGitMap::SetterPtr const& ws_setter, + ContentGitMap::LoggerPtr const& logger) { + // this is an absent root + if (serve_api_exists) { + // check if the serve endpoint has this root + auto has_tree = CheckServeHasAbsentRoot(tree_id, logger); + if (not has_tree) { + return; + } + if (not *has_tree) { + // try to see if serve endpoint has the information to prepare the + // root itself + if (auto served_tree_id = + ServeApi::RetrieveTreeFromArchive(key.archive.content, + key.repo_type, + key.subdir, + key.pragma_special, + /*sync_tree=*/false)) { + // if serve has set up the tree, it must match what we expect + if (tree_id != *served_tree_id) { + (*logger)(fmt::format("Mismatch in served root tree " + "id:\nexpected {}, but got {}", + tree_id, + *served_tree_id), + /*fatal=*/true); + return; + } + } + else { + if (not is_on_remote and not remote_api) { + (*logger)(fmt::format("Missing remote-execution endpoint " + "needed to sync workspace root {} " + "with the serve endpoint.", + tree_id), + /*fatal=*/true); + return; + } + // the tree is known locally, so we can upload it to remote CAS + // for the serve endpoint to retrieve it and set up the root + if (not EnsureAbsentRootOnServe( + tree_id, + StorageConfig::GitRoot(), + is_on_remote ? std::nullopt : remote_api, + logger, + /*no_sync_is_fatal=*/true)) { + return; + } + } + } + } + else { + // give warning + (*logger)(fmt::format("Workspace root {} marked absent but no serve " + "endpoint provided.", + tree_id), + /*fatal=*/false); + } + // set root as absent + (*ws_setter)( + std::pair(nlohmann::json::array({FileRoot::kGitTreeMarker, tree_id}), + /*is_cache_hit=*/is_cache_hit)); +} + +/// \brief Called to get the resolved root (with respect to symlinks) from an +/// unresolved tree. +/// It guarantees the logger is called exactly once with fatal on failure, and +/// the setter on success. void ResolveContentTree( ArchiveRepoInfo const& key, std::string const& tree_hash, bool is_cache_hit, - bool fetch_absent, + bool is_absent, + bool serve_api_exists, + std::optional<gsl::not_null<IExecutionApi*>> const& remote_api, + bool is_on_remote, gsl::not_null<ResolveSymlinksMap*> const& resolve_symlinks_map, gsl::not_null<TaskSystem*> const& ts, ContentGitMap::SetterPtr const& ws_setter, @@ -71,12 +153,24 @@ void ResolveContentTree( return; } // set the workspace root - auto root = nlohmann::json::array( - {FileRoot::kGitTreeMarker, *resolved_tree_id}); - if (fetch_absent or not key.absent) { - root.emplace_back(StorageConfig::GitRoot().string()); + if (is_absent) { + // try all the available means to generate & set the absent root + EnsureRootAsAbsent(*resolved_tree_id, + key, + serve_api_exists, + remote_api, + is_on_remote, + is_cache_hit, + ws_setter, + logger); + } + else { + (*ws_setter)(std::pair( + nlohmann::json::array({FileRoot::kGitTreeMarker, + *resolved_tree_id, + StorageConfig::GitRoot().string()}), + /*is_cache_hit=*/is_cache_hit)); } - (*ws_setter)(std::pair(std::move(root), true)); } else { // resolve tree @@ -91,7 +185,10 @@ void ResolveContentTree( tree_id_file, is_cache_hit, key, - fetch_absent, + is_absent, + serve_api_exists, + remote_api, + is_on_remote, ws_setter, logger](auto const& hashes) { if (not hashes[0]) { @@ -123,12 +220,26 @@ void ResolveContentTree( return; } // set the workspace root - auto root = nlohmann::json::array( - {FileRoot::kGitTreeMarker, resolved_tree.id}); - if (fetch_absent or not key.absent) { - root.emplace_back(StorageConfig::GitRoot().string()); + if (is_absent) { + // try all the available means to generate & set the + // absent root + EnsureRootAsAbsent(resolved_tree.id, + key, + serve_api_exists, + remote_api, + is_on_remote, + is_cache_hit, + ws_setter, + logger); + } + else { + (*ws_setter)( + std::pair(nlohmann::json::array( + {FileRoot::kGitTreeMarker, + resolved_tree.id, + StorageConfig::GitRoot().string()}), + /*is_cache_hit=*/is_cache_hit)); } - (*ws_setter)(std::pair(std::move(root), is_cache_hit)); }, [logger, content = key.archive.content](auto const& msg, bool fatal) { @@ -142,23 +253,39 @@ void ResolveContentTree( } else { // set the workspace root as-is - auto root = - nlohmann::json::array({FileRoot::kGitTreeMarker, tree_hash}); - if (fetch_absent or not key.absent) { - root.emplace_back(StorageConfig::GitRoot().string()); + if (is_absent) { + // try all the available means to generate & set the absent root + EnsureRootAsAbsent(tree_hash, + key, + serve_api_exists, + remote_api, + is_on_remote, + is_cache_hit, + ws_setter, + logger); + } + else { + (*ws_setter)(std::pair( + nlohmann::json::array({FileRoot::kGitTreeMarker, + tree_hash, + StorageConfig::GitRoot().string()}), + /*is_cache_hit=*/is_cache_hit)); } - (*ws_setter)(std::pair(std::move(root), is_cache_hit)); } } -// Helper function for improved readability. It guarantees the logger is called -// exactly once with fatal if failure. +/// \brief Called to store the file association and then set the root. +/// It guarantees the logger is called exactly once with fatal on failure, and +/// the setter on success. void WriteIdFileAndSetWSRoot( ArchiveRepoInfo const& key, std::string const& archive_tree_id, GitCASPtr const& just_git_cas, std::filesystem::path const& archive_tree_id_file, - bool fetch_absent, + bool is_absent, + bool serve_api_exists, + std::optional<gsl::not_null<IExecutionApi*>> const& remote_api, + bool is_on_remote, gsl::not_null<ResolveSymlinksMap*> const& resolve_symlinks_map, gsl::not_null<TaskSystem*> const& ts, ContentGitMap::SetterPtr const& setter, @@ -198,20 +325,27 @@ void WriteIdFileAndSetWSRoot( ResolveContentTree(key, *subtree_hash, false, /*is_cache_hit*/ - fetch_absent, + is_absent, + serve_api_exists, + remote_api, + is_on_remote, resolve_symlinks_map, ts, setter, logger); } -// Helper function for improved readability. It guarantees the logger is called -// exactly once with fatal if failure. +/// \brief Called when archive is in local CAS. Performs the import-to-git and +/// follow-up processing. +/// It guarantees the logger is called exactly once with fatal on failure, and +/// the setter on success. void ExtractAndImportToGit( ArchiveRepoInfo const& key, std::filesystem::path const& content_cas_path, std::filesystem::path const& archive_tree_id_file, - bool fetch_absent, + bool is_absent, + bool serve_api_exists, + std::optional<gsl::not_null<IExecutionApi*>> const& remote_api, gsl::not_null<ImportToGitMap*> const& import_to_git_map, gsl::not_null<ResolveSymlinksMap*> const& resolve_symlinks_map, gsl::not_null<TaskSystem*> const& ts, @@ -244,7 +378,9 @@ void ExtractAndImportToGit( [tmp_dir, // keep tmp_dir alive archive_tree_id_file, key, - fetch_absent, + is_absent, + serve_api_exists, + remote_api, resolve_symlinks_map, ts, setter, @@ -269,7 +405,10 @@ void ExtractAndImportToGit( archive_tree_id, just_git_cas, archive_tree_id_file, - fetch_absent, + is_absent, + serve_api_exists, + remote_api, + false, /*is_on_remote*/ resolve_symlinks_map, ts, setter, @@ -343,6 +482,8 @@ auto CreateContentGitMap( [archive_tree_id = *archive_tree_id, key, fetch_absent, + serve_api_exists, + remote_api, resolve_symlinks_map, ts, setter, @@ -377,15 +518,19 @@ auto CreateContentGitMap( if (not subtree_hash) { return; } - // resolve tree and set workspace root - ResolveContentTree(key, - *subtree_hash, - true, /*is_cache_hit*/ - fetch_absent, - resolve_symlinks_map, - ts, - setter, - logger); + // resolve tree and set workspace root (present or absent) + ResolveContentTree( + key, + *subtree_hash, + /*is_cache_hit = */ true, + /*is_absent = */ (key.absent and not fetch_absent), + serve_api_exists, + remote_api, + /*is_on_remote = */ false, + resolve_symlinks_map, + ts, + setter, + logger); }, [logger, target_path = StorageConfig::GitRoot()]( auto const& msg, bool fatal) { @@ -403,18 +548,22 @@ auto CreateContentGitMap( auto digest = ArtifactDigest(key.archive.content, 0, false); if (auto content_cas_path = cas.BlobPath(digest, /*is_executable=*/false)) { - ExtractAndImportToGit(key, - *content_cas_path, - archive_tree_id_file, - fetch_absent, - import_to_git_map, - resolve_symlinks_map, - ts, - setter, - logger); + ExtractAndImportToGit( + key, + *content_cas_path, + archive_tree_id_file, + /*is_absent = */ (key.absent and not fetch_absent), + serve_api_exists, + remote_api, + import_to_git_map, + resolve_symlinks_map, + ts, + setter, + logger); // done return; } + // check if content is in Git cache; // ensure Git cache GitOpKey op_key = {.params = @@ -491,15 +640,18 @@ auto CreateContentGitMap( } if (auto content_cas_path = cas.BlobPath(digest, /*is_executable=*/false)) { - ExtractAndImportToGit(key, - *content_cas_path, - archive_tree_id_file, - fetch_absent, - import_to_git_map, - resolve_symlinks_map, - ts, - setter, - logger); + ExtractAndImportToGit( + key, + *content_cas_path, + archive_tree_id_file, + /*is_absent=*/(key.absent and not fetch_absent), + serve_api_exists, + remote_api, + import_to_git_map, + resolve_symlinks_map, + ts, + setter, + logger); // done return; } @@ -527,18 +679,22 @@ auto CreateContentGitMap( cas.BlobPath(digest, /*is_executable=*/false)) { JustMRProgress::Instance().TaskTracker().Stop( key.archive.origin); - ExtractAndImportToGit(key, - *content_cas_path, - archive_tree_id_file, - fetch_absent, - import_to_git_map, - resolve_symlinks_map, - ts, - setter, - logger); + ExtractAndImportToGit( + key, + *content_cas_path, + archive_tree_id_file, + /*is_absent=*/(key.absent and not fetch_absent), + serve_api_exists, + remote_api, + import_to_git_map, + resolve_symlinks_map, + ts, + setter, + logger); // done return; } + // check if content is known to remote serve service if (serve_api_exists) { // if purely absent, request the resolved subdir tree @@ -560,13 +716,6 @@ auto CreateContentGitMap( /*is_cache_hit = */ false)); return; } - // give warning - (*logger)( - fmt::format("Tree at subdir {} for archive {} " - "could not be served", - key.subdir, - key.archive.content), - /*fatal=*/false); } // otherwise, request (and sync) the whole archive tree, // UNRESOLVED, to ensure we maintain the id file @@ -603,13 +752,17 @@ auto CreateContentGitMap( JustMRProgress::Instance() .TaskTracker() .Stop(key.archive.origin); - // write to id file and process subdir tree + // write to id file and process subdir tree; + // this results in a present root WriteIdFileAndSetWSRoot( key, *root_tree_id, just_git_cas, archive_tree_id_file, - fetch_absent, + /*is_absent=*/false, + /*serve_api_exists=*/false, + /*remote_api=*/std::nullopt, + /*is_on_remote=*/false, resolve_symlinks_map, ts, setter, @@ -668,7 +821,6 @@ auto CreateContentGitMap( [tmp_dir, // keep tmp_dir alive key, root_tree_id, - fetch_absent, just_git_cas, archive_tree_id_file, resolve_symlinks_map, @@ -682,13 +834,17 @@ auto CreateContentGitMap( return; } // write to id file and process - // subdir tree + // subdir tree; this results in a + // present root WriteIdFileAndSetWSRoot( key, *root_tree_id, just_git_cas, archive_tree_id_file, - fetch_absent, + /*is_absent=*/false, + /*serve_api_exists=*/false, + /*remote_api=*/std::nullopt, + /*is_on_remote=*/false, resolve_symlinks_map, ts, setter, @@ -708,13 +864,14 @@ auto CreateContentGitMap( // done return; } - // try to fetch content from network + + // try the remote CAS, otherwise revert to a + // network fetch content_cas_map->ConsumeAfterKeysReady( ts, {key.archive}, [archive_tree_id_file, key, - fetch_absent, import_to_git_map, resolve_symlinks_map, ts, @@ -736,11 +893,14 @@ auto CreateContentGitMap( /*is_executable=*/ false) .value(); + // this results in a present root ExtractAndImportToGit( key, content_cas_path, archive_tree_id_file, - fetch_absent, + /*is_absent=*/false, + /*serve_api_exists=*/false, + /*remote_api=*/std::nullopt, import_to_git_map, resolve_symlinks_map, ts, @@ -766,24 +926,22 @@ auto CreateContentGitMap( /*fatal=*/false); } } - else { - if (key.absent) { - // give warning - (*logger)( - fmt::format("Missing serve endpoint for " - "content {} marked absent requires " - "slower network fetch.", - key.archive.content), - /*fatal=*/false); - } + + // reaching here can only result in a root that is present + if (key.absent and not fetch_absent) { + (*logger)(fmt::format("Cannot create workspace root " + "as absent for content {}.", + key.archive.content), + /*fatal=*/true); + return; } - // revert to network fetch + + // check remote CAS, otherwise revert to a network fetch content_cas_map->ConsumeAfterKeysReady( ts, {key.archive}, [archive_tree_id_file, key, - fetch_absent, import_to_git_map, resolve_symlinks_map, ts, @@ -798,10 +956,14 @@ auto CreateContentGitMap( key.archive.content, 0, false), /*is_executable=*/false) .value(); + // root can only be present, so default all + // arguments that refer to a serve endpoint ExtractAndImportToGit(key, content_cas_path, archive_tree_id_file, - fetch_absent, + /*is_absent=*/false, + /*serve_api_exists=*/false, + /*remote_api=*/std::nullopt, import_to_git_map, resolve_symlinks_map, ts, diff --git a/src/other_tools/root_maps/content_git_map.hpp b/src/other_tools/root_maps/content_git_map.hpp index 4fa4807b..86db9443 100644 --- a/src/other_tools/root_maps/content_git_map.hpp +++ b/src/other_tools/root_maps/content_git_map.hpp @@ -30,7 +30,8 @@ /// \brief Maps the content of an archive to the resulting Git tree WS root, /// together with the information whether it was a cache hit. using ContentGitMap = - AsyncMapConsumer<ArchiveRepoInfo, std::pair<nlohmann::json, bool>>; + AsyncMapConsumer<ArchiveRepoInfo, + std::pair<nlohmann::json /*root*/, bool /*is_cache_hit*/>>; [[nodiscard]] auto CreateContentGitMap( gsl::not_null<ContentCASMap*> const& content_cas_map, |