diff options
author | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2024-10-24 11:21:23 +0200 |
---|---|---|
committer | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2024-10-25 13:00:43 +0200 |
commit | b98addc8f8e3e62e1213cd967f20aa631057f84e (patch) | |
tree | d4e7399fc4a02fa4e0e0684a1cdfa2bfa35673c3 /src/other_tools/ops_maps | |
parent | 4018960c59d3fd0702c714e404ac913b2e83c3f5 (diff) | |
download | justbuild-b98addc8f8e3e62e1213cd967f20aa631057f84e.tar.gz |
serve service: Respond with digest for blob and tree checks
...to be able to interrogate remotes irrespective of protocol.
When the serve endpoint is active, it will provide the correct digest
with which to interrogate the remote endpoint. Otherwise, for a
compatible remote, check the file mappings for the correct digest.
The serve service proto file is updated accordingly.
Diffstat (limited to 'src/other_tools/ops_maps')
-rw-r--r-- | src/other_tools/ops_maps/TARGETS | 2 | ||||
-rw-r--r-- | src/other_tools/ops_maps/content_cas_map.cpp | 166 | ||||
-rw-r--r-- | src/other_tools/ops_maps/content_cas_map.hpp | 6 | ||||
-rw-r--r-- | src/other_tools/ops_maps/git_tree_fetch_map.cpp | 111 |
4 files changed, 191 insertions, 94 deletions
diff --git a/src/other_tools/ops_maps/TARGETS b/src/other_tools/ops_maps/TARGETS index 96d9db57..34c2d355 100644 --- a/src/other_tools/ops_maps/TARGETS +++ b/src/other_tools/ops_maps/TARGETS @@ -77,6 +77,7 @@ , "stage": ["src", "other_tools", "ops_maps"] , "private-deps": [ ["@", "fmt", "", "fmt"] + , ["src/buildtool/execution_api/serve", "utils"] , ["src/buildtool/file_system", "file_storage"] , ["src/buildtool/storage", "fs_utils"] , ["src/other_tools/git_operations", "git_repo_remote"] @@ -127,6 +128,7 @@ , ["src/buildtool/common", "config"] , ["src/buildtool/common", "protocol_traits"] , ["src/buildtool/execution_api/serve", "mr_git_api"] + , ["src/buildtool/execution_api/serve", "utils"] , ["src/buildtool/file_system", "file_system_manager"] , ["src/buildtool/multithreading", "task_system"] , ["src/buildtool/system", "system_command"] diff --git a/src/other_tools/ops_maps/content_cas_map.cpp b/src/other_tools/ops_maps/content_cas_map.cpp index ebec2b04..a8e42adb 100644 --- a/src/other_tools/ops_maps/content_cas_map.cpp +++ b/src/other_tools/ops_maps/content_cas_map.cpp @@ -17,6 +17,7 @@ #include <utility> // std::move #include "fmt/core.h" +#include "src/buildtool/execution_api/serve/utils.hpp" #include "src/buildtool/file_system/file_storage.hpp" #include "src/buildtool/storage/fs_utils.hpp" #include "src/other_tools/git_operations/git_repo_remote.hpp" @@ -28,7 +29,7 @@ namespace { void FetchFromNetwork(ArchiveContent const& key, MirrorsPtr const& additional_mirrors, CAInfoPtr const& ca_info, - Storage const& storage, + Storage const& native_storage, gsl::not_null<JustMRProgress*> const& progress, ContentCASMap::SetterPtr const& setter, ContentCASMap::LoggerPtr const& logger) { @@ -72,9 +73,9 @@ void FetchFromNetwork(ArchiveContent const& key, return; } } - // add the fetched data to CAS - auto path = StorageUtils::AddToCAS(storage, *data); - // check one last time if content is in CAS now + // add the fetched data to native CAS + auto path = 
StorageUtils::AddToCAS(native_storage, *data); + // check one last time if content is in native CAS now if (not path) { (*logger)(fmt::format("Failed to store fetched content from {}", key.fetch_url), @@ -82,9 +83,9 @@ void FetchFromNetwork(ArchiveContent const& key, return; } // check that the data we stored actually produces the requested digest - auto const& cas = storage.CAS(); - if (not cas.BlobPath(ArtifactDigest{key.content_hash, 0}, - /*is_executable=*/false)) { + auto const& native_cas = native_storage.CAS(); + if (not native_cas.BlobPath(ArtifactDigest{key.content_hash, 0}, + /*is_executable=*/false)) { (*logger)( fmt::format("Content {} was not found at given fetch location {}", key.content_hash.Hash(), @@ -105,8 +106,10 @@ auto CreateContentCASMap( CAInfoPtr const& ca_info, gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, ServeApi const* serve, - gsl::not_null<StorageConfig const*> const& storage_config, - gsl::not_null<Storage const*> const& storage, + gsl::not_null<StorageConfig const*> const& native_storage_config, + StorageConfig const* compat_storage_config, + gsl::not_null<Storage const*> const& native_storage, + Storage const* compat_storage, gsl::not_null<IExecutionApi const*> const& local_api, IExecutionApi const* remote_api, gsl::not_null<JustMRProgress*> const& progress, @@ -116,8 +119,10 @@ auto CreateContentCASMap( ca_info, critical_git_op_map, serve, - storage, - storage_config, + native_storage_config, + compat_storage_config, + native_storage, + compat_storage, local_api, remote_api, progress](auto ts, @@ -125,34 +130,37 @@ auto CreateContentCASMap( auto logger, auto /*unused*/, auto const& key) { - auto const digest = ArtifactDigest{key.content_hash, 0}; - // check local CAS - if (local_api->IsAvailable(digest)) { + auto const native_digest = ArtifactDigest{key.content_hash, 0}; + // check native local CAS + if (local_api->IsAvailable(native_digest)) { (*setter)(nullptr); return; } // check if content is in Git cache; // 
ensure Git cache - GitOpKey op_key = {.params = - { - storage_config->GitRoot(), // target_path - "", // git_hash - std::nullopt, // message - std::nullopt, // source_path - true // init_bare - }, - .op_type = GitOpType::ENSURE_INIT}; + GitOpKey op_key = { + .params = + { + native_storage_config->GitRoot(), // target_path + "", // git_hash + std::nullopt, // message + std::nullopt, // source_path + true // init_bare + }, + .op_type = GitOpType::ENSURE_INIT}; critical_git_op_map->ConsumeAfterKeysReady( ts, {std::move(op_key)}, [key, - digest, + native_digest, just_mr_paths, additional_mirrors, ca_info, serve, - storage, - storage_config, + native_storage_config, + compat_storage_config, + native_storage, + compat_storage, local_api, remote_api, progress, @@ -189,26 +197,27 @@ auto CreateContentCASMap( // blob check failed return; } - auto const& cas = storage->CAS(); + auto const& native_cas = native_storage->CAS(); if (res.second) { - // blob found; add it to CAS - if (not cas.StoreBlob(*res.second, - /*is_executable=*/false)) { + // blob found; add it to native CAS + if (not native_cas.StoreBlob(*res.second, + /*is_executable=*/false)) { (*logger)(fmt::format("Failed to store content {} " - "to local CAS", + "to native local CAS", key.content_hash.Hash()), /*fatal=*/true); return; } - // content stored to CAS + // content stored to native CAS (*setter)(nullptr); return; } // check for blob in older generations for (std::size_t generation = 1; - generation < storage_config->num_generations; + generation < native_storage_config->num_generations; generation++) { - auto old = storage_config->GitGenerationRoot(generation); + auto old = + native_storage_config->GitGenerationRoot(generation); if (FileSystemManager::IsDirectory(old)) { auto old_repo = GitRepo::Open(old); auto no_logging = @@ -219,17 +228,16 @@ auto CreateContentCASMap( key.content_hash.Hash(), no_logging); if (res.first and res.second) { // read blob from older generation - auto const& cas = 
storage->CAS(); - if (not cas.StoreBlob( + if (not native_cas.StoreBlob( *res.second, /*is_executable=*/false)) { (*logger)(fmt::format( "Failed to store content {} " - "to local CAS", + "to native local CAS", key.content_hash.Hash()), /*fatal=*/true); return; } - // content stored in CAS + // content stored in native CAS (*setter)(nullptr); return; } @@ -239,37 +247,91 @@ auto CreateContentCASMap( // blob not found in Git cache progress->TaskTracker().Start(key.origin); - // add distfile to CAS + // add distfile to native CAS auto repo_distfile = (key.distfile ? key.distfile.value() : std::filesystem::path(key.fetch_url) .filename() .string()); StorageUtils::AddDistfileToCAS( - *storage, repo_distfile, just_mr_paths); - // check if content is in CAS now - if (cas.BlobPath(digest, /*is_executable=*/false)) { + *native_storage, repo_distfile, just_mr_paths); + // check if content is in native CAS now + if (native_cas.BlobPath(native_digest, + /*is_executable=*/false)) { progress->TaskTracker().Stop(key.origin); (*setter)(nullptr); return; } // check if content is known to remote serve service - if (serve != nullptr and remote_api != nullptr and - serve->ContentInRemoteCAS(key.content_hash.Hash())) { + if (serve != nullptr and remote_api != nullptr) { + auto const remote_digest = + serve->ContentInRemoteCAS(key.content_hash.Hash()); // try to get content from remote CAS - if (remote_api->RetrieveToCas( - {Artifact::ObjectInfo{.digest = digest, + if (remote_digest and + remote_api->RetrieveToCas( + {Artifact::ObjectInfo{.digest = *remote_digest, .type = ObjectType::File}}, *local_api)) { progress->TaskTracker().Stop(key.origin); + if (remote_digest->hash() == key.content_hash.Hash()) { + // content is in native local CAS, so all done + (*setter)(nullptr); + return; + } + // if content is in compatible local CAS, rehash it + if (compat_storage_config == nullptr or + compat_storage == nullptr) { + // sanity check + (*logger)("No compatible local storage set up!", + 
/*fatal=*/true); + return; + } + auto const& compat_cas = compat_storage->CAS(); + auto const cas_path = compat_cas.BlobPath( + *remote_digest, /*is_executable=*/false); + if (not cas_path) { + (*logger)(fmt::format("Expected content {} not " + "found in " + "compatible local CAS", + remote_digest->hash()), + /*fatal=*/true); + return; + } + auto rehashed_digest = native_cas.StoreBlob( + *cas_path, /*is_executable=*/false); + if (not rehashed_digest or + rehashed_digest->hash() != + key.content_hash.Hash()) { + (*logger)(fmt::format("Failed to rehash content {} " + "into native local CAS", + remote_digest->hash()), + /*fatal=*/true); + return; + } + // cache association between digests + auto error_msg = MRApiUtils::StoreRehashedDigest( + native_digest, + *rehashed_digest, + ObjectType::File, + *native_storage_config, + *compat_storage_config); + if (error_msg) { + (*logger)(fmt::format("Failed to cache digests " + "mapping with:\n{}", + *error_msg), + /*fatal=*/true); + return; + } + // content is in native local CAS now (*setter)(nullptr); return; } } - // check remote execution endpoint, if given - if (remote_api != nullptr and + // check if content is on remote, if given and native + if (compat_storage_config == nullptr and + remote_api != nullptr and remote_api->RetrieveToCas( - {Artifact::ObjectInfo{.digest = digest, + {Artifact::ObjectInfo{.digest = native_digest, .type = ObjectType::File}}, *local_api)) { progress->TaskTracker().Stop(key.origin); @@ -280,13 +342,13 @@ auto CreateContentCASMap( FetchFromNetwork(key, additional_mirrors, ca_info, - *storage, + *native_storage, progress, setter, logger); }, - [logger, target_path = storage_config->GitRoot()](auto const& msg, - bool fatal) { + [logger, target_path = native_storage_config->GitRoot()]( + auto const& msg, bool fatal) { (*logger)(fmt::format("While running critical Git op " "ENSURE_INIT for target {}:\n{}", target_path.string(), diff --git a/src/other_tools/ops_maps/content_cas_map.hpp 
b/src/other_tools/ops_maps/content_cas_map.hpp index ff5bf22c..e161be19 100644 --- a/src/other_tools/ops_maps/content_cas_map.hpp +++ b/src/other_tools/ops_maps/content_cas_map.hpp @@ -89,8 +89,10 @@ using ContentCASMap = AsyncMapConsumer<ArchiveContent, std::nullptr_t>; CAInfoPtr const& ca_info, gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, ServeApi const* serve, - gsl::not_null<StorageConfig const*> const& storage_config, - gsl::not_null<Storage const*> const& storage, + gsl::not_null<StorageConfig const*> const& native_storage_config, + StorageConfig const* compat_storage_config, + gsl::not_null<Storage const*> const& native_storage, + Storage const* compat_storage, gsl::not_null<IExecutionApi const*> const& local_api, IExecutionApi const* remote_api, gsl::not_null<JustMRProgress*> const& progress, diff --git a/src/other_tools/ops_maps/git_tree_fetch_map.cpp b/src/other_tools/ops_maps/git_tree_fetch_map.cpp index 6f1b5556..0980e8a7 100644 --- a/src/other_tools/ops_maps/git_tree_fetch_map.cpp +++ b/src/other_tools/ops_maps/git_tree_fetch_map.cpp @@ -24,6 +24,7 @@ #include "src/buildtool/common/repository_config.hpp" #include "src/buildtool/execution_api/common/execution_common.hpp" #include "src/buildtool/execution_api/serve/mr_git_api.hpp" +#include "src/buildtool/execution_api/serve/utils.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/multithreading/task_system.hpp" #include "src/buildtool/system/system_command.hpp" @@ -69,7 +70,8 @@ void BackupToRemote(ArtifactDigest const& digest, /// \brief Moves the root tree from local CAS to the Git cache and sets the /// root. 
void MoveCASTreeToGit( - ArtifactDigest const& digest, + HashInfo const& tree_hash, + ArtifactDigest const& digest, // native or compatible gsl::not_null<ImportToGitMap*> const& import_to_git_map, gsl::not_null<StorageConfig const*> const& native_storage_config, StorageConfig const* compat_storage_config, @@ -86,7 +88,7 @@ void MoveCASTreeToGit( if (not tmp_dir) { (*logger)(fmt::format("Failed to create tmp directory for copying " "git-tree {} from remote CAS", - digest.hash()), + tree_hash.Hash()), true); return; } @@ -94,17 +96,17 @@ void MoveCASTreeToGit( {Artifact::ObjectInfo{.digest = digest, .type = ObjectType::Tree}}, {tmp_dir->GetPath()})) { (*logger)(fmt::format("Failed to copy git-tree {} to {}", - digest.hash(), + tree_hash.Hash(), tmp_dir->GetPath().string()), true); return; } - CommitInfo c_info{tmp_dir->GetPath(), "tree", digest.hash()}; + CommitInfo c_info{tmp_dir->GetPath(), "tree", tree_hash.Hash()}; import_to_git_map->ConsumeAfterKeysReady( ts, {std::move(c_info)}, [tmp_dir, // keep tmp_dir alive - digest, + tree_hash, native_storage_config, compat_storage_config, compat_storage, @@ -120,7 +122,9 @@ void MoveCASTreeToGit( } // backup to remote if needed and in compatibility mode if (backup_to_remote and remote_api != nullptr) { - BackupToRemote(digest, + // back up only native digests, as that is what Git stores + auto const native_digest = ArtifactDigest{tree_hash, 0}; + BackupToRemote(native_digest, *native_storage_config, compat_storage_config, compat_storage, @@ -130,10 +134,10 @@ void MoveCASTreeToGit( } (*setter)(false /*no cache hit*/); }, - [logger, tmp_dir, digest](auto const& msg, bool fatal) { + [logger, tmp_dir, tree_hash](auto const& msg, bool fatal) { (*logger)(fmt::format( "While moving git-tree {} from {} to local git:\n{}", - digest.hash(), + tree_hash.Hash(), tmp_dir->GetPath().string(), msg), fatal); @@ -191,12 +195,12 @@ void TagAndSetRoot( (*setter)(false /*no cache hit*/); }, [logger, repo, digest](auto const& msg, bool 
fatal) { - (*logger)( - fmt::format("While tagging tree {} in {} to keep it alive:\n{}", - digest.hash(), - repo.string(), - msg), - fatal); + (*logger)(fmt::format("While tagging tree {} in {} to keep it " + "alive:\n{}", + digest.hash(), + repo.string(), + msg), + fatal); }); } @@ -246,7 +250,8 @@ void TakeTreeFromOlderGeneration( auto tag = *op_result.result; auto git_repo = GitRepoRemote::Open(git_cas); if (not git_repo) { - (*logger)("Could not open main git repository", /*fatal=*/true); + (*logger)("Could not open main git repository", + /*fatal=*/true); return; } auto fetch_logger = std::make_shared<AsyncMapConsumerLogger>( @@ -379,7 +384,7 @@ auto CreateGitTreeFetchMap( return; } if (*tree_found) { - // backup to remote if needed and in native mode + // backup to remote if needed if (backup_to_remote and remote_api != nullptr) { BackupToRemote(ArtifactDigest{key.tree_hash, 0}, *native_storage_config, @@ -429,11 +434,12 @@ auto CreateGitTreeFetchMap( } } - // check if tree is known to local CAS - auto const digest = ArtifactDigest{key.tree_hash, 0}; - if (local_api->IsAvailable(digest)) { + // check if tree is known to native local CAS + auto const native_digest = ArtifactDigest{key.tree_hash, 0}; + if (local_api->IsAvailable(native_digest)) { // import tree to Git cache - MoveCASTreeToGit(digest, + MoveCASTreeToGit(key.tree_hash, + native_digest, import_to_git_map, native_storage_config, compat_storage_config, @@ -449,32 +455,57 @@ auto CreateGitTreeFetchMap( } progress->TaskTracker().Start(key.origin); // check if tree is known to remote serve service and can be - // made available in remote CAS + // provided via the remote CAS if (serve != nullptr and remote_api != nullptr) { - // as we anyway interrogate the remote execution endpoint, - // we're only interested here in the serve endpoint making - // an attempt to upload the tree, if known, to remote CAS - std::ignore = serve->TreeInRemoteCAS(key.tree_hash.Hash()); + auto const remote_digest = + 
serve->TreeInRemoteCAS(key.tree_hash.Hash()); + // try to get content from remote CAS into local CAS; + // whether it is retrieved locally in native or + // compatible CAS, it will be imported to Git either way + if (remote_digest and + remote_api->RetrieveToCas( + {Artifact::ObjectInfo{.digest = *remote_digest, + .type = ObjectType::Tree}}, + *local_api)) { + progress->TaskTracker().Stop(key.origin); + MoveCASTreeToGit(key.tree_hash, + *remote_digest, + import_to_git_map, + native_storage_config, + compat_storage_config, + compat_storage, + local_api, + remote_api, + false, // tree already on remote, + // so ignore backing up + ts, + setter, + logger); + // done! + return; + } } - // check if tree is in remote CAS, if a remote is given - if (remote_api != nullptr and + // check if tree is on remote, if given and native + if (compat_storage_config == nullptr and + remote_api != nullptr and remote_api->RetrieveToCas( - {Artifact::ObjectInfo{.digest = digest, + {Artifact::ObjectInfo{.digest = native_digest, .type = ObjectType::Tree}}, *local_api)) { progress->TaskTracker().Stop(key.origin); - MoveCASTreeToGit( - digest, - import_to_git_map, - native_storage_config, - compat_storage_config, - compat_storage, - local_api, - remote_api, - false, // tree already in remote, so ignore backing up - ts, - setter, - logger); + MoveCASTreeToGit(key.tree_hash, + native_digest, + import_to_git_map, + native_storage_config, + compat_storage_config, + compat_storage, + local_api, + remote_api, + false, // tree already on remote, + // so ignore backing up + ts, + setter, + logger); // done! return; } |