author     Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com>  2024-02-23 16:35:34 +0100
committer  Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com>  2024-02-27 14:35:44 +0100
commit     0e913dd3eee248bccf2b26161ca0a715d039e716 (patch)
tree       3bfac4df42c9759dfe60cb7621ad782d422b7674
parent     842145ff59594be8624390e22d815c71bc5c85d6 (diff)
download   justbuild-0e913dd3eee248bccf2b26161ca0a715d039e716.tar.gz
just-mr setup archive: Local roots require the archive blob to be local too
For archive repositories we need to ensure that a non-absent root
is backed by the archive content blob in the local CAS, so that the
proper root tree file associations are also kept. This change also
simplifies the content_cas_map logic by removing the previous
separation of implementation logic between fetching and setting up
the workspace root.
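Read together with the patch below, the simplified content_cas_map tries each source for the archive blob in a fixed order: local CAS, the Git cache, a distfile, the serve endpoint's CAS, the remote-execution CAS, and finally a network fetch. The following is a minimal standalone sketch of that fallback chain; every function here is a hypothetical stub, not an actual justbuild API.

```cpp
// Standalone illustration only: the real implementation lives in
// src/other_tools/ops_maps/content_cas_map.cpp and uses asynchronous maps;
// all functions below are hypothetical stubs, not justbuild APIs.
#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Each step returns true if the archive content blob is in the local CAS
// after the step has run.
bool InLocalCas(const std::string& /*content*/) { return false; }
bool FromGitCache(const std::string& /*content*/) { return false; }
bool FromDistfile(const std::string& /*content*/) { return false; }
bool FromServeEndpointCas(const std::string& /*content*/) { return false; }
bool FromRemoteExecutionCas(const std::string& /*content*/) { return false; }
bool FetchFromNetwork(const std::string& /*content*/) { return true; }

// One chain for all archive content, whether needed for a plain fetch or for
// setting up a workspace root (the fetch_only flag is gone).
bool EnsureInCas(const std::string& content) {
    const std::vector<std::function<bool(const std::string&)>> steps = {
        InLocalCas,           FromGitCache,           FromDistfile,
        FromServeEndpointCas, FromRemoteExecutionCas, FetchFromNetwork};
    for (const auto& step : steps) {
        if (step(content)) {
            return true;
        }
    }
    return false;
}

int main() {
    // With the stubs above only the final network fetch "succeeds".
    std::cout << std::boolalpha << EnsureInCas("deadbeef") << '\n';  // true
}
```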
-rw-r--r--  src/other_tools/just_mr/fetch.cpp               |   3
-rw-r--r--  src/other_tools/just_mr/setup.cpp               |   1
-rw-r--r--  src/other_tools/ops_maps/archive_fetch_map.cpp  |  87
-rw-r--r--  src/other_tools/ops_maps/content_cas_map.cpp    | 284
-rw-r--r--  src/other_tools/ops_maps/content_cas_map.hpp    |   5
-rw-r--r--  src/other_tools/repo_map/repos_to_setup_map.cpp |   6
-rw-r--r--  src/other_tools/root_maps/content_git_map.cpp   | 626
-rw-r--r--  src/other_tools/root_maps/content_git_map.hpp   |   1
8 files changed, 361 insertions(+), 652 deletions(-)
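The largest change is in content_git_map.cpp, which now branches on whether the root is absent: for an absent root (with fetch_absent not set) only local caches and the serve endpoint are consulted, while a present root always goes through content_cas_map first, so the archive blob is guaranteed to be in the local CAS before the tree is imported to Git. A hedged sketch of that branch structure follows; the names mirror functions in the patch but with simplified, illustrative signatures.

```cpp
// Standalone illustration only: the names mirror the patch but the
// signatures are simplified stand-ins, not the real justbuild interfaces.
#include <iostream>
#include <optional>
#include <string>

// Hypothetical: ask the serve endpoint for the resolved root tree.
std::optional<std::string> TryAbsentRootViaServe(const std::string& /*content*/) {
    return std::nullopt;  // pretend the serve endpoint does not know the archive
}
// Hypothetical: run the content_cas_map chain and return the local blob path.
std::string EnsureContentInLocalCas(const std::string& content) {
    return "/local/cas/" + content;
}
// Hypothetical: unpack the archive and import the (sub)tree into Git.
std::string ExtractAndImportToGit(const std::string& blob_path) {
    return "tree-id-for:" + blob_path;
}

void SetUpArchiveRoot(const std::string& content, bool absent, bool fetch_absent) {
    if (absent and not fetch_absent) {
        // Absent root: in the real code the local CAS and Git cache are checked
        // first; here we only model the serve-endpoint query. The archive itself
        // is never fetched on this path.
        if (auto tree = TryAbsentRootViaServe(content)) {
            std::cout << "absent root backed by tree " << *tree << '\n';
            return;
        }
        std::cerr << "Cannot create workspace root as absent for content "
                  << content << '\n';
        return;
    }
    // Present root: the archive blob must be in the local CAS first, so that
    // the root tree file association can be recorded.
    auto blob_path = EnsureContentInLocalCas(content);
    std::cout << "present root " << ExtractAndImportToGit(blob_path) << '\n';
}

int main() {
    SetUpArchiveRoot("deadbeef", /*absent=*/false, /*fetch_absent=*/false);
}
```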
diff --git a/src/other_tools/just_mr/fetch.cpp b/src/other_tools/just_mr/fetch.cpp index db2eedcd..a47b02cf 100644 --- a/src/other_tools/just_mr/fetch.cpp +++ b/src/other_tools/just_mr/fetch.cpp @@ -301,8 +301,7 @@ auto MultiRepoFetch(std::shared_ptr<Configuration> const& config, ? std::make_optional( repo_desc_sha512->String()) : std::nullopt, - .origin = repo_name, - .fetch_only = true}, + .origin = repo_name}, .repo_type = repo_type_str, .subdir = subdir.empty() ? "." : subdir.string(), .pragma_special = std::nullopt, // not used diff --git a/src/other_tools/just_mr/setup.cpp b/src/other_tools/just_mr/setup.cpp index 6d716d54..ee6c2f30 100644 --- a/src/other_tools/just_mr/setup.cpp +++ b/src/other_tools/just_mr/setup.cpp @@ -170,7 +170,6 @@ auto MultiRepoSetup(std::shared_ptr<Configuration> const& config, &resolve_symlinks_map, &critical_git_op_map, serve_api_exists, - &(*local_api), remote_api ? std::make_optional(&(*remote_api)) : std::nullopt, common_args.fetch_absent, common_args.jobs); diff --git a/src/other_tools/ops_maps/archive_fetch_map.cpp b/src/other_tools/ops_maps/archive_fetch_map.cpp index f2b5842b..ab4ca7e8 100644 --- a/src/other_tools/ops_maps/archive_fetch_map.cpp +++ b/src/other_tools/ops_maps/archive_fetch_map.cpp @@ -32,14 +32,14 @@ void ProcessContent( gsl::not_null<IExecutionApi*> const& local_api, std::optional<gsl::not_null<IExecutionApi*>> const& remote_api, std::string const& content, - ArtifactDigest const& digest, ArchiveFetchMap::SetterPtr const& setter, ArchiveFetchMap::LoggerPtr const& logger) { // try to back up to remote CAS if (remote_api) { if (not local_api->RetrieveToCas( - {Artifact::ObjectInfo{.digest = digest, - .type = ObjectType::File}}, + {Artifact::ObjectInfo{ + .digest = ArtifactDigest{content, 0, /*is_tree=*/false}, + .type = ObjectType::File}}, *remote_api)) { // give a warning (*logger)(fmt::format("Failed to back up content {} from local CAS " @@ -87,55 +87,38 @@ auto CreateArchiveFetchMap( .filename() .string()); auto target_name = fetch_dir / distfile; - // check if content not already in CAS - auto digest = ArtifactDigest(key.archive.content, 0, false); - auto const& cas = Storage::Instance().CAS(); - auto content_path = cas.BlobPath(digest, - /*is_executable=*/false); - if (not content_path) { - // make sure content is in CAS - content_cas_map->ConsumeAfterKeysReady( - ts, - {key.archive}, - [target_name, - local_api, - remote_api, - content = key.archive.content, - digest = std::move(digest), - setter, - logger]([[maybe_unused]] auto const& values) { - auto const& cas = Storage::Instance().CAS(); - auto content_path = cas.BlobPath(digest, - /*is_executable=*/false) - .value(); - ProcessContent(content_path, - target_name, - local_api, - remote_api, - content, - digest, - setter, - logger); - }, - [logger, content = key.archive.content](auto const& msg, - bool fatal) { - (*logger)( - fmt::format("While ensuring content {} is in CAS:\n{}", - content, - msg), - fatal); - }); - } - else { - ProcessContent(*content_path, - target_name, - local_api, - remote_api, - key.archive.content, - digest, - setter, - logger); - } + // make sure content is in CAS + content_cas_map->ConsumeAfterKeysReady( + ts, + {key.archive}, + [target_name, + local_api, + remote_api, + content = key.archive.content, + setter, + logger]([[maybe_unused]] auto const& values) { + // content is in local CAS now + auto const& cas = Storage::Instance().CAS(); + auto content_path = + cas.BlobPath(ArtifactDigest{content, 0, /*is_tree=*/false}, + /*is_executable=*/false) + 
.value(); + ProcessContent(content_path, + target_name, + local_api, + remote_api, + content, + setter, + logger); + }, + [logger, content = key.archive.content](auto const& msg, + bool fatal) { + (*logger)( + fmt::format("While ensuring content {} is in CAS:\n{}", + content, + msg), + fatal); + }); }; return AsyncMapConsumer<ArchiveRepoInfo, bool>(fetch_archive, jobs); } diff --git a/src/other_tools/ops_maps/content_cas_map.cpp b/src/other_tools/ops_maps/content_cas_map.cpp index 7fe4efef..d6a3e36e 100644 --- a/src/other_tools/ops_maps/content_cas_map.cpp +++ b/src/other_tools/ops_maps/content_cas_map.cpp @@ -28,25 +28,11 @@ namespace { -void CheckRemoteAndFetchFromNetwork( - ArchiveContent const& key, - ArtifactDigest const& digest, - MirrorsPtr const& additional_mirrors, - CAInfoPtr const& ca_info, - gsl::not_null<IExecutionApi*> const& local_api, - std::optional<gsl::not_null<IExecutionApi*>> const& remote_api, - ContentCASMap::SetterPtr const& setter, - ContentCASMap::LoggerPtr const& logger) { - // check if content is in remote CAS, if a remote is given - if (remote_api and - remote_api.value()->RetrieveToCas( - {Artifact::ObjectInfo{.digest = digest, .type = ObjectType::File}}, - local_api)) { - JustMRProgress::Instance().TaskTracker().Stop(key.origin); - (*setter)(nullptr); - return; - } - // archive needs network fetching; +void FetchFromNetwork(ArchiveContent const& key, + MirrorsPtr const& additional_mirrors, + CAInfoPtr const& ca_info, + ContentCASMap::SetterPtr const& setter, + ContentCASMap::LoggerPtr const& logger) { // first, check that mandatory fields are provided if (key.fetch_url.empty()) { (*logger)("Failed to provide archive fetch url!", @@ -100,7 +86,8 @@ void CheckRemoteAndFetchFromNetwork( } // check that the data we stored actually produces the requested digest auto const& cas = Storage::Instance().CAS(); - if (not cas.BlobPath(digest, /*is_executable=*/false)) { + if (not cas.BlobPath(ArtifactDigest{key.content, 0, /*is_tree=*/false}, + /*is_executable=*/false)) { (*logger)( fmt::format("Content {} was not found at given fetch location {}", key.content, @@ -108,9 +95,7 @@ void CheckRemoteAndFetchFromNetwork( /*fatal=*/true); return; } - if (key.fetch_only) { - JustMRProgress::Instance().TaskTracker().Stop(key.origin); - } + JustMRProgress::Instance().TaskTracker().Stop(key.origin); // success! 
(*setter)(nullptr); } @@ -138,149 +123,132 @@ auto CreateContentCASMap( auto /*unused*/, auto const& key) { auto digest = ArtifactDigest(key.content, 0, false); - // separate logic if we need a pure fetch - if (key.fetch_only) { - auto const& cas = Storage::Instance().CAS(); - if (cas.BlobPath(digest, /*is_executable=*/false)) { - (*setter)(nullptr); - return; - } - // check if content is in Git cache; - // ensure Git cache - GitOpKey op_key = {.params = - { - StorageConfig::GitRoot(), // target_path - "", // git_hash - "", // branch - std::nullopt, // message - true // init_bare - }, - .op_type = GitOpType::ENSURE_INIT}; - critical_git_op_map->ConsumeAfterKeysReady( - ts, - {std::move(op_key)}, - [key, - digest, - just_mr_paths, - additional_mirrors, - ca_info, - serve_api_exists, - local_api, - remote_api, - setter, - logger](auto const& values) { - GitOpValue op_result = *values[0]; - // check flag - if (not op_result.result) { - (*logger)("Git init failed", - /*fatal=*/true); - return; - } - auto const just_git_cas = op_result.git_cas; - // open fake repo wrap for GitCAS - auto just_git_repo = GitRepoRemote::Open(just_git_cas); - if (not just_git_repo) { - (*logger)("Could not open Git cache repository!", + // check local CAS + auto const& cas = Storage::Instance().CAS(); + if (cas.BlobPath(digest, /*is_executable=*/false)) { + (*setter)(nullptr); + return; + } + // check if content is in Git cache; + // ensure Git cache + GitOpKey op_key = {.params = + { + StorageConfig::GitRoot(), // target_path + "", // git_hash + "", // branch + std::nullopt, // message + true // init_bare + }, + .op_type = GitOpType::ENSURE_INIT}; + critical_git_op_map->ConsumeAfterKeysReady( + ts, + {std::move(op_key)}, + [key, + digest, + just_mr_paths, + additional_mirrors, + ca_info, + serve_api_exists, + local_api, + remote_api, + setter, + logger](auto const& values) { + GitOpValue op_result = *values[0]; + // check flag + if (not op_result.result) { + (*logger)("Git init failed", + /*fatal=*/true); + return; + } + auto const just_git_cas = op_result.git_cas; + // open fake repo wrap for GitCAS + auto just_git_repo = GitRepoRemote::Open(just_git_cas); + if (not just_git_repo) { + (*logger)("Could not open Git cache repository!", + /*fatal=*/true); + return; + } + // verify if local Git knows content blob + auto wrapped_logger = std::make_shared<AsyncMapConsumerLogger>( + [&logger, blob = key.content](auto const& msg, bool fatal) { + (*logger)(fmt::format("While verifying presence of " + "blob {}:\n{}", + blob, + msg), + fatal); + }); + auto res = + just_git_repo->TryReadBlob(key.content, wrapped_logger); + if (not res.first) { + // blob check failed + return; + } + auto const& cas = Storage::Instance().CAS(); + if (res.second) { + // blob found; add it to CAS + if (not cas.StoreBlob(*res.second, + /*is_executable=*/false)) { + (*logger)(fmt::format("Failed to store content {} " + "to local CAS", + key.content), /*fatal=*/true); return; } - // verify if local Git knows content blob - auto wrapped_logger = - std::make_shared<AsyncMapConsumerLogger>( - [&logger, blob = key.content](auto const& msg, - bool fatal) { - (*logger)( - fmt::format("While verifying presence of " - "blob {}:\n{}", - blob, - msg), - fatal); - }); - auto res = - just_git_repo->TryReadBlob(key.content, wrapped_logger); - if (not res.first) { - // blob check failed - return; - } - auto const& cas = Storage::Instance().CAS(); - if (res.second) { - // blob found; add it to CAS - if (not cas.StoreBlob(*res.second, - /*is_executable=*/false)) { 
- (*logger)(fmt::format("Failed to store content {} " - "to local CAS", - key.content), - /*fatal=*/true); - return; - } - // content stored to CAS - (*setter)(nullptr); - return; - } - // blob not found in Git cache - JustMRProgress::Instance().TaskTracker().Start(key.origin); - // add distfile to CAS - auto repo_distfile = - (key.distfile ? key.distfile.value() - : std::filesystem::path(key.fetch_url) - .filename() - .string()); - StorageUtils::AddDistfileToCAS(repo_distfile, - just_mr_paths); - // check if content is in CAS now - if (cas.BlobPath(digest, /*is_executable=*/false)) { + // content stored to CAS + (*setter)(nullptr); + return; + } + // blob not found in Git cache + JustMRProgress::Instance().TaskTracker().Start(key.origin); + // add distfile to CAS + auto repo_distfile = + (key.distfile ? key.distfile.value() + : std::filesystem::path(key.fetch_url) + .filename() + .string()); + StorageUtils::AddDistfileToCAS(repo_distfile, just_mr_paths); + // check if content is in CAS now + if (cas.BlobPath(digest, /*is_executable=*/false)) { + JustMRProgress::Instance().TaskTracker().Stop(key.origin); + (*setter)(nullptr); + return; + } + // check if content is known to remote serve service + if (serve_api_exists and remote_api and + ServeApi::ContentInRemoteCAS(key.content)) { + // try to get content from remote CAS + if (remote_api.value()->RetrieveToCas( + {Artifact::ObjectInfo{.digest = digest, + .type = ObjectType::File}}, + local_api)) { JustMRProgress::Instance().TaskTracker().Stop( key.origin); (*setter)(nullptr); return; } - // check if content is known to remote serve service - if (serve_api_exists and - ServeApi::ContentInRemoteCAS(key.content)) { - // try to get content from remote CAS - if (remote_api and remote_api.value()->RetrieveToCas( - {Artifact::ObjectInfo{ - .digest = digest, - .type = ObjectType::File}}, - local_api)) { - JustMRProgress::Instance().TaskTracker().Stop( - key.origin); - (*setter)(nullptr); - return; - } - } - // check remote execution endpoint and if not found revert - // to network fetch - CheckRemoteAndFetchFromNetwork(key, - digest, - additional_mirrors, - ca_info, - local_api, - remote_api, - setter, - logger); - }, - [logger, target_path = StorageConfig::GitRoot()]( - auto const& msg, bool fatal) { - (*logger)(fmt::format("While running critical Git op " - "ENSURE_INIT for target {}:\n{}", - target_path.string(), - msg), - fatal); - }); - // done! 
- return; - } - // if not fetch only, then only check remote execution endpoint and - // revert to network fetch as last resort - CheckRemoteAndFetchFromNetwork(key, - digest, - additional_mirrors, - ca_info, - local_api, - remote_api, - setter, - logger); + } + // check remote execution endpoint, if given + if (remote_api and + remote_api.value()->RetrieveToCas( + {Artifact::ObjectInfo{.digest = digest, + .type = ObjectType::File}}, + local_api)) { + JustMRProgress::Instance().TaskTracker().Stop(key.origin); + (*setter)(nullptr); + return; + } + // revert to network fetch + FetchFromNetwork( + key, additional_mirrors, ca_info, setter, logger); + }, + [logger, target_path = StorageConfig::GitRoot()](auto const& msg, + bool fatal) { + (*logger)(fmt::format("While running critical Git op " + "ENSURE_INIT for target {}:\n{}", + target_path.string(), + msg), + fatal); + }); }; return AsyncMapConsumer<ArchiveContent, std::nullptr_t>(ensure_in_cas, jobs); diff --git a/src/other_tools/ops_maps/content_cas_map.hpp b/src/other_tools/ops_maps/content_cas_map.hpp index 9ee70c39..d9462c68 100644 --- a/src/other_tools/ops_maps/content_cas_map.hpp +++ b/src/other_tools/ops_maps/content_cas_map.hpp @@ -37,8 +37,6 @@ struct ArchiveContent { std::optional<std::string> sha512{std::nullopt}; // name of repository for which work is done; used in progress reporting std::string origin{}; - // flag to separate logic for pure fetch operations - bool fetch_only{}; [[nodiscard]] auto operator==(const ArchiveContent& other) const -> bool { return content == other.content; @@ -65,9 +63,6 @@ struct ArchiveRepoInfo { /// \brief Maps the content hash of an archive to nullptr, as we only care if /// the map fails or not. -/// For pure fetches (fetch_only == true), all possible locations are checked to -/// obtain the content blob before reverting to the network fetch. Otherwise, -/// only the remote CAS is checked before going to the network. using ContentCASMap = AsyncMapConsumer<ArchiveContent, std::nullptr_t>; [[nodiscard]] auto CreateContentCASMap( diff --git a/src/other_tools/repo_map/repos_to_setup_map.cpp b/src/other_tools/repo_map/repos_to_setup_map.cpp index aec57868..7d0762c8 100644 --- a/src/other_tools/repo_map/repos_to_setup_map.cpp +++ b/src/other_tools/repo_map/repos_to_setup_map.cpp @@ -297,8 +297,7 @@ void ArchiveCheckout(ExpressionPtr const& repo_desc, .sha512 = repo_desc_sha512->IsString() ? std::make_optional(repo_desc_sha512->String()) : std::nullopt, - .origin = repo_name, - .fetch_only = false}, + .origin = repo_name}, .repo_type = repo_type, .subdir = subdir.empty() ? "." : subdir.string(), .pragma_special = pragma_special_value, @@ -617,8 +616,7 @@ void DistdirCheckout(ExpressionPtr const& repo_desc, .sha512 = repo_desc_sha512->IsString() ? 
std::make_optional(repo_desc_sha512->String()) : std::nullopt, - .origin = dist_repo_name, - .fetch_only = true}; + .origin = dist_repo_name}; // add to distdir content map auto repo_distfile = diff --git a/src/other_tools/root_maps/content_git_map.cpp b/src/other_tools/root_maps/content_git_map.cpp index 0a70cb0c..e5b6b83e 100644 --- a/src/other_tools/root_maps/content_git_map.cpp +++ b/src/other_tools/root_maps/content_git_map.cpp @@ -449,7 +449,6 @@ auto CreateContentGitMap( gsl::not_null<ResolveSymlinksMap*> const& resolve_symlinks_map, gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, bool serve_api_exists, - gsl::not_null<IExecutionApi*> const& local_api, std::optional<gsl::not_null<IExecutionApi*>> const& remote_api, bool fetch_absent, std::size_t jobs) -> ContentGitMap { @@ -461,7 +460,6 @@ auto CreateContentGitMap( additional_mirrors, ca_info, serve_api_exists, - local_api, remote_api, fetch_absent](auto ts, auto setter, @@ -558,163 +556,157 @@ auto CreateContentGitMap( }); } else { - // check if content already in CAS - auto const& cas = Storage::Instance().CAS(); - auto digest = ArtifactDigest(key.archive.content, 0, false); - if (auto content_cas_path = - cas.BlobPath(digest, /*is_executable=*/false)) { - ExtractAndImportToGit( - key, - *content_cas_path, - archive_tree_id_file, - /*is_absent = */ (key.absent and not fetch_absent), - serve_api_exists, - remote_api, - import_to_git_map, - resolve_symlinks_map, - ts, - setter, - logger); - // done - return; - } + // separate logic between absent and present roots + if (key.absent and not fetch_absent) { + // check if content already in CAS + auto const& cas = Storage::Instance().CAS(); + auto digest = ArtifactDigest(key.archive.content, 0, false); + if (auto content_cas_path = + cas.BlobPath(digest, /*is_executable=*/false)) { + ExtractAndImportToGit(key, + *content_cas_path, + archive_tree_id_file, + /*is_absent = */ true, + serve_api_exists, + remote_api, + import_to_git_map, + resolve_symlinks_map, + ts, + setter, + logger); + // done + return; + } - // check if content is in Git cache; - // ensure Git cache - GitOpKey op_key = {.params = - { - StorageConfig::GitRoot(), // target_path - "", // git_hash - "", // branch - std::nullopt, // message - true // init_bare - }, - .op_type = GitOpType::ENSURE_INIT}; - critical_git_op_map->ConsumeAfterKeysReady( - ts, - {std::move(op_key)}, - [key, - digest, - archive_tree_id_file, - content_cas_map, - import_to_git_map, - resolve_symlinks_map, - just_mr_paths, - additional_mirrors, - ca_info, - serve_api_exists, - local_api, - remote_api, - fetch_absent, - ts, - setter, - logger](auto const& values) { - GitOpValue op_result = *values[0]; - // check flag - if (not op_result.result) { - (*logger)("Git init failed", - /*fatal=*/true); - return; - } - auto const just_git_cas = op_result.git_cas; - // open fake repo wrap for GitCAS - auto just_git_repo = GitRepoRemote::Open(just_git_cas); - if (not just_git_repo) { - (*logger)("Could not open Git cache repository!", - /*fatal=*/true); - return; - } - // verify if local Git knows content blob - auto wrapped_logger = - std::make_shared<AsyncMapConsumerLogger>( - [&logger, blob = key.archive.content]( - auto const& msg, bool fatal) { - (*logger)( - fmt::format("While verifying presence of " - "blob {}:\n{}", - blob, - msg), - fatal); - }); - auto res = just_git_repo->TryReadBlob(key.archive.content, - wrapped_logger); - if (not res.first) { - // blob check failed - return; - } - auto const& cas = Storage::Instance().CAS(); - if 
(res.second) { - // blob found; add it to CAS - if (not cas.StoreBlob(*res.second, - /*is_executable=*/false)) { - (*logger)(fmt::format("Failed to store content {} " - "to local CAS", - key.archive.content), + // check if content is in Git cache; + // ensure Git cache + GitOpKey op_key = { + .params = + { + StorageConfig::GitRoot(), // target_path + "", // git_hash + "", // branch + std::nullopt, // message + true // init_bare + }, + .op_type = GitOpType::ENSURE_INIT}; + critical_git_op_map->ConsumeAfterKeysReady( + ts, + {std::move(op_key)}, + [key, + digest, + archive_tree_id_file, + import_to_git_map, + resolve_symlinks_map, + just_mr_paths, + additional_mirrors, + ca_info, + serve_api_exists, + remote_api, + ts, + setter, + logger](auto const& values) { + GitOpValue op_result = *values[0]; + // check flag + if (not op_result.result) { + (*logger)("Git init failed", + /*fatal=*/true); + return; + } + auto const just_git_cas = op_result.git_cas; + // open fake repo wrap for GitCAS + auto just_git_repo = GitRepoRemote::Open(just_git_cas); + if (not just_git_repo) { + (*logger)("Could not open Git cache repository!", + /*fatal=*/true); + return; + } + // verify if local Git knows content blob + auto wrapped_logger = + std::make_shared<AsyncMapConsumerLogger>( + [&logger, blob = key.archive.content]( + auto const& msg, bool fatal) { + (*logger)( + fmt::format("While verifying presence " + "of blob {}:\n{}", + blob, + msg), + fatal); + }); + auto res = just_git_repo->TryReadBlob( + key.archive.content, wrapped_logger); + if (not res.first) { + // blob check failed + return; + } + auto const& cas = Storage::Instance().CAS(); + if (res.second) { + // blob found; add it to CAS + if (not cas.StoreBlob(*res.second, + /*is_executable=*/false)) { + (*logger)(fmt::format("Failed to store content " + "{} to local CAS", + key.archive.content), + /*fatal=*/true); + return; + } + if (auto content_cas_path = cas.BlobPath( + digest, /*is_executable=*/false)) { + ExtractAndImportToGit(key, + *content_cas_path, + archive_tree_id_file, + /*is_absent=*/true, + serve_api_exists, + remote_api, + import_to_git_map, + resolve_symlinks_map, + ts, + setter, + logger); + // done + return; + } + // this should normally never be reached unless + // something went really wrong + (*logger)(fmt::format("Failed to retrieve blob {} " + "from local CAS", + digest.hash()), /*fatal=*/true); return; } + JustMRProgress::Instance().TaskTracker().Start( + key.archive.origin); + // add distfile to CAS + auto repo_distfile = + (key.archive.distfile + ? 
key.archive.distfile.value() + : std::filesystem::path(key.archive.fetch_url) + .filename() + .string()); + StorageUtils::AddDistfileToCAS(repo_distfile, + just_mr_paths); + // check if content is in CAS now if (auto content_cas_path = cas.BlobPath(digest, /*is_executable=*/false)) { - ExtractAndImportToGit( - key, - *content_cas_path, - archive_tree_id_file, - /*is_absent=*/(key.absent and not fetch_absent), - serve_api_exists, - remote_api, - import_to_git_map, - resolve_symlinks_map, - ts, - setter, - logger); + JustMRProgress::Instance().TaskTracker().Stop( + key.archive.origin); + ExtractAndImportToGit(key, + *content_cas_path, + archive_tree_id_file, + /*is_absent=*/true, + serve_api_exists, + remote_api, + import_to_git_map, + resolve_symlinks_map, + ts, + setter, + logger); // done return; } - // this should normally never be reached unless - // something went really wrong - (*logger)(fmt::format("Failed to retrieve blob {} from " - "local CAS", - digest.hash()), - /*fatal=*/true); - return; - } - JustMRProgress::Instance().TaskTracker().Start( - key.archive.origin); - // add distfile to CAS - auto repo_distfile = - (key.archive.distfile - ? key.archive.distfile.value() - : std::filesystem::path(key.archive.fetch_url) - .filename() - .string()); - StorageUtils::AddDistfileToCAS(repo_distfile, - just_mr_paths); - // check if content is in CAS now - if (auto content_cas_path = - cas.BlobPath(digest, /*is_executable=*/false)) { - JustMRProgress::Instance().TaskTracker().Stop( - key.archive.origin); - ExtractAndImportToGit( - key, - *content_cas_path, - archive_tree_id_file, - /*is_absent=*/(key.absent and not fetch_absent), - serve_api_exists, - remote_api, - import_to_git_map, - resolve_symlinks_map, - ts, - setter, - logger); - // done - return; - } - - // check if content is known to remote serve service - if (serve_api_exists) { - // if purely absent, request the resolved subdir tree - // directly - if (key.absent and not fetch_absent) { + // request the resolved subdir tree from the serve + // endpoint, if given + if (serve_api_exists) { auto serve_result = ServeApi::RetrieveTreeFromArchive( key.archive.content, @@ -740,225 +732,6 @@ auto CreateContentGitMap( if (is_fatal) { (*logger)( fmt::format( - "Serve endpoint failed to set up " - "root from known archive content {}", - key.archive.content), - /*fatal=*/true); - return; - } - } - // otherwise, request (and sync) the whole archive tree, - // UNRESOLVED, to ensure we maintain the id file - // association - else { - auto serve_result = - ServeApi::RetrieveTreeFromArchive( - key.archive.content, - key.repo_type, - /*subdir = */ ".", - /* resolve_symlinks = */ std::nullopt, - /*sync_tree = */ true); - if (std::holds_alternative<std::string>( - serve_result)) { - auto const& root_tree_id = - std::get<std::string>(serve_result); - // verify if we already know the tree locally; - // setup wrapped logger - auto wrapped_logger = - std::make_shared<AsyncMapConsumerLogger>( - [&logger, tree = root_tree_id]( - auto const& msg, bool fatal) { - (*logger)( - fmt::format( - "While verifying presence " - "of tree {}:\n{}", - tree, - msg), - fatal); - }); - auto tree_present = - just_git_repo->CheckTreeExists( - root_tree_id, wrapped_logger); - if (not tree_present) { - return; - } - if (*tree_present) { - JustMRProgress::Instance() - .TaskTracker() - .Stop(key.archive.origin); - // write to id file and process subdir tree; - // this results in a present root - WriteIdFileAndSetWSRoot( - key, - root_tree_id, - just_git_cas, - 
archive_tree_id_file, - /*is_absent=*/false, - /*serve_api_exists=*/false, - /*remote_api=*/std::nullopt, - /*is_on_remote=*/false, - resolve_symlinks_map, - ts, - setter, - logger); - // done - return; - } - // try to get root tree from remote execution - // endpoint - auto root_digest = ArtifactDigest{ - root_tree_id, 0, /*is_tree=*/true}; - if (remote_api and - remote_api.value()->RetrieveToCas( - {Artifact::ObjectInfo{ - .digest = root_digest, - .type = ObjectType::Tree}}, - local_api)) { - JustMRProgress::Instance() - .TaskTracker() - .Stop(key.archive.origin); - // Move tree from CAS to local git storage - auto tmp_dir = - StorageUtils::CreateTypedTmpDir( - "fetch-absent-root"); - if (not tmp_dir) { - (*logger)( - fmt::format( - "Failed to create tmp " - "directory after fetching root " - "tree {} for absent archive {}", - root_tree_id, - key.archive.content), - true); - return; - } - if (not local_api->RetrieveToPaths( - {Artifact::ObjectInfo{ - .digest = root_digest, - .type = ObjectType::Tree}}, - {tmp_dir->GetPath()})) { - (*logger)( - fmt::format( - "Failed to copy fetched root " - "tree {} to {}", - root_tree_id, - tmp_dir->GetPath().string()), - true); - return; - } - CommitInfo c_info{tmp_dir->GetPath(), - "tree", - root_tree_id}; - import_to_git_map->ConsumeAfterKeysReady( - ts, - {std::move(c_info)}, - [tmp_dir, // keep tmp_dir alive - key, - root_tree_id, - just_git_cas, - archive_tree_id_file, - resolve_symlinks_map, - ts, - setter, - logger](auto const& values) { - if (not values[0]->second) { - (*logger)( - "Importing to git failed", - /*fatal=*/true); - return; - } - // write to id file and process - // subdir tree; this results in a - // present root - WriteIdFileAndSetWSRoot( - key, - root_tree_id, - just_git_cas, - archive_tree_id_file, - /*is_absent=*/false, - /*serve_api_exists=*/false, - /*remote_api=*/std::nullopt, - /*is_on_remote=*/false, - resolve_symlinks_map, - ts, - setter, - logger); - }, - [logger, tmp_dir, root_tree_id]( - auto const& msg, bool fatal) { - (*logger)( - fmt::format( - "While moving root tree {} " - "from {} to local git:\n{}", - root_tree_id, - tmp_dir->GetPath().string(), - msg), - fatal); - }); - // done - return; - } - - // try the remote CAS, otherwise revert to a - // network fetch - content_cas_map->ConsumeAfterKeysReady( - ts, - {key.archive}, - [archive_tree_id_file, - key, - import_to_git_map, - resolve_symlinks_map, - ts, - setter, - logger]( - [[maybe_unused]] auto const& values) { - JustMRProgress::Instance() - .TaskTracker() - .Stop(key.archive.origin); - // content is in CAS - auto const& cas = - Storage::Instance().CAS(); - auto content_cas_path = - cas.BlobPath( - ArtifactDigest( - key.archive.content, - 0, - false), - /*is_executable=*/ - false) - .value(); - // this results in a present root - ExtractAndImportToGit( - key, - content_cas_path, - archive_tree_id_file, - /*is_absent=*/false, - /*serve_api_exists=*/false, - /*remote_api=*/std::nullopt, - import_to_git_map, - resolve_symlinks_map, - ts, - setter, - logger); - }, - [logger, content = key.archive.content]( - auto const& msg, bool fatal) { - (*logger)(fmt::format( - "While ensuring content " - "{} is in CAS:\n{}", - content, - msg), - fatal); - }); - // done - return; - } - // check if serve failure was due to archive content - // not being found or it is otherwise fatal - auto const& is_fatal = std::get<bool>(serve_result); - if (is_fatal) { - (*logger)( - fmt::format( "Serve endpoint failed to set up root " "from known archive content {}", 
key.archive.content), @@ -966,68 +739,63 @@ auto CreateContentGitMap( return; } } - } - - // reaching here can only result in a root that is present - if (key.absent and not fetch_absent) { - (*logger)(fmt::format("Cannot create workspace root " - "as absent for content {}.", + // report not being able to set up this root as absent + (*logger)(fmt::format("Cannot create workspace root as " + "absent for content {}.", key.archive.content), /*fatal=*/true); - return; - } - - // check remote CAS, otherwise revert to a network fetch - content_cas_map->ConsumeAfterKeysReady( - ts, - {key.archive}, - [archive_tree_id_file, - key, - import_to_git_map, - resolve_symlinks_map, - ts, - setter, - logger]([[maybe_unused]] auto const& values) { - JustMRProgress::Instance().TaskTracker().Stop( - key.archive.origin); - // content is in CAS - auto const& cas = Storage::Instance().CAS(); - auto content_cas_path = - cas.BlobPath(ArtifactDigest( - key.archive.content, 0, false), - /*is_executable=*/false) - .value(); - // root can only be present, so default all - // arguments that refer to a serve endpoint - ExtractAndImportToGit(key, - content_cas_path, - archive_tree_id_file, - /*is_absent=*/false, - /*serve_api_exists=*/false, - /*remote_api=*/std::nullopt, - import_to_git_map, - resolve_symlinks_map, - ts, - setter, - logger); - }, - [logger, content = key.archive.content](auto const& msg, - bool fatal) { - (*logger)(fmt::format("While ensuring content {} " - "is in CAS:\n{}", - content, - msg), - fatal); - }); - }, - [logger, target_path = StorageConfig::GitRoot()]( - auto const& msg, bool fatal) { - (*logger)(fmt::format("While running critical Git op " - "ENSURE_INIT for target {}:\n{}", - target_path.string(), - msg), - fatal); - }); + }, + [logger, target_path = StorageConfig::GitRoot()]( + auto const& msg, bool fatal) { + (*logger)(fmt::format("While running critical Git op " + "ENSURE_INIT for target {}:\n{}", + target_path.string(), + msg), + fatal); + }); + } + else { + // for a present root we need the archive to be present too + content_cas_map->ConsumeAfterKeysReady( + ts, + {key.archive}, + [archive_tree_id_file, + key, + import_to_git_map, + resolve_symlinks_map, + ts, + setter, + logger]([[maybe_unused]] auto const& values) { + // content is in local CAS now + auto const& cas = Storage::Instance().CAS(); + auto content_cas_path = + cas.BlobPath(ArtifactDigest( + key.archive.content, 0, false), + /*is_executable=*/false) + .value(); + // root can only be present, so default all arguments + // that refer to a serve endpoint + ExtractAndImportToGit(key, + content_cas_path, + archive_tree_id_file, + /*is_absent=*/false, + /*serve_api_exists=*/false, + /*remote_api=*/std::nullopt, + import_to_git_map, + resolve_symlinks_map, + ts, + setter, + logger); + }, + [logger, content = key.archive.content](auto const& msg, + bool fatal) { + (*logger)(fmt::format("While ensuring content {} is in " + "CAS:\n{}", + content, + msg), + fatal); + }); + } } }; return AsyncMapConsumer<ArchiveRepoInfo, std::pair<nlohmann::json, bool>>( diff --git a/src/other_tools/root_maps/content_git_map.hpp b/src/other_tools/root_maps/content_git_map.hpp index 86db9443..f369e944 100644 --- a/src/other_tools/root_maps/content_git_map.hpp +++ b/src/other_tools/root_maps/content_git_map.hpp @@ -42,7 +42,6 @@ using ContentGitMap = gsl::not_null<ResolveSymlinksMap*> const& resolve_symlinks_map, gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, bool serve_api_exists, - gsl::not_null<IExecutionApi*> const& local_api, 
std::optional<gsl::not_null<IExecutionApi*>> const& remote_api, bool fetch_absent, std::size_t jobs) -> ContentGitMap; |