From 13211c140457b878ca8787b80b2ad2424f5272bd Mon Sep 17 00:00:00 2001 From: Klaus Aehlig Date: Tue, 9 Jul 2024 16:51:54 +0200 Subject: ParallelRetrieveToCas: avoid duplicated requests While no additional blob will be transferred, doing a request for missing blobs is still a request and, in particular, an unnecessary round trip. Therefore avoid this, by remembering what we synchronized already. --- .../execution_api/remote/bazel/bazel_api.cpp | 43 ++++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) (limited to 'src/buildtool/execution_api/remote/bazel/bazel_api.cpp') diff --git a/src/buildtool/execution_api/remote/bazel/bazel_api.cpp b/src/buildtool/execution_api/remote/bazel/bazel_api.cpp index 1ce65259..6fbd19f3 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_api.cpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_api.cpp @@ -357,7 +357,6 @@ auto BazelApi::CreateAction( blob_digests, api, network_, missing_artifacts_info->back_map); } -/// NOLINTNEXTLINE(misc-no-recursion) [[nodiscard]] auto BazelApi::ParallelRetrieveToCas( std::vector const& artifacts_info, IExecutionApi const& api, @@ -367,6 +366,33 @@ auto BazelApi::CreateAction( if (this == &api) { return true; } + std::unordered_set done{}; + return ParallelRetrieveToCasWithCache( + artifacts_info, api, jobs, use_blob_splitting, &done); +} + +/// NOLINTNEXTLINE(misc-no-recursion) +[[nodiscard]] auto BazelApi::ParallelRetrieveToCasWithCache( + std::vector const& all_artifacts_info, + IExecutionApi const& api, + std::size_t jobs, + bool use_blob_splitting, + gsl::not_null*> done) + const noexcept -> bool { + + std::vector artifacts_info{}; + artifacts_info.reserve(all_artifacts_info.size()); + for (auto const& info : all_artifacts_info) { + if (not done->contains(info)) { + artifacts_info.emplace_back(info); + } + } + if (artifacts_info.empty()) { + return true; // Nothing to do + } + std::sort(artifacts_info.begin(), artifacts_info.end()); + auto last_info = std::unique(artifacts_info.begin(), artifacts_info.end()); + artifacts_info.erase(last_info, artifacts_info.end()); // Determine missing artifacts in other CAS. auto missing_artifacts_info = GetMissingArtifactsInfo( @@ -392,8 +418,8 @@ auto BazelApi::CreateAction( auto const result = reader.ReadDirectTreeEntries( info.digest, std::filesystem::path{}); if (not result or - not ParallelRetrieveToCas( - result->infos, api, jobs, use_blob_splitting)) { + not ParallelRetrieveToCasWithCache( + result->infos, api, jobs, use_blob_splitting, done)) { return false; } } @@ -425,6 +451,17 @@ auto BazelApi::CreateAction( return false; } + try { + for (auto const& info : artifacts_info) { + done->insert(info); + } + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Warning, + "Exception when updating set of synchronized objects " + "(continuing anyway): {}", + ex.what()); + } + return not failure; } -- cgit v1.2.3