From d83c997ad5a866f4fbb38d4a81e7edf70a491db2 Mon Sep 17 00:00:00 2001 From: Sascha Roloff Date: Fri, 23 Feb 2024 09:32:54 +0100 Subject: Refactor split and splice implementations. Currently, the implementations of the split and splice operations are both hidden behind the Bazel API implementation. This was sufficient to implement splitting at the server and splicing at the client. In order to support the other direction of splitting at the client and splicing at the server while reusing their implementations, the code needs to be refactored. First, the functionality of split and splice is explicitly exposed at the general execution API interface and implemented in the sub APIs. Second, the implementations of split and splice are factored into a separate utils class. --- .../execution_api/remote/bazel/bazel_api.cpp | 61 ++++++++++++++-------- .../execution_api/remote/bazel/bazel_api.hpp | 5 ++ .../remote/bazel/bazel_cas_client.cpp | 13 +++-- .../remote/bazel/bazel_cas_client.hpp | 11 ++-- .../execution_api/remote/bazel/bazel_network.cpp | 8 ++- .../execution_api/remote/bazel/bazel_network.hpp | 6 ++- 6 files changed, 69 insertions(+), 35 deletions(-) (limited to 'src/buildtool/execution_api/remote/bazel') diff --git a/src/buildtool/execution_api/remote/bazel/bazel_api.cpp b/src/buildtool/execution_api/remote/bazel/bazel_api.cpp index 48cf3133..157016f7 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_api.cpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_api.cpp @@ -17,7 +17,8 @@ #include #include #include -#include +#include +#include #include #include @@ -120,7 +121,7 @@ namespace { // Fetch unknown chunks. auto digest_set = std::unordered_set{ - (*chunk_digests).begin(), (*chunk_digests).end()}; + chunk_digests->begin(), chunk_digests->end()}; auto unique_digests = std::vector{digest_set.begin(), digest_set.end()}; auto missing_digests = ::IsAvailable(unique_digests, api); @@ -129,30 +130,24 @@ namespace { } // Assemble blob from chunks. 
- auto tmp_dir = StorageUtils::CreateTypedTmpDir("splice"); - auto tmp_file = tmp_dir->GetPath() / "blob"; - std::size_t total_size{}; - { - std::ofstream tmp(tmp_file, std::ios::binary); - for (auto const& chunk_digest : *chunk_digests) { - auto info = - Artifact::ObjectInfo{.digest = ArtifactDigest{chunk_digest}, - .type = ObjectType::File}; - auto chunk_data = api->RetrieveToMemory(info); - if (not chunk_data) { - Logger::Log(LogLevel::Error, - "could not load blob chunk in memory: ", - chunk_digest.hash()); - return false; - } - tmp << *chunk_data; - total_size += chunk_data->size(); - } + auto artifact_digests = std::vector{}; + artifact_digests.reserve(chunk_digests->size()); + std::transform(chunk_digests->cbegin(), + chunk_digests->cend(), + std::back_inserter(artifact_digests), + [](auto const& digest) { return ArtifactDigest{digest}; }); + auto digest = api->SpliceBlob(artifact_info.digest, artifact_digests); + if (not digest) { + // If blob splicing failed, fall back to regular fetching. 
+ return ::RetrieveToCas({artifact_info.digest}, api, network, info_map); } Logger::Log( LogLevel::Debug, - [&artifact_info, &unique_digests, &missing_digests, &total_size]() { + [&artifact_info, + &unique_digests, + &missing_digests, + total_size = digest->size()]() { auto missing_digest_set = std::unordered_set{ missing_digests.begin(), missing_digests.end()}; std::uint64_t transmitted_bytes{0}; @@ -172,7 +167,7 @@ namespace { artifact_info.ToString()); }); - return api->UploadFile(tmp_file, artifact_info.type); + return true; } } // namespace @@ -584,3 +579,23 @@ auto BazelApi::CreateAction( } return result; } + +[[nodiscard]] auto BazelApi::SplitBlob(ArtifactDigest const& blob_digest) + const noexcept -> std::optional> { + auto chunk_digests = + network_->SplitBlob(static_cast(blob_digest)); + if (not chunk_digests) { + return std::nullopt; + } + auto artifact_digests = std::vector{}; + artifact_digests.reserve(chunk_digests->size()); + std::transform(chunk_digests->cbegin(), + chunk_digests->cend(), + std::back_inserter(artifact_digests), + [](auto const& digest) { return ArtifactDigest{digest}; }); + return artifact_digests; +} + +[[nodiscard]] auto BazelApi::BlobSplitSupport() const noexcept -> bool { + return network_->BlobSplitSupport(); +} diff --git a/src/buildtool/execution_api/remote/bazel/bazel_api.hpp b/src/buildtool/execution_api/remote/bazel/bazel_api.hpp index aae90907..2d514ecd 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_api.hpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_api.hpp @@ -96,6 +96,11 @@ class BazelApi final : public IExecutionApi { Artifact::ObjectInfo const& artifact_info) noexcept -> std::optional final; + [[nodiscard]] auto SplitBlob(ArtifactDigest const& blob_digest) + const noexcept -> std::optional> final; + + [[nodiscard]] auto BlobSplitSupport() const noexcept -> bool final; + private: std::shared_ptr network_; diff --git a/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp 
b/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp index f193bf8b..473dd5ba 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp @@ -93,7 +93,7 @@ namespace { return blob_split_support_map[instance_name]; } } - auto supported = BlobSplitSupport(instance_name, stub); + auto supported = ::BlobSplitSupport(instance_name, stub); logger->Emit(LogLevel::Debug, "Blob split support for \"{}\": {}", instance_name, @@ -312,14 +312,14 @@ auto BazelCasClient::ReadSingleBlob(std::string const& instance_name, } auto BazelCasClient::SplitBlob(std::string const& instance_name, - bazel_re::Digest const& digest) noexcept - -> std::optional> { + bazel_re::Digest const& blob_digest) + const noexcept -> std::optional> { if (not BlobSplitSupportCached(instance_name, stub_, &logger_)) { return std::nullopt; } bazel_re::SplitBlobRequest request{}; request.set_instance_name(instance_name); - request.mutable_blob_digest()->CopyFrom(digest); + request.mutable_blob_digest()->CopyFrom(blob_digest); bazel_re::SplitBlobResponse response{}; auto [ok, status] = WithRetry( [this, &response, &request]() { @@ -334,6 +334,11 @@ auto BazelCasClient::SplitBlob(std::string const& instance_name, return ProcessResponseContents(response); } +auto BazelCasClient::BlobSplitSupport( + std::string const& instance_name) const noexcept -> bool { + return ::BlobSplitSupportCached(instance_name, stub_, &logger_); +} + template auto BazelCasClient::FindMissingBlobs(std::string const& instance_name, T_ForwardIter const& start, diff --git a/src/buildtool/execution_api/remote/bazel/bazel_cas_client.hpp b/src/buildtool/execution_api/remote/bazel/bazel_cas_client.hpp index 762b4f37..77196022 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_cas_client.hpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_cas_client.hpp @@ -129,12 +129,15 @@ class BazelCasClient { -> std::optional; /// @brief Split single 
blob into chunks - /// @param[in] instance_name Name of the CAS instance - /// @param[in] digest Blob digest to be splitted + /// @param[in] instance_name Name of the CAS instance + /// @param[in] blob_digest Blob digest to be splitted /// @return The chunk digests of the splitted blob [[nodiscard]] auto SplitBlob(std::string const& instance_name, - bazel_re::Digest const& digest) noexcept - -> std::optional>; + bazel_re::Digest const& blob_digest) + const noexcept -> std::optional>; + + [[nodiscard]] auto BlobSplitSupport( + std::string const& instance_name) const noexcept -> bool; private: std::unique_ptr stream_{}; diff --git a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp index 223960cb..04c13589 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp @@ -154,9 +154,13 @@ auto BazelNetwork::IsAvailable(std::vector const& digests) return cas_->FindMissingBlobs(instance_name_, digests); } -auto BazelNetwork::SplitBlob(bazel_re::Digest const& digest) const noexcept +auto BazelNetwork::SplitBlob(bazel_re::Digest const& blob_digest) const noexcept -> std::optional> { - return cas_->SplitBlob(instance_name_, digest); + return cas_->SplitBlob(instance_name_, blob_digest); +} + +auto BazelNetwork::BlobSplitSupport() const noexcept -> bool { + return cas_->BlobSplitSupport(instance_name_); } template diff --git a/src/buildtool/execution_api/remote/bazel/bazel_network.hpp b/src/buildtool/execution_api/remote/bazel/bazel_network.hpp index af48c7db..0808d2e9 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_network.hpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_network.hpp @@ -75,8 +75,10 @@ class BazelNetwork { [[nodiscard]] auto IsAvailable(std::vector const& digests) const noexcept -> std::vector; - [[nodiscard]] auto SplitBlob(bazel_re::Digest const& digest) const noexcept - -> std::optional>; + 
[[nodiscard]] auto SplitBlob(bazel_re::Digest const& blob_digest) + const noexcept -> std::optional>; + + [[nodiscard]] auto BlobSplitSupport() const noexcept -> bool; /// \brief Uploads blobs to CAS /// \param blobs The blobs to upload -- cgit v1.2.3