diff options
author | Sascha Roloff <sascha.roloff@huawei.com> | 2024-02-23 09:32:54 +0100 |
---|---|---|
committer | Sascha Roloff <sascha.roloff@huawei.com> | 2024-02-26 17:16:21 +0100 |
commit | d83c997ad5a866f4fbb38d4a81e7edf70a491db2 (patch) | |
tree | 7331b4d58d2c56adecb6e9879862cf354129d6af /src/buildtool/execution_api/remote | |
parent | 4ae3f068372041f949538fb273113a4a1c665a0f (diff) | |
download | justbuild-d83c997ad5a866f4fbb38d4a81e7edf70a491db2.tar.gz |
Refactor split and splice implementations.
Currently, the implementations of the split and splice operations are both
hidden behind the Bazel API implementation. This was sufficient to implement
splitting at the server and splicing at the client. In order to support the
other direction of splitting at the client and splicing at the server while
reusing their implementations, the code needs to be refactored. First, the
functionality of split and splice is explicitly exposed at the general
execution API interface and implemented in the sub APIs. Second, the
implementations of split and splice are factored into a separate utils class.
Diffstat (limited to 'src/buildtool/execution_api/remote')
6 files changed, 69 insertions, 35 deletions
diff --git a/src/buildtool/execution_api/remote/bazel/bazel_api.cpp b/src/buildtool/execution_api/remote/bazel/bazel_api.cpp index 48cf3133..157016f7 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_api.cpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_api.cpp @@ -17,7 +17,8 @@ #include <algorithm> #include <atomic> #include <cstdint> -#include <fstream> +#include <iterator> +#include <sstream> #include <unordered_map> #include <unordered_set> @@ -120,7 +121,7 @@ namespace { // Fetch unknown chunks. auto digest_set = std::unordered_set<bazel_re::Digest>{ - (*chunk_digests).begin(), (*chunk_digests).end()}; + chunk_digests->begin(), chunk_digests->end()}; auto unique_digests = std::vector<bazel_re::Digest>{digest_set.begin(), digest_set.end()}; auto missing_digests = ::IsAvailable(unique_digests, api); @@ -129,30 +130,24 @@ namespace { } // Assemble blob from chunks. - auto tmp_dir = StorageUtils::CreateTypedTmpDir("splice"); - auto tmp_file = tmp_dir->GetPath() / "blob"; - std::size_t total_size{}; - { - std::ofstream tmp(tmp_file, std::ios::binary); - for (auto const& chunk_digest : *chunk_digests) { - auto info = - Artifact::ObjectInfo{.digest = ArtifactDigest{chunk_digest}, - .type = ObjectType::File}; - auto chunk_data = api->RetrieveToMemory(info); - if (not chunk_data) { - Logger::Log(LogLevel::Error, - "could not load blob chunk in memory: ", - chunk_digest.hash()); - return false; - } - tmp << *chunk_data; - total_size += chunk_data->size(); - } + auto artifact_digests = std::vector<ArtifactDigest>{}; + artifact_digests.reserve(chunk_digests->size()); + std::transform(chunk_digests->cbegin(), + chunk_digests->cend(), + std::back_inserter(artifact_digests), + [](auto const& digest) { return ArtifactDigest{digest}; }); + auto digest = api->SpliceBlob(artifact_info.digest, artifact_digests); + if (not digest) { + // If blob splicing failed, fall back to regular fetching. 
+ return ::RetrieveToCas({artifact_info.digest}, api, network, info_map); } Logger::Log( LogLevel::Debug, - [&artifact_info, &unique_digests, &missing_digests, &total_size]() { + [&artifact_info, + &unique_digests, + &missing_digests, + total_size = digest->size()]() { auto missing_digest_set = std::unordered_set<bazel_re::Digest>{ missing_digests.begin(), missing_digests.end()}; std::uint64_t transmitted_bytes{0}; @@ -172,7 +167,7 @@ namespace { artifact_info.ToString()); }); - return api->UploadFile(tmp_file, artifact_info.type); + return true; } } // namespace @@ -584,3 +579,23 @@ auto BazelApi::CreateAction( } return result; } + +[[nodiscard]] auto BazelApi::SplitBlob(ArtifactDigest const& blob_digest) + const noexcept -> std::optional<std::vector<ArtifactDigest>> { + auto chunk_digests = + network_->SplitBlob(static_cast<bazel_re::Digest>(blob_digest)); + if (not chunk_digests) { + return std::nullopt; + } + auto artifact_digests = std::vector<ArtifactDigest>{}; + artifact_digests.reserve(chunk_digests->size()); + std::transform(chunk_digests->cbegin(), + chunk_digests->cend(), + std::back_inserter(artifact_digests), + [](auto const& digest) { return ArtifactDigest{digest}; }); + return artifact_digests; +} + +[[nodiscard]] auto BazelApi::BlobSplitSupport() const noexcept -> bool { + return network_->BlobSplitSupport(); +} diff --git a/src/buildtool/execution_api/remote/bazel/bazel_api.hpp b/src/buildtool/execution_api/remote/bazel/bazel_api.hpp index aae90907..2d514ecd 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_api.hpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_api.hpp @@ -96,6 +96,11 @@ class BazelApi final : public IExecutionApi { Artifact::ObjectInfo const& artifact_info) noexcept -> std::optional<std::string> final; + [[nodiscard]] auto SplitBlob(ArtifactDigest const& blob_digest) + const noexcept -> std::optional<std::vector<ArtifactDigest>> final; + + [[nodiscard]] auto BlobSplitSupport() const noexcept -> bool final; + 
private: std::shared_ptr<BazelNetwork> network_; diff --git a/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp b/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp index f193bf8b..473dd5ba 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp @@ -93,7 +93,7 @@ namespace { return blob_split_support_map[instance_name]; } } - auto supported = BlobSplitSupport(instance_name, stub); + auto supported = ::BlobSplitSupport(instance_name, stub); logger->Emit(LogLevel::Debug, "Blob split support for \"{}\": {}", instance_name, @@ -312,14 +312,14 @@ auto BazelCasClient::ReadSingleBlob(std::string const& instance_name, } auto BazelCasClient::SplitBlob(std::string const& instance_name, - bazel_re::Digest const& digest) noexcept - -> std::optional<std::vector<bazel_re::Digest>> { + bazel_re::Digest const& blob_digest) + const noexcept -> std::optional<std::vector<bazel_re::Digest>> { if (not BlobSplitSupportCached(instance_name, stub_, &logger_)) { return std::nullopt; } bazel_re::SplitBlobRequest request{}; request.set_instance_name(instance_name); - request.mutable_blob_digest()->CopyFrom(digest); + request.mutable_blob_digest()->CopyFrom(blob_digest); bazel_re::SplitBlobResponse response{}; auto [ok, status] = WithRetry( [this, &response, &request]() { @@ -334,6 +334,11 @@ auto BazelCasClient::SplitBlob(std::string const& instance_name, return ProcessResponseContents<bazel_re::Digest>(response); } +auto BazelCasClient::BlobSplitSupport( + std::string const& instance_name) const noexcept -> bool { + return ::BlobSplitSupportCached(instance_name, stub_, &logger_); +} + template <class T_ForwardIter> auto BazelCasClient::FindMissingBlobs(std::string const& instance_name, T_ForwardIter const& start, diff --git a/src/buildtool/execution_api/remote/bazel/bazel_cas_client.hpp b/src/buildtool/execution_api/remote/bazel/bazel_cas_client.hpp index 762b4f37..77196022 100644 
--- a/src/buildtool/execution_api/remote/bazel/bazel_cas_client.hpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_cas_client.hpp @@ -129,12 +129,15 @@ class BazelCasClient { -> std::optional<BazelBlob>; /// @brief Split single blob into chunks - /// @param[in] instance_name Name of the CAS instance - /// @param[in] digest Blob digest to be splitted + /// @param[in] instance_name Name of the CAS instance + /// @param[in] blob_digest Blob digest to be splitted /// @return The chunk digests of the splitted blob [[nodiscard]] auto SplitBlob(std::string const& instance_name, - bazel_re::Digest const& digest) noexcept - -> std::optional<std::vector<bazel_re::Digest>>; + bazel_re::Digest const& blob_digest) + const noexcept -> std::optional<std::vector<bazel_re::Digest>>; + + [[nodiscard]] auto BlobSplitSupport( + std::string const& instance_name) const noexcept -> bool; private: std::unique_ptr<ByteStreamClient> stream_{}; diff --git a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp index 223960cb..04c13589 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp @@ -154,9 +154,13 @@ auto BazelNetwork::IsAvailable(std::vector<bazel_re::Digest> const& digests) return cas_->FindMissingBlobs(instance_name_, digests); } -auto BazelNetwork::SplitBlob(bazel_re::Digest const& digest) const noexcept +auto BazelNetwork::SplitBlob(bazel_re::Digest const& blob_digest) const noexcept -> std::optional<std::vector<bazel_re::Digest>> { - return cas_->SplitBlob(instance_name_, digest); + return cas_->SplitBlob(instance_name_, blob_digest); +} + +auto BazelNetwork::BlobSplitSupport() const noexcept -> bool { + return cas_->BlobSplitSupport(instance_name_); } template <class T_Iter> diff --git a/src/buildtool/execution_api/remote/bazel/bazel_network.hpp b/src/buildtool/execution_api/remote/bazel/bazel_network.hpp index 
af48c7db..0808d2e9 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_network.hpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_network.hpp @@ -75,8 +75,10 @@ class BazelNetwork { [[nodiscard]] auto IsAvailable(std::vector<bazel_re::Digest> const& digests) const noexcept -> std::vector<bazel_re::Digest>; - [[nodiscard]] auto SplitBlob(bazel_re::Digest const& digest) const noexcept - -> std::optional<std::vector<bazel_re::Digest>>; + [[nodiscard]] auto SplitBlob(bazel_re::Digest const& blob_digest) + const noexcept -> std::optional<std::vector<bazel_re::Digest>>; + + [[nodiscard]] auto BlobSplitSupport() const noexcept -> bool; /// \brief Uploads blobs to CAS /// \param blobs The blobs to upload |