From 1dc455d965dc36cd1ba2f4851d72bb4ac085bcef Mon Sep 17 00:00:00 2001 From: Maksim Denisov Date: Sat, 23 Mar 2024 23:20:49 +0100 Subject: LargeBlobs: Splice large objects from external sources. For splicing of large objects from external sources additional checks are performed: * The digest of the spliced result must be equal to the expected digest; * The parts of a spliced tree must be in the storage. Tested: * Regular splicing of large objects; * If the result is unexpected, splicing fails; * If some parts of a tree are missing, splicing fails. --- src/buildtool/storage/large_object_cas.hpp | 6 ++ src/buildtool/storage/local_cas.hpp | 55 ++++++++++++ src/buildtool/storage/local_cas.tpp | 139 +++++++++++++++++++++++++++++ 3 files changed, 200 insertions(+) (limited to 'src') diff --git a/src/buildtool/storage/large_object_cas.hpp b/src/buildtool/storage/large_object_cas.hpp index 2c8a3348..e3fbf6cb 100644 --- a/src/buildtool/storage/large_object_cas.hpp +++ b/src/buildtool/storage/large_object_cas.hpp @@ -37,6 +37,12 @@ enum class LargeObjectErrorCode { /// \brief The digest is not in the CAS. FileNotFound, + + /// \brief The result is different from what was expected. + InvalidResult, + + /// \brief Some parts of the tree are not in the storage. + InvalidTree }; /// \brief Describes an error that occurred during split-splice. diff --git a/src/buildtool/storage/local_cas.hpp b/src/buildtool/storage/local_cas.hpp index 057087ef..64dc5082 100644 --- a/src/buildtool/storage/local_cas.hpp +++ b/src/buildtool/storage/local_cas.hpp @@ -117,6 +117,20 @@ class LocalCAS { return cas_file_large_.Split(digest); } + /// \brief Splice a blob from parts. + /// \param digest The expected digest of the result. + /// \param parts The parts of the large object. + /// \param is_executable Splice the blob with executable permissions. + /// \return The digest of the result or an error code on + /// failure. + [[nodiscard]] auto SpliceBlob(bazel_re::Digest const& digest, + std::vector const& parts, + bool is_executable) const noexcept + -> std::variant { + return is_executable ? Splice(digest, parts) + : Splice(digest, parts); + } + /// \brief Obtain tree path from digest. /// \param digest Digest of the tree to lookup. /// \returns Path to the tree if found or nullopt otherwise. @@ -134,6 +148,17 @@ class LocalCAS { return cas_tree_large_.Split(digest); } + /// \brief Splice a tree from parts. + /// \param digest The expected digest of the result. + /// \param parts The parts of the large object. + /// \return The digest of the result or an error code on + /// failure. + [[nodiscard]] auto SpliceTree(bazel_re::Digest const& digest, + std::vector const& parts) + const noexcept -> std::variant { + return Splice(digest, parts); + } + /// \brief Traverses a tree recursively and retrieves object infos of all /// found blobs (leafs). Tree objects are by default not added to the result /// list, but converted to a path name. @@ -161,6 +186,14 @@ class LocalCAS { -> std::optional, std::vector>>; + /// \brief Check whether all parts of the tree are in the storage. + /// \param tree_digest Digest of the tree to be checked. + /// \param tree_data Content of the tree. + /// \return An error on fail. + [[nodiscard]] auto CheckTreeInvariant(bazel_re::Digest const& tree_digest, + std::string const& tree_data) + const noexcept -> std::optional; + /// \brief Dump artifact to file stream. /// Tree artifacts are pretty-printed (i.e., contents are listed) unless /// raw_tree is set, then the raw tree will be written to the file stream. @@ -285,10 +318,32 @@ class LocalCAS { requires(kIsLocalGeneration) [[nodiscard]] auto TrySplice( bazel_re::Digest const& digest) const noexcept -> std::optional; + + template + [[nodiscard]] auto Splice(bazel_re::Digest const& digest, + std::vector const& parts) + const noexcept -> std::variant; }; #ifndef BOOTSTRAP_BUILD_TOOL #include "src/buildtool/storage/local_cas.tpp" +#else +template +auto LocalCAS::CheckTreeInvariant( + bazel_re::Digest const& tree_digest, + std::string const& tree_data) const noexcept + -> std::optional { + return std::nullopt; +} + +template +template +auto LocalCAS::Splice( + bazel_re::Digest const& digest, + std::vector const& parts) const noexcept + -> std::variant { + return LargeObjectError{LargeObjectErrorCode::Internal, "not allowed"}; +} #endif #endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LOCAL_CAS_HPP diff --git a/src/buildtool/storage/local_cas.tpp b/src/buildtool/storage/local_cas.tpp index 0c2d794d..b1c25504 100644 --- a/src/buildtool/storage/local_cas.tpp +++ b/src/buildtool/storage/local_cas.tpp @@ -18,6 +18,7 @@ #include #include // std::move +#include "fmt/core.h" #include "src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp" #include "src/buildtool/logging/log_level.hpp" #include "src/buildtool/storage/local_cas.hpp" @@ -191,6 +192,19 @@ auto ReadObjectInfosRecursively( return false; } +[[nodiscard]] static inline auto CheckDigestConsistency( + bazel_re::Digest const& lhs, + bazel_re::Digest const& rhs) noexcept -> bool { + if (lhs.hash() != rhs.hash()) { + return false; + } + bool const both_known = lhs.size_bytes() != 0 and rhs.size_bytes() != 0; + if (Compatibility::IsCompatible() or both_known) { + return lhs.size_bytes() == rhs.size_bytes(); + } + return true; +} + } // namespace detail template @@ -508,4 +522,129 @@ requires(kIsLocalGeneration) auto LocalCAS::TrySplice( : std::nullopt; } +template +auto LocalCAS::CheckTreeInvariant( + bazel_re::Digest const& tree_digest, + std::string const& tree_data) const noexcept + -> std::optional { + if (Compatibility::IsCompatible()) { + return std::nullopt; + } + + auto skip_symlinks = [](auto const& /*unused*/) { return true; }; + auto const entries = + GitRepo::ReadTreeData(tree_data, + NativeSupport::Unprefix(tree_digest.hash()), + skip_symlinks, + /*is_hex_id=*/true); + if (not entries) { + return LargeObjectError{ + LargeObjectErrorCode::Internal, + fmt::format("could not read entries of the tree {}", + tree_digest.hash())}; + } + + // Ensure all entries are in the storage: + for (const auto& entry : *entries) { + for (auto const& item : entry.second) { + bazel_re::Digest const digest = + ArtifactDigest(ToHexString(entry.first), + /*size_unknown=*/0ULL, + IsTreeObject(item.type)); + + // To avoid splicing during search, large CASes are inspected first. + bool const entry_exists = + IsTreeObject(item.type) + ? cas_tree_large_.GetEntryPath(digest) or TreePath(digest) + : cas_file_large_.GetEntryPath(digest) or + BlobPath(digest, IsExecutableObject(item.type)); + + if (not entry_exists) { + return LargeObjectError{ + LargeObjectErrorCode::InvalidTree, + fmt::format("tree invariant violated {} : missing part {}", + tree_digest.hash(), + digest.hash())}; + } + } + } + return std::nullopt; +} + +template +template +auto LocalCAS::Splice( + bazel_re::Digest const& digest, + std::vector const& parts) const noexcept + -> std::variant { + static constexpr bool kIsTree = IsTreeObject(kType); + static constexpr bool kIsExec = IsExecutableObject(kType); + + // Check file is spliced already: + if (kIsTree ? TreePath(digest) : BlobPath(digest, kIsExec)) { + return digest; + } + + // Splice the result from parts: + std::optional large_object; + auto splice_result = kIsTree ? cas_tree_large_.Splice(digest, parts) + : cas_file_large_.Splice(digest, parts); + if (auto* result = std::get_if(&splice_result)) { + large_object = *result; + } + else if (auto* error = std::get_if(&splice_result)) { + return std::move(*error); + } + else { + return LargeObjectError{ + LargeObjectErrorCode::Internal, + fmt::format("could not splice {}", digest.hash())}; + } + + // Check digest consistency: + // Using Store{Tree, Blob} to calculate the resulting hash and later + // decide whether the result is valid is unreasonable, because these + // methods can refer to a file that existed before. The direct hash + // calculation is done instead. + auto const file_path = large_object->GetPath(); + auto spliced_digest = ObjectCAS::CreateDigest(file_path); + if (not spliced_digest) { + return LargeObjectError{LargeObjectErrorCode::Internal, + "could not calculate digest"}; + } + + if (not detail::CheckDigestConsistency(*spliced_digest, digest)) { + return LargeObjectError{ + LargeObjectErrorCode::InvalidResult, + fmt::format("actual result {} differs from the expected one {}", + spliced_digest->hash(), + digest.hash())}; + } + + // Check tree invariants: + if constexpr (kIsTree) { + if (not Compatibility::IsCompatible()) { + // Read tree entries: + auto const tree_data = FileSystemManager::ReadFile(file_path); + if (not tree_data) { + return LargeObjectError{ + LargeObjectErrorCode::Internal, + fmt::format("could not read tree {}", digest.hash())}; + } + if (auto error = CheckTreeInvariant(digest, *tree_data)) { + return std::move(*error); + } + } + } + + static constexpr bool kOwner = true; + auto const stored_digest = kIsTree ? StoreTree(file_path) + : StoreBlob(file_path, kIsExec); + if (stored_digest) { + return std::move(*stored_digest); + } + return LargeObjectError{LargeObjectErrorCode::Internal, + fmt::format("could not splice {}", digest.hash())}; +} + #endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LOCAL_CAS_TPP -- cgit v1.2.3