diff options
-rw-r--r-- | src/buildtool/storage/large_object_cas.hpp | 6 | ||||
-rw-r--r-- | src/buildtool/storage/local_cas.hpp | 55 | ||||
-rw-r--r-- | src/buildtool/storage/local_cas.tpp | 139 | ||||
-rw-r--r-- | test/buildtool/storage/large_object_cas.test.cpp | 104 |
4 files changed, 304 insertions, 0 deletions
diff --git a/src/buildtool/storage/large_object_cas.hpp b/src/buildtool/storage/large_object_cas.hpp index 2c8a3348..e3fbf6cb 100644 --- a/src/buildtool/storage/large_object_cas.hpp +++ b/src/buildtool/storage/large_object_cas.hpp @@ -37,6 +37,12 @@ enum class LargeObjectErrorCode { /// \brief The digest is not in the CAS. FileNotFound, + + /// \brief The spliced result differs from the expected digest. + InvalidResult, + + /// \brief Some parts of the tree are not in the storage. + InvalidTree }; /// \brief Describes an error that occurred during split-splice. diff --git a/src/buildtool/storage/local_cas.hpp b/src/buildtool/storage/local_cas.hpp index 057087ef..64dc5082 100644 --- a/src/buildtool/storage/local_cas.hpp +++ b/src/buildtool/storage/local_cas.hpp @@ -117,6 +117,20 @@ class LocalCAS { return cas_file_large_.Split(digest); } + /// \brief Splice a blob from parts. + /// \param digest The expected digest of the result. + /// \param parts The parts of the large object. + /// \param is_executable Splice the blob with executable permissions. + /// \return The digest of the result or an error code on + /// failure. + [[nodiscard]] auto SpliceBlob(bazel_re::Digest const& digest, + std::vector<bazel_re::Digest> const& parts, + bool is_executable) const noexcept + -> std::variant<LargeObjectError, bazel_re::Digest> { + return is_executable ? Splice<ObjectType::Executable>(digest, parts) + : Splice<ObjectType::File>(digest, parts); + } + /// \brief Obtain tree path from digest. /// \param digest Digest of the tree to lookup. /// \returns Path to the tree if found or nullopt otherwise. @@ -134,6 +148,17 @@ class LocalCAS { return cas_tree_large_.Split(digest); } + /// \brief Splice a tree from parts. + /// \param digest The expected digest of the result. + /// \param parts The parts of the large object. + /// \return The digest of the result or an error code on + /// failure. 
+ [[nodiscard]] auto SpliceTree(bazel_re::Digest const& digest, + std::vector<bazel_re::Digest> const& parts) + const noexcept -> std::variant<LargeObjectError, bazel_re::Digest> { + return Splice<ObjectType::Tree>(digest, parts); + } + /// \brief Traverses a tree recursively and retrieves object infos of all /// found blobs (leafs). Tree objects are by default not added to the result /// list, but converted to a path name. @@ -161,6 +186,14 @@ class LocalCAS { -> std::optional<std::pair<std::vector<std::filesystem::path>, std::vector<Artifact::ObjectInfo>>>; + /// \brief Check whether all parts of the tree are in the storage. + /// \param tree_digest Digest of the tree to be checked. + /// \param tree_data Content of the tree. + /// \return An error on failure, std::nullopt otherwise. + [[nodiscard]] auto CheckTreeInvariant(bazel_re::Digest const& tree_digest, + std::string const& tree_data) + const noexcept -> std::optional<LargeObjectError>; + /// \brief Dump artifact to file stream. /// Tree artifacts are pretty-printed (i.e., contents are listed) unless /// raw_tree is set, then the raw tree will be written to the file stream. 
@@ -285,10 +318,32 @@ class LocalCAS { requires(kIsLocalGeneration) [[nodiscard]] auto TrySplice( bazel_re::Digest const& digest) const noexcept -> std::optional<LargeObject>; + + template <ObjectType kType> + [[nodiscard]] auto Splice(bazel_re::Digest const& digest, + std::vector<bazel_re::Digest> const& parts) + const noexcept -> std::variant<LargeObjectError, bazel_re::Digest>; }; #ifndef BOOTSTRAP_BUILD_TOOL #include "src/buildtool/storage/local_cas.tpp" +#else +template <bool kDoGlobalUplink> +auto LocalCAS<kDoGlobalUplink>::CheckTreeInvariant( + bazel_re::Digest const& tree_digest, + std::string const& tree_data) const noexcept + -> std::optional<LargeObjectError> { + return std::nullopt; +} + +template <bool kDoGlobalUplink> +template <ObjectType kType> +auto LocalCAS<kDoGlobalUplink>::Splice( + bazel_re::Digest const& digest, + std::vector<bazel_re::Digest> const& parts) const noexcept + -> std::variant<LargeObjectError, bazel_re::Digest> { + return LargeObjectError{LargeObjectErrorCode::Internal, "not allowed"}; +} #endif #endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LOCAL_CAS_HPP diff --git a/src/buildtool/storage/local_cas.tpp b/src/buildtool/storage/local_cas.tpp index 0c2d794d..b1c25504 100644 --- a/src/buildtool/storage/local_cas.tpp +++ b/src/buildtool/storage/local_cas.tpp @@ -18,6 +18,7 @@ #include <cstddef> #include <utility> // std::move +#include "fmt/core.h" #include "src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp" #include "src/buildtool/logging/log_level.hpp" #include "src/buildtool/storage/local_cas.hpp" @@ -191,6 +192,19 @@ auto ReadObjectInfosRecursively( return false; } +[[nodiscard]] static inline auto CheckDigestConsistency( + bazel_re::Digest const& lhs, + bazel_re::Digest const& rhs) noexcept -> bool { + if (lhs.hash() != rhs.hash()) { + return false; + } + bool const both_known = lhs.size_bytes() != 0 and rhs.size_bytes() != 0; + if (Compatibility::IsCompatible() or both_known) { + return lhs.size_bytes() == rhs.size_bytes(); 
+ } + return true; +} + } // namespace detail template <bool kDoGlobalUplink> @@ -508,4 +522,129 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::TrySplice( : std::nullopt; } +template <bool kDoGlobalUplink> +auto LocalCAS<kDoGlobalUplink>::CheckTreeInvariant( + bazel_re::Digest const& tree_digest, + std::string const& tree_data) const noexcept + -> std::optional<LargeObjectError> { + if (Compatibility::IsCompatible()) { + return std::nullopt; + } + + auto skip_symlinks = [](auto const& /*unused*/) { return true; }; + auto const entries = + GitRepo::ReadTreeData(tree_data, + NativeSupport::Unprefix(tree_digest.hash()), + skip_symlinks, + /*is_hex_id=*/true); + if (not entries) { + return LargeObjectError{ + LargeObjectErrorCode::Internal, + fmt::format("could not read entries of the tree {}", + tree_digest.hash())}; + } + + // Ensure all entries are in the storage: + for (auto const& entry : *entries) { + for (auto const& item : entry.second) { + bazel_re::Digest const digest = + ArtifactDigest(ToHexString(entry.first), + /*size_unknown=*/0ULL, + IsTreeObject(item.type)); + + // To avoid splicing during search, large CASes are inspected first. + bool const entry_exists = + IsTreeObject(item.type) + ? 
cas_tree_large_.GetEntryPath(digest) or TreePath(digest) + : cas_file_large_.GetEntryPath(digest) or + BlobPath(digest, IsExecutableObject(item.type)); + + if (not entry_exists) { + return LargeObjectError{ + LargeObjectErrorCode::InvalidTree, + fmt::format("tree invariant violated {} : missing part {}", + tree_digest.hash(), + digest.hash())}; + } + } + } + return std::nullopt; +} + +template <bool kDoGlobalUplink> +template <ObjectType kType> +auto LocalCAS<kDoGlobalUplink>::Splice( + bazel_re::Digest const& digest, + std::vector<bazel_re::Digest> const& parts) const noexcept + -> std::variant<LargeObjectError, bazel_re::Digest> { + static constexpr bool kIsTree = IsTreeObject(kType); + static constexpr bool kIsExec = IsExecutableObject(kType); + + // Check whether the object is spliced already: + if (kIsTree ? TreePath(digest) : BlobPath(digest, kIsExec)) { + return digest; + } + + // Splice the result from parts: + std::optional<LargeObject> large_object; + auto splice_result = kIsTree ? cas_tree_large_.Splice(digest, parts) + : cas_file_large_.Splice(digest, parts); + if (auto* result = std::get_if<LargeObject>(&splice_result)) { + large_object = std::move(*result); + } + else if (auto* error = std::get_if<LargeObjectError>(&splice_result)) { + return std::move(*error); + } + else { + return LargeObjectError{ + LargeObjectErrorCode::Internal, + fmt::format("could not splice {}", digest.hash())}; + } + + // Check digest consistency: + // Using Store{Tree, Blob} to calculate the resulting hash and later + // decide whether the result is valid is unreasonable, because these + // methods can refer to a file that existed before. The direct hash + // calculation is done instead. 
+ auto const file_path = large_object->GetPath(); + auto spliced_digest = ObjectCAS<kType>::CreateDigest(file_path); + if (not spliced_digest) { + return LargeObjectError{LargeObjectErrorCode::Internal, + "could not calculate digest"}; + } + + if (not detail::CheckDigestConsistency(*spliced_digest, digest)) { + return LargeObjectError{ + LargeObjectErrorCode::InvalidResult, + fmt::format("actual result {} differs from the expected one {}", + spliced_digest->hash(), + digest.hash())}; + } + + // Check tree invariants: + if constexpr (kIsTree) { + if (not Compatibility::IsCompatible()) { + // Read tree entries: + auto const tree_data = FileSystemManager::ReadFile(file_path); + if (not tree_data) { + return LargeObjectError{ + LargeObjectErrorCode::Internal, + fmt::format("could not read tree {}", digest.hash())}; + } + if (auto error = CheckTreeInvariant(digest, *tree_data)) { + return std::move(*error); + } + } + } + + static constexpr bool kOwner = true; + auto stored_digest = kIsTree ? StoreTree<kOwner>(file_path) + : StoreBlob<kOwner>(file_path, kIsExec); + if (stored_digest) { + return std::move(*stored_digest); + } + return LargeObjectError{LargeObjectErrorCode::Internal, + fmt::format("could not splice {}", digest.hash())}; +} + #endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LOCAL_CAS_TPP diff --git a/test/buildtool/storage/large_object_cas.test.cpp b/test/buildtool/storage/large_object_cas.test.cpp index c8718baa..1d3b8394 100644 --- a/test/buildtool/storage/large_object_cas.test.cpp +++ b/test/buildtool/storage/large_object_cas.test.cpp @@ -333,6 +333,107 @@ static void TestEmpty() noexcept { } } +// Test splicing from an external source. +// 1. The object can be explicitly spliced, if the parts are present in the +// storage. +// 2. Explicit splice fails, if the result of splicing is different from +// what was expected. +// 3. Explicit splice fails, if some parts of the tree are missing. 
+template <ObjectType kType> +static void TestExternal() noexcept { + SECTION("External") { + static constexpr bool kIsTree = IsTreeObject(kType); + static constexpr bool kIsExec = IsExecutableObject(kType); + + using TestType = std::conditional_t<kIsTree, + LargeTestUtils::Tree, + LargeTestUtils::Blob<kIsExec>>; + + auto const& cas = Storage::Instance().CAS(); + + // Create a large object: + auto object = TestType::Create( + cas, std::string(TestType::kLargeId), TestType::kLargeSize); + REQUIRE(object); + auto const& [digest, path] = *object; + + // Split the object: + auto pack_1 = kIsTree ? cas.SplitTree(digest) : cas.SplitBlob(digest); + auto* split = std::get_if<std::vector<bazel_re::Digest>>(&pack_1); + REQUIRE(split); + CHECK(split->size() > 1); + + // External source is emulated by moving the large entry to an older + // generation and promoting the parts of the entry to the youngest + // generation: + REQUIRE(GarbageCollector::TriggerGarbageCollection()); + for (auto const& part : *split) { + static constexpr bool is_executable = false; + REQUIRE(cas.BlobPath(part, is_executable)); + } + + auto const& youngest = Storage::Generation(0).CAS(); + + SECTION("Proper request") { + if constexpr (kIsTree) { + // Promote the parts of the tree: + auto splice = cas.TreePath(digest); + REQUIRE(splice); + REQUIRE(FileSystemManager::RemoveFile(*splice)); + } + REQUIRE_FALSE(FileSystemManager::IsFile(path)); + + // Reconstruct the result from parts: + std::ignore = kIsTree + ? youngest.SpliceTree(digest, *split) + : youngest.SpliceBlob(digest, *split, kIsExec); + CHECK(FileSystemManager::IsFile(path)); + } + + // Simulate a situation when the parts result in an existing file, but it is + // not the expected result: + SECTION("Digest consistency fail") { + // Splice the result to check it will not be affected: + auto implicit_splice = + kIsTree ? 
cas.TreePath(digest) : cas.BlobPath(digest, kIsExec); + REQUIRE(implicit_splice); + REQUIRE(*implicit_splice == path); + + // Randomize one more object to simulate invalidation: + auto small = TestType::Create( + cas, std::string(TestType::kSmallId), TestType::kSmallSize); + REQUIRE(small); + auto const& [small_digest, small_path] = *small; + + // The entry itself is not important, only its digest is needed: + REQUIRE(FileSystemManager::RemoveFile(small_path)); + REQUIRE_FALSE(FileSystemManager::IsFile(small_path)); + + // Invalidation is simulated by reconstructing the small_digest + // object from the parts of the initial object: + auto splice = + kIsTree ? youngest.SpliceTree(small_digest, *split) + : youngest.SpliceBlob(small_digest, *split, kIsExec); + auto* error = std::get_if<LargeObjectError>(&splice); + REQUIRE(error); + CHECK(error->Code() == LargeObjectErrorCode::InvalidResult); + + // The initial entry must not be affected: + REQUIRE(FileSystemManager::IsFile(path)); + } + + if constexpr (kIsTree) { + SECTION("Tree invariants check fails") { + // Check splice fails due to the tree invariants check. + auto splice = youngest.SpliceTree(digest, *split); + auto* error = std::get_if<LargeObjectError>(&splice); + REQUIRE(error); + CHECK(error->Code() == LargeObjectErrorCode::InvalidTree); + } + } + } +} + TEST_CASE_METHOD(HermeticLocalTestFixture, "LocalCAS: Split-Splice", "[storage]") { @@ -340,16 +441,19 @@ TEST_CASE_METHOD(HermeticLocalTestFixture, TestLarge<ObjectType::File>(); TestSmall<ObjectType::File>(); TestEmpty<ObjectType::File>(); + TestExternal<ObjectType::File>(); } SECTION("Tree") { TestLarge<ObjectType::Tree>(); TestSmall<ObjectType::Tree>(); TestEmpty<ObjectType::Tree>(); + TestExternal<ObjectType::Tree>(); } SECTION("Executable") { TestLarge<ObjectType::Executable>(); TestSmall<ObjectType::Executable>(); TestEmpty<ObjectType::Executable>(); + TestExternal<ObjectType::Executable>(); } } |