summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/buildtool/storage/large_object_cas.hpp6
-rw-r--r--src/buildtool/storage/local_cas.hpp55
-rw-r--r--src/buildtool/storage/local_cas.tpp139
-rw-r--r--test/buildtool/storage/large_object_cas.test.cpp104
4 files changed, 304 insertions, 0 deletions
diff --git a/src/buildtool/storage/large_object_cas.hpp b/src/buildtool/storage/large_object_cas.hpp
index 2c8a3348..e3fbf6cb 100644
--- a/src/buildtool/storage/large_object_cas.hpp
+++ b/src/buildtool/storage/large_object_cas.hpp
@@ -37,6 +37,12 @@ enum class LargeObjectErrorCode {
/// \brief The digest is not in the CAS.
FileNotFound,
+
+ /// \brief The result is different from what was expected.
+ InvalidResult,
+
+ /// \brief Some parts of the tree are not in the storage.
+ InvalidTree
};
/// \brief Describes an error that occurred during split-splice.
diff --git a/src/buildtool/storage/local_cas.hpp b/src/buildtool/storage/local_cas.hpp
index 057087ef..64dc5082 100644
--- a/src/buildtool/storage/local_cas.hpp
+++ b/src/buildtool/storage/local_cas.hpp
@@ -117,6 +117,20 @@ class LocalCAS {
return cas_file_large_.Split(digest);
}
+    /// \brief Reassemble a blob in the storage from its parts.
+    /// \param digest           Digest the spliced blob is expected to have.
+    /// \param parts            Digests of the parts of the large object.
+    /// \param is_executable    Whether to splice the blob as an executable.
+    /// \return The digest of the spliced blob, or an error on failure.
+    [[nodiscard]] auto SpliceBlob(bazel_re::Digest const& digest,
+                                  std::vector<bazel_re::Digest> const& parts,
+                                  bool is_executable) const noexcept
+        -> std::variant<LargeObjectError, bazel_re::Digest> {
+        // Dispatch to the splice implementation of the matching blob type.
+        if (is_executable) {
+            return Splice<ObjectType::Executable>(digest, parts);
+        }
+        return Splice<ObjectType::File>(digest, parts);
+    }
+
/// \brief Obtain tree path from digest.
/// \param digest Digest of the tree to lookup.
/// \returns Path to the tree if found or nullopt otherwise.
@@ -134,6 +148,17 @@ class LocalCAS {
return cas_tree_large_.Split(digest);
}
+ /// \brief Splice a tree from parts. Forwards to Splice<ObjectType::Tree>.
+ /// \param digest The expected digest of the result.
+ /// \param parts The digests of the parts of the large object.
+ /// \return The digest of the result or an error code on
+ /// failure.
+ [[nodiscard]] auto SpliceTree(bazel_re::Digest const& digest,
+ std::vector<bazel_re::Digest> const& parts)
+ const noexcept -> std::variant<LargeObjectError, bazel_re::Digest> {
+ return Splice<ObjectType::Tree>(digest, parts);
+ }
+
/// \brief Traverses a tree recursively and retrieves object infos of all
/// found blobs (leafs). Tree objects are by default not added to the result
/// list, but converted to a path name.
@@ -161,6 +186,14 @@ class LocalCAS {
-> std::optional<std::pair<std::vector<std::filesystem::path>,
std::vector<Artifact::ObjectInfo>>>;
+ /// \brief Check whether all parts of the tree are in the storage.
+ /// \param tree_digest Digest of the tree to be checked.
+ /// \param tree_data Content of the tree.
+ /// \return std::nullopt if the check passes, otherwise an error
+ /// describing the failure.
+ [[nodiscard]] auto CheckTreeInvariant(bazel_re::Digest const& tree_digest,
+ std::string const& tree_data)
+ const noexcept -> std::optional<LargeObjectError>;
+
/// \brief Dump artifact to file stream.
/// Tree artifacts are pretty-printed (i.e., contents are listed) unless
/// raw_tree is set, then the raw tree will be written to the file stream.
@@ -285,10 +318,32 @@ class LocalCAS {
requires(kIsLocalGeneration) [[nodiscard]] auto TrySplice(
bazel_re::Digest const& digest) const noexcept
-> std::optional<LargeObject>;
+
+ /// \brief Splice an object of the given type from parts.
+ /// \tparam kType Type of the object to be spliced.
+ /// \param digest The expected digest of the result.
+ /// \param parts The digests of the parts of the large object.
+ /// \return The digest of the result or an error on failure.
+ template <ObjectType kType>
+ [[nodiscard]] auto Splice(bazel_re::Digest const& digest,
+ std::vector<bazel_re::Digest> const& parts)
+ const noexcept -> std::variant<LargeObjectError, bazel_re::Digest>;
};
#ifndef BOOTSTRAP_BUILD_TOOL
#include "src/buildtool/storage/local_cas.tpp"
+#else
+// Stub used when BOOTSTRAP_BUILD_TOOL is defined (local_cas.tpp is not
+// included in that case): no check is performed and std::nullopt, i.e. "no
+// error", is always returned.
+template <bool kDoGlobalUplink>
+auto LocalCAS<kDoGlobalUplink>::CheckTreeInvariant(
+ bazel_re::Digest const& tree_digest,
+ std::string const& tree_data) const noexcept
+ -> std::optional<LargeObjectError> {
+ return std::nullopt;
+}
+
+// Stub used when BOOTSTRAP_BUILD_TOOL is defined (local_cas.tpp is not
+// included in that case): nothing is spliced and an Internal error is always
+// returned.
+template <bool kDoGlobalUplink>
+template <ObjectType kType>
+auto LocalCAS<kDoGlobalUplink>::Splice(
+ bazel_re::Digest const& digest,
+ std::vector<bazel_re::Digest> const& parts) const noexcept
+ -> std::variant<LargeObjectError, bazel_re::Digest> {
+ return LargeObjectError{LargeObjectErrorCode::Internal, "not allowed"};
+}
#endif
#endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LOCAL_CAS_HPP
diff --git a/src/buildtool/storage/local_cas.tpp b/src/buildtool/storage/local_cas.tpp
index 0c2d794d..b1c25504 100644
--- a/src/buildtool/storage/local_cas.tpp
+++ b/src/buildtool/storage/local_cas.tpp
@@ -18,6 +18,7 @@
#include <cstddef>
#include <utility> // std::move
+#include "fmt/core.h"
#include "src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp"
#include "src/buildtool/logging/log_level.hpp"
#include "src/buildtool/storage/local_cas.hpp"
@@ -191,6 +192,19 @@ auto ReadObjectInfosRecursively(
return false;
}
+/// \brief Decide whether two digests are consistent with each other.
+/// The hashes must be equal. The sizes are compared only if
+/// Compatibility::IsCompatible() holds or both sizes are non-zero; otherwise
+/// a zero size is not taken into account.
+/// \param lhs First digest.
+/// \param rhs Second digest.
+/// \return True if the digests are consistent.
+[[nodiscard]] static inline auto CheckDigestConsistency(
+    bazel_re::Digest const& lhs,
+    bazel_re::Digest const& rhs) noexcept -> bool {
+    if (lhs.hash() != rhs.hash()) {
+        return false;
+    }
+    auto const lhs_size = lhs.size_bytes();
+    auto const rhs_size = rhs.size_bytes();
+    bool const sizes_known = lhs_size != 0 and rhs_size != 0;
+    bool const compare_sizes = Compatibility::IsCompatible() or sizes_known;
+    // If the sizes are not to be compared, equal hashes are sufficient.
+    return not compare_sizes or lhs_size == rhs_size;
+}
+
} // namespace detail
template <bool kDoGlobalUplink>
@@ -508,4 +522,129 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::TrySplice(
: std::nullopt;
}
+/// \brief Verify that every entry referenced by a tree is available in the
+/// storage. Nothing is checked if Compatibility::IsCompatible() holds.
+/// \param tree_digest Digest of the tree to be checked.
+/// \param tree_data   Raw content of the tree.
+/// \return std::nullopt on success; an Internal error if the tree content
+/// cannot be read; an InvalidTree error if an entry is missing.
+template <bool kDoGlobalUplink>
+auto LocalCAS<kDoGlobalUplink>::CheckTreeInvariant(
+    bazel_re::Digest const& tree_digest,
+    std::string const& tree_data) const noexcept
+    -> std::optional<LargeObjectError> {
+    if (Compatibility::IsCompatible()) {
+        return std::nullopt;
+    }
+
+    // Callback handed to ReadTreeData; it unconditionally returns true.
+    auto accept_all = [](auto const& /*unused*/) { return true; };
+    auto const tree_entries =
+        GitRepo::ReadTreeData(tree_data,
+                              NativeSupport::Unprefix(tree_digest.hash()),
+                              accept_all,
+                              /*is_hex_id=*/true);
+    if (not tree_entries) {
+        return LargeObjectError{
+            LargeObjectErrorCode::Internal,
+            fmt::format("could not read entries of the tree {}",
+                        tree_digest.hash())};
+    }
+
+    // Every item of every entry must be present in the storage:
+    for (auto const& [raw_id, items] : *tree_entries) {
+        for (auto const& item : items) {
+            bool const is_tree = IsTreeObject(item.type);
+            bazel_re::Digest const part =
+                ArtifactDigest(ToHexString(raw_id),
+                               /*size_unknown=*/0ULL,
+                               is_tree);
+
+            // The large CASes are consulted before the regular ones to
+            // avoid splicing during the lookup.
+            bool found = false;
+            if (is_tree) {
+                found = cas_tree_large_.GetEntryPath(part) or TreePath(part);
+            }
+            else {
+                found = cas_file_large_.GetEntryPath(part) or
+                        BlobPath(part, IsExecutableObject(item.type));
+            }
+
+            if (not found) {
+                return LargeObjectError{
+                    LargeObjectErrorCode::InvalidTree,
+                    fmt::format("tree invariant violated {} : missing part {}",
+                                tree_digest.hash(),
+                                part.hash())};
+            }
+        }
+    }
+    return std::nullopt;
+}
+
+/// \brief Splice an object of type kType from its parts and add the result
+/// to the storage.
+/// If the object is already available, its digest is returned right away.
+/// Otherwise the parts are spliced by the matching large CAS, the digest of
+/// the spliced file is computed directly and compared with the expected one,
+/// tree invariants are checked (trees only, skipped if
+/// Compatibility::IsCompatible() holds), and finally the file is stored.
+/// \tparam kType  Type of the object to be spliced.
+/// \param digest  The expected digest of the result.
+/// \param parts   The digests of the parts of the large object.
+/// \return The digest of the stored object on success; otherwise an error:
+/// the error reported by the large CAS, InvalidResult if the spliced content
+/// does not match the expected digest, the error reported by
+/// CheckTreeInvariant, or Internal for any other failure.
+template <bool kDoGlobalUplink>
+template <ObjectType kType>
+auto LocalCAS<kDoGlobalUplink>::Splice(
+    bazel_re::Digest const& digest,
+    std::vector<bazel_re::Digest> const& parts) const noexcept
+    -> std::variant<LargeObjectError, bazel_re::Digest> {
+    static constexpr bool kIsTree = IsTreeObject(kType);
+    static constexpr bool kIsExec = IsExecutableObject(kType);
+
+    // Check file is spliced already:
+    if (kIsTree ? TreePath(digest) : BlobPath(digest, kIsExec)) {
+        return digest;
+    }
+
+    // Splice the result from parts:
+    std::optional<LargeObject> large_object;
+    auto splice_result = kIsTree ? cas_tree_large_.Splice(digest, parts)
+                                 : cas_file_large_.Splice(digest, parts);
+    if (auto* result = std::get_if<LargeObject>(&splice_result)) {
+        large_object = *result;
+    }
+    else if (auto* error = std::get_if<LargeObjectError>(&splice_result)) {
+        return std::move(*error);
+    }
+    else {
+        return LargeObjectError{
+            LargeObjectErrorCode::Internal,
+            fmt::format("could not splice {}", digest.hash())};
+    }
+
+    // Check digest consistency:
+    // Using Store{Tree, Blob} to calculate the resulting hash and later
+    // decide whether the result is valid is unreasonable, because these
+    // methods can refer to a file that existed before. The direct hash
+    // calculation is done instead.
+    // Bound by reference: large_object outlives every use of the path.
+    auto const& file_path = large_object->GetPath();
+    auto spliced_digest = ObjectCAS<kType>::CreateDigest(file_path);
+    if (not spliced_digest) {
+        return LargeObjectError{LargeObjectErrorCode::Internal,
+                                "could not calculate digest"};
+    }
+
+    if (not detail::CheckDigestConsistency(*spliced_digest, digest)) {
+        return LargeObjectError{
+            LargeObjectErrorCode::InvalidResult,
+            fmt::format("actual result {} differs from the expected one {}",
+                        spliced_digest->hash(),
+                        digest.hash())};
+    }
+
+    // Check tree invariants:
+    if constexpr (kIsTree) {
+        // The mode is tested here as well, so that the tree is not read
+        // from disk when no check would be performed anyway.
+        if (not Compatibility::IsCompatible()) {
+            // Read tree entries:
+            auto const tree_data = FileSystemManager::ReadFile(file_path);
+            if (not tree_data) {
+                return LargeObjectError{
+                    LargeObjectErrorCode::Internal,
+                    fmt::format("could not read tree {}", digest.hash())};
+            }
+            if (auto error = CheckTreeInvariant(digest, *tree_data)) {
+                return std::move(*error);
+            }
+        }
+    }
+
+    static constexpr bool kOwner = true;
+    // Deliberately non-const: moving out of a const optional would silently
+    // fall back to a copy.
+    auto stored_digest = kIsTree ? StoreTree<kOwner>(file_path)
+                                 : StoreBlob<kOwner>(file_path, kIsExec);
+    if (stored_digest) {
+        return std::move(*stored_digest);
+    }
+    return LargeObjectError{LargeObjectErrorCode::Internal,
+                            fmt::format("could not splice {}", digest.hash())};
+}
+
#endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LOCAL_CAS_TPP
diff --git a/test/buildtool/storage/large_object_cas.test.cpp b/test/buildtool/storage/large_object_cas.test.cpp
index c8718baa..1d3b8394 100644
--- a/test/buildtool/storage/large_object_cas.test.cpp
+++ b/test/buildtool/storage/large_object_cas.test.cpp
@@ -333,6 +333,107 @@ static void TestEmpty() noexcept {
}
}
+// Test splicing from an external source.
+// 1. The object can be explicitly spliced, if the parts are present in the
+// storage.
+// 2. Explicit splice fails, if the result of splicing is different from
+// what was expected.
+// 3. Explicit splice fails, if some parts of the tree are missing.
+template <ObjectType kType>
+static void TestExternal() noexcept {
+ SECTION("External") {
+ static constexpr bool kIsTree = IsTreeObject(kType);
+ static constexpr bool kIsExec = IsExecutableObject(kType);
+
+ using TestType = std::conditional_t<kIsTree,
+ LargeTestUtils::Tree,
+ LargeTestUtils::Blob<kIsExec>>;
+
+ auto const& cas = Storage::Instance().CAS();
+
+ // Create a large object:
+ auto object = TestType::Create(
+ cas, std::string(TestType::kLargeId), TestType::kLargeSize);
+ CHECK(object);
+ auto const& [digest, path] = *object;
+
+ // Split the object:
+ auto pack_1 = kIsTree ? cas.SplitTree(digest) : cas.SplitBlob(digest);
+ auto* split = std::get_if<std::vector<bazel_re::Digest>>(&pack_1);
+ CHECK(split);
+ CHECK(split->size() > 1);
+
+ // External source is emulated by moving the large entry to an older
+ // generation and promoting the parts of the entry to the youngest
+ // generation:
+ REQUIRE(GarbageCollector::TriggerGarbageCollection());
+ for (auto const& part : *split) {
+ // The parts are looked up as non-executable blobs.
+ static constexpr bool is_executable = false;
+ REQUIRE(cas.BlobPath(part, is_executable));
+ }
+
+ auto const& youngest = Storage::Generation(0).CAS();
+
+ SECTION("Proper request") {
+ if constexpr (kIsTree) {
+ // Promote the parts of the tree:
+ // NOTE(review): presumably TreePath splices the tree implicitly
+ // and thereby promotes its entries; the spliced file itself is
+ // removed again right after — confirm.
+ auto splice = cas.TreePath(digest);
+ REQUIRE(splice);
+ REQUIRE(FileSystemManager::RemoveFile(*splice));
+ }
+ REQUIRE_FALSE(FileSystemManager::IsFile(path));
+
+ // Reconstruct the result from parts:
+ // NOTE(review): the returned variant is discarded; success is only
+ // observed through the file check below.
+ std::ignore = kIsTree
+ ? youngest.SpliceTree(digest, *split)
+ : youngest.SpliceBlob(digest, *split, kIsExec);
+ CHECK(FileSystemManager::IsFile(path));
+ }
+
+ // Simulate a situation when the parts result in an existing file, but it
+ // is not the expected result:
+ SECTION("Digest consistency fail") {
+ // Splice the result to check it will not be affected:
+ auto implicit_splice =
+ kIsTree ? cas.TreePath(digest) : cas.BlobPath(digest, kIsExec);
+ REQUIRE(implicit_splice);
+ REQUIRE(*implicit_splice == path);
+
+ // Randomize one more object to simulate invalidation:
+ auto small = TestType::Create(
+ cas, std::string(TestType::kSmallId), TestType::kSmallSize);
+ REQUIRE(small);
+ auto const& [small_digest, small_path] = *small;
+
+ // The entry itself is not important, only its digest is needed:
+ REQUIRE(FileSystemManager::RemoveFile(small_path));
+ REQUIRE_FALSE(FileSystemManager::IsFile(small_path));
+
+ // Invalidation is simulated by reconstructing the small_digest
+ // object from the parts of the initial object:
+ auto splice =
+ kIsTree ? youngest.SpliceTree(small_digest, *split)
+ : youngest.SpliceBlob(small_digest, *split, kIsExec);
+ auto* error = std::get_if<LargeObjectError>(&splice);
+ REQUIRE(error);
+ CHECK(error->Code() == LargeObjectErrorCode::InvalidResult);
+
+ // The initial entry must not be affected:
+ REQUIRE(FileSystemManager::IsFile(path));
+ }
+
+ if constexpr (kIsTree) {
+ SECTION("Tree invariants check fails") {
+ // Check splice fails due to the tree invariants check.
+ // NOTE(review): presumably the entries of the tree are not
+ // available to the youngest generation at this point, since they
+ // have not been promoted in this section — confirm.
+ auto splice = youngest.SpliceTree(digest, *split);
+ auto* error = std::get_if<LargeObjectError>(&splice);
+ REQUIRE(error);
+ CHECK(error->Code() == LargeObjectErrorCode::InvalidTree);
+ }
+ }
+ }
+}
+
TEST_CASE_METHOD(HermeticLocalTestFixture,
"LocalCAS: Split-Splice",
"[storage]") {
@@ -340,16 +441,19 @@ TEST_CASE_METHOD(HermeticLocalTestFixture,
TestLarge<ObjectType::File>();
TestSmall<ObjectType::File>();
TestEmpty<ObjectType::File>();
+ TestExternal<ObjectType::File>();
}
SECTION("Tree") {
TestLarge<ObjectType::Tree>();
TestSmall<ObjectType::Tree>();
TestEmpty<ObjectType::Tree>();
+ TestExternal<ObjectType::Tree>();
}
SECTION("Executable") {
TestLarge<ObjectType::Executable>();
TestSmall<ObjectType::Executable>();
TestEmpty<ObjectType::Executable>();
+ TestExternal<ObjectType::Executable>();
}
}