diff options
-rw-r--r-- | src/buildtool/storage/garbage_collector.cpp | 14 | ||||
-rw-r--r-- | src/buildtool/storage/garbage_collector.hpp | 7 | ||||
-rw-r--r-- | src/buildtool/storage/large_object_cas.hpp | 15 | ||||
-rw-r--r-- | src/buildtool/storage/large_object_cas.tpp | 51 | ||||
-rw-r--r-- | src/buildtool/storage/local_cas.hpp | 14 | ||||
-rw-r--r-- | src/buildtool/storage/local_cas.tpp | 56 | ||||
-rw-r--r-- | test/buildtool/storage/large_object_cas.test.cpp | 142 |
7 files changed, 293 insertions, 6 deletions
diff --git a/src/buildtool/storage/garbage_collector.cpp b/src/buildtool/storage/garbage_collector.cpp index cfc6daf5..232c8ab8 100644 --- a/src/buildtool/storage/garbage_collector.cpp +++ b/src/buildtool/storage/garbage_collector.cpp @@ -72,6 +72,20 @@ auto GarbageCollector::GlobalUplinkBlob(bazel_re::Digest const& digest, return false; } +auto GarbageCollector::GlobalUplinkLargeBlob( + bazel_re::Digest const& digest) noexcept -> bool { + // Try to find large entry in all generations. + auto const& latest_cas = Storage::Generation(0).CAS(); + for (std::size_t i = 0; i < StorageConfig::NumGenerations(); ++i) { + if (Storage::Generation(i) + .CAS() + .LocalUplinkLargeObject<ObjectType::File>(latest_cas, digest)) { + return true; + } + } + return false; +} + auto GarbageCollector::GlobalUplinkTree(bazel_re::Digest const& digest) noexcept -> bool { // Try to find tree in all generations. diff --git a/src/buildtool/storage/garbage_collector.hpp b/src/buildtool/storage/garbage_collector.hpp index c4ceaf8a..28a5f08b 100644 --- a/src/buildtool/storage/garbage_collector.hpp +++ b/src/buildtool/storage/garbage_collector.hpp @@ -42,6 +42,13 @@ class GarbageCollector { bool is_executable) noexcept -> bool; + /// \brief Uplink large blob entry across LocalCASes from all generations to + /// latest. This method does not splice the large object. + /// \param digest Digest of the large blob entry to uplink. + /// \returns true if large entry was found and successfully uplinked. + [[nodiscard]] auto static GlobalUplinkLargeBlob( + bazel_re::Digest const& digest) noexcept -> bool; + /// \brief Uplink tree across LocalCASes from all generations to latest. /// Note that the tree will be deeply uplinked, i.e., all entries referenced /// by this tree will be uplinked before (including sub-trees). 
diff --git a/src/buildtool/storage/large_object_cas.hpp b/src/buildtool/storage/large_object_cas.hpp index 0d658af6..2c8a3348 100644 --- a/src/buildtool/storage/large_object_cas.hpp +++ b/src/buildtool/storage/large_object_cas.hpp @@ -133,6 +133,21 @@ class LargeObjectCAS final { std::vector<bazel_re::Digest> const& parts) const noexcept -> std::variant<LargeObjectError, LargeObject>; + /// \brief Uplink large entry from this generation to latest LocalCAS + /// generation. For the large entry, its parts get promoted first and then + /// the entry itself. This function is only available for instances that are + /// used as local GC generations (i.e., disabled global uplink). + /// \tparam kIsLocalGeneration True if this instance is a local generation. + /// \param latest The latest LocalCAS generation. + /// \param latest_large The latest LargeObjectCAS + /// \param digest The digest of the large entry to uplink. + /// \returns True if the large entry was successfully uplinked. + template <bool kIsLocalGeneration = not kDoGlobalUplink> + requires(kIsLocalGeneration) [[nodiscard]] auto LocalUplink( + LocalCAS<false> const& latest, + LargeObjectCAS<false, kType> const& latest_large, + bazel_re::Digest const& digest) const noexcept -> bool; + private: // By default, overwrite existing entries. Unless this is a generation // (disabled global uplink), then we never want to overwrite any entries. 
diff --git a/src/buildtool/storage/large_object_cas.tpp b/src/buildtool/storage/large_object_cas.tpp index 71b4528e..4bab5201 100644 --- a/src/buildtool/storage/large_object_cas.tpp +++ b/src/buildtool/storage/large_object_cas.tpp @@ -21,9 +21,11 @@ #include "fmt/core.h" #include "nlohmann/json.hpp" +#include "src/buildtool/compatibility/compatibility.hpp" #include "src/buildtool/compatibility/native_support.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/storage/file_chunker.hpp" +#include "src/buildtool/storage/garbage_collector.hpp" #include "src/buildtool/storage/large_object_cas.hpp" #include "src/buildtool/storage/local_cas.hpp" @@ -36,6 +38,18 @@ auto LargeObjectCAS<kDoGlobalUplink, kType>::GetEntryPath( if (FileSystemManager::IsFile(file_path)) { return file_path; } + + if constexpr (kDoGlobalUplink) { + // To promote parts of the tree properly, regular uplinking logic for + // trees is used: + bool uplinked = + IsTreeObject(kType) and not Compatibility::IsCompatible() + ? 
GarbageCollector::GlobalUplinkTree(digest) + : GarbageCollector::GlobalUplinkLargeBlob(digest); + if (uplinked and FileSystemManager::IsFile(file_path)) { + return file_path; + } + } return std::nullopt; } @@ -213,4 +227,41 @@ auto LargeObjectCAS<kDoGlobalUplink, kType>::Splice( return large_object; } +template <bool kDoGlobalUplink, ObjectType kType> +template <bool kIsLocalGeneration> +requires(kIsLocalGeneration) auto LargeObjectCAS<kDoGlobalUplink, kType>:: + LocalUplink(LocalCAS<false> const& latest, + LargeObjectCAS<false, kType> const& latest_large, + bazel_re::Digest const& digest) const noexcept -> bool { + // Check the large entry in the youngest generation: + if (latest_large.GetEntryPath(digest)) { + return true; + } + + // Check the large entry in the current generation: + auto parts = ReadEntry(digest); + if (not parts) { + // No large entry or the object is not large + return true; + } + + // Promoting the parts of the large entry: + for (auto const& part : *parts) { + static constexpr bool is_executable = false; + static constexpr bool skip_sync = true; + if (not local_cas_.LocalUplinkBlob( + latest, part, is_executable, skip_sync)) { + return false; + } + } + + auto path = GetEntryPath(digest); + if (not path) { + return false; + } + + const auto hash = NativeSupport::Unprefix(digest.hash()); + return latest_large.file_store_.AddFromFile(hash, *path, /*is_owner=*/true); +} + #endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_TPP diff --git a/src/buildtool/storage/local_cas.hpp b/src/buildtool/storage/local_cas.hpp index e4477011..057087ef 100644 --- a/src/buildtool/storage/local_cas.hpp +++ b/src/buildtool/storage/local_cas.hpp @@ -206,6 +206,20 @@ class LocalCAS { LocalGenerationCAS const& latest, bazel_re::Digest const& digest) const noexcept -> bool; + /// \brief Uplink large entry from this generation to latest LocalCAS + /// generation. 
This function is only available for instances that are used + /// as local GC generations (i.e., disabled global uplink). + /// \tparam kType Type of the large entry to be uplinked. + /// \tparam kIsLocalGeneration True if this instance is a local generation. + /// \param latest The latest LocalCAS generation. + /// \param latest_large The latest LargeObjectCAS + /// \param digest The digest of the large entry to uplink. + /// \returns True if the large entry was successfully uplinked. + template <ObjectType kType, bool kIsLocalGeneration = not kDoGlobalUplink> + requires(kIsLocalGeneration) [[nodiscard]] auto LocalUplinkLargeObject( + LocalGenerationCAS const& latest, + bazel_re::Digest const& digest) const noexcept -> bool; + private: ObjectCAS<ObjectType::File> cas_file_; ObjectCAS<ObjectType::Executable> cas_exec_; diff --git a/src/buildtool/storage/local_cas.tpp b/src/buildtool/storage/local_cas.tpp index 8c20ded9..0c2d794d 100644 --- a/src/buildtool/storage/local_cas.tpp +++ b/src/buildtool/storage/local_cas.tpp @@ -294,8 +294,19 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::LocalUplinkBlob( } // Uplink blob from older generation to the latest generation. - return blob_path_latest.has_value() or - latest.StoreBlob</*kOwner=*/true>(*blob_path, is_executable); + bool uplinked = + blob_path_latest.has_value() or + latest.StoreBlob</*kOwner=*/true>(*blob_path, is_executable); + + if (uplinked) { + // The result of uplinking of a large object must not affect the + // result of uplinking in general. Otherwise, two sequential calls + // to BlobPath might return different results: The first call splices + // and uplinks the object, but fails at large entry uplinking. The + // second call finds the blob in the youngest generation and returns. 
+ std::ignore = LocalUplinkLargeObject<ObjectType::File>(latest, digest); + } + return uplinked; } template <bool kDoGlobalUplink> @@ -386,10 +397,17 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::LocalUplinkGitTree( } // Uplink tree from older generation to the latest generation. - return latest.cas_tree_ - .StoreBlobFromFile(*tree_path, - /*is_owner=*/true) - .has_value(); + if (latest.cas_tree_.StoreBlobFromFile(*tree_path, /*is_owner=*/true)) { + // Uplink the large entry afterwards: + // The result of uplinking of a large object must not affect the + // result of uplinking in general. Otherwise, two sequential calls + // to TreePath might return different results: The first call splices + // and uplinks the object, but fails at large entry uplinking. The + // second call finds the tree in the youngest generation and returns. + std::ignore = LocalUplinkLargeObject<ObjectType::Tree>(latest, digest); + return true; + } + return false; } template <bool kDoGlobalUplink> @@ -445,6 +463,16 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>:: /*is_owner=*/true)) { try { seen->emplace(digest); + + // Uplink the large entry afterwards: + // The result of uplinking of a large object must not affect the + // result of uplinking in general. Otherwise, two sequential + // calls to TreePath might return different results: The first call + // splices and uplinks the object, but fails at large entry + // uplinking. The second call finds the tree in the youngest + // generation and returns. + std::ignore = + LocalUplinkLargeObject<ObjectType::Tree>(latest, digest); return true; } catch (...) 
{ } @@ -454,6 +482,22 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>:: template <bool kDoGlobalUplink> template <ObjectType kType, bool kIsLocalGeneration> +requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>:: + LocalUplinkLargeObject(LocalGenerationCAS const& latest, + bazel_re::Digest const& digest) const noexcept + -> bool { + if constexpr (IsTreeObject(kType)) { + return cas_tree_large_.LocalUplink( + latest, latest.cas_tree_large_, digest); + } + else { + return cas_file_large_.LocalUplink( + latest, latest.cas_file_large_, digest); + } +} + +template <bool kDoGlobalUplink> +template <ObjectType kType, bool kIsLocalGeneration> requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::TrySplice( bazel_re::Digest const& digest) const noexcept -> std::optional<LargeObject> { diff --git a/test/buildtool/storage/large_object_cas.test.cpp b/test/buildtool/storage/large_object_cas.test.cpp index 0a070349..c8718baa 100644 --- a/test/buildtool/storage/large_object_cas.test.cpp +++ b/test/buildtool/storage/large_object_cas.test.cpp @@ -61,6 +61,8 @@ class Blob final { -> std::optional<std::filesystem::path>; }; +using File = Blob<false>; + class Tree final { public: static constexpr auto kLargeId = std::string_view("tree_256"); @@ -149,6 +151,13 @@ static void TestLarge() noexcept { CHECK(FileSystemManager::RemoveFile(path)); CHECK_FALSE(FileSystemManager::IsFile(path)); + // For executables the non-executable entry must be also deleted. 
+ if constexpr (kIsExec) { + auto blob_path = cas.BlobPath(digest, /*is_executable=*/false); + REQUIRE(blob_path); + CHECK(FileSystemManager::RemoveFile(*blob_path)); + CHECK_FALSE(FileSystemManager::IsFile(*blob_path)); + } SECTION("Split short-circuting") { // Check the second call loads the entry from the large CAS: @@ -171,6 +180,41 @@ static void TestLarge() noexcept { // The result must be in the same location: CHECK(*spliced_path == path); } + + SECTION("Uplinking") { + // Increment generation: + CHECK(GarbageCollector::TriggerGarbageCollection()); + + // Check implicit splice: + auto spliced_path = + kIsTree ? cas.TreePath(digest) : cas.BlobPath(digest, kIsExec); + REQUIRE(spliced_path); + + // The result must be spliced to the same location: + CHECK(*spliced_path == path); + + // Check the large entry was uplinked too: + // Remove the spliced result: + CHECK(FileSystemManager::RemoveFile(path)); + CHECK_FALSE(FileSystemManager::IsFile(path)); + + // Call split with disabled uplinking: + auto pack_3 = kIsTree + ? Storage::Generation(0).CAS().SplitTree(digest) + : Storage::Generation(0).CAS().SplitBlob(digest); + auto* split_3 = std::get_if<std::vector<bazel_re::Digest>>(&pack_3); + REQUIRE(split_3); + CHECK(split_3->size() == split->size()); + + // Check there are no spliced results in all generations: + for (std::size_t i = 0; i < StorageConfig::NumGenerations(); ++i) { + auto generation_path = + kIsTree ? Storage::Generation(i).CAS().TreePath(digest) + : Storage::Generation(i).CAS().BlobPath(digest, + kIsExec); + REQUIRE_FALSE(generation_path); + } + } } } @@ -309,6 +353,104 @@ TEST_CASE_METHOD(HermeticLocalTestFixture, } } +// Test uplinking of nested large objects: +// A large tree depends on a number of nested objects: +// +// large_tree +// | - nested_blob +// | - nested_tree +// | |- other nested entries +// | - other entries +// +// All large entries are preliminarily split and the spliced results are +// deleted. The youngest generation is empty. 
Uplinking must restore the +// object (and its parts) and uplink them properly. +TEST_CASE_METHOD(HermeticLocalTestFixture, + "LargeObjectCAS: uplink nested large objects", + "[storage]") { + auto const& cas = Storage::Instance().CAS(); + + // Randomize a large directory: + auto tree_path = LargeTestUtils::Tree::Generate( + std::string("nested_tree"), LargeTestUtils::Tree::kLargeSize); + REQUIRE(tree_path); + + // Randomize a large nested tree: + auto const nested_tree = (*tree_path) / "nested_tree"; + REQUIRE(LargeObjectUtils::GenerateDirectory( + nested_tree, LargeTestUtils::Tree::kLargeSize)); + + // Randomize a large nested blob: + auto nested_blob = (*tree_path) / "nested_blob"; + REQUIRE(LargeObjectUtils::GenerateFile(nested_blob, + LargeTestUtils::File::kLargeSize)); + + // Add the nested tree to the CAS: + auto nested_tree_digest = LargeTestUtils::Tree::StoreRaw(cas, nested_tree); + REQUIRE(nested_tree_digest); + auto nested_tree_path = cas.TreePath(*nested_tree_digest); + REQUIRE(nested_tree_path); + + // Add the nested blob to the CAS: + auto nested_blob_digest = cas.StoreBlob(nested_blob, false); + REQUIRE(nested_blob_digest); + auto nested_blob_path = cas.BlobPath(*nested_blob_digest, false); + REQUIRE(nested_blob_path); + + // Add the initial large directory to the CAS: + auto large_tree_digest = LargeTestUtils::Tree::StoreRaw(cas, *tree_path); + REQUIRE(large_tree_digest); + auto large_tree_path = cas.TreePath(*large_tree_digest); + REQUIRE(large_tree_path); + + // Split large entries: + auto split_nested_tree = cas.SplitTree(*nested_tree_digest); + REQUIRE(std::get_if<std::vector<bazel_re::Digest>>(&split_nested_tree)); + + auto split_nested_blob = cas.SplitBlob(*nested_blob_digest); + REQUIRE(std::get_if<std::vector<bazel_re::Digest>>(&split_nested_blob)); + + auto split_large_tree = cas.SplitTree(*large_tree_digest); + REQUIRE(std::get_if<std::vector<bazel_re::Digest>>(&split_large_tree)); + + // Remove the spliced results: + 
REQUIRE(FileSystemManager::RemoveFile(*nested_tree_path)); + REQUIRE(FileSystemManager::RemoveFile(*nested_blob_path)); + REQUIRE(FileSystemManager::RemoveFile(*large_tree_path)); + + // Rotate generations: + REQUIRE(GarbageCollector::TriggerGarbageCollection()); + + // Ask to splice the large tree: + auto result_path = cas.TreePath(*large_tree_digest); + REQUIRE(result_path); + + // The nested tree and all its large parts must be spliced to the same + // locations: + CHECK(FileSystemManager::IsFile(*nested_tree_path)); + CHECK(FileSystemManager::IsFile(*nested_blob_path)); + CHECK(FileSystemManager::IsFile(*large_tree_path)); + + // Check there are no spliced results in old generations: + for (std::size_t i = 1; i < StorageConfig::NumGenerations(); ++i) { + auto const& generation_cas = Storage::Generation(i).CAS(); + REQUIRE_FALSE(generation_cas.TreePath(*nested_tree_digest)); + REQUIRE_FALSE(generation_cas.TreePath(*large_tree_digest)); + REQUIRE_FALSE(generation_cas.BlobPath(*nested_blob_digest, + /*is_executable=*/false)); + } + + // Check large entries are in the latest generation: + auto const& latest_cas = Storage::Generation(0).CAS(); + auto split_nested_tree_2 = latest_cas.SplitTree(*nested_tree_digest); + REQUIRE_FALSE(std::get_if<LargeObjectError>(&split_nested_tree_2)); + + auto split_nested_blob_2 = latest_cas.SplitBlob(*nested_blob_digest); + REQUIRE_FALSE(std::get_if<LargeObjectError>(&split_nested_blob_2)); + + auto split_large_tree_2 = latest_cas.SplitTree(*large_tree_digest); + REQUIRE_FALSE(std::get_if<LargeObjectError>(&split_large_tree_2)); +} namespace { /// \brief Extends the lifetime of large files for the whole set of tests. |