summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/buildtool/storage/garbage_collector.cpp 14
-rw-r--r-- src/buildtool/storage/garbage_collector.hpp 7
-rw-r--r-- src/buildtool/storage/large_object_cas.hpp 15
-rw-r--r-- src/buildtool/storage/large_object_cas.tpp 51
-rw-r--r-- src/buildtool/storage/local_cas.hpp 14
-rw-r--r-- src/buildtool/storage/local_cas.tpp 56
-rw-r--r-- test/buildtool/storage/large_object_cas.test.cpp 142
7 files changed, 293 insertions, 6 deletions
diff --git a/src/buildtool/storage/garbage_collector.cpp b/src/buildtool/storage/garbage_collector.cpp
index cfc6daf5..232c8ab8 100644
--- a/src/buildtool/storage/garbage_collector.cpp
+++ b/src/buildtool/storage/garbage_collector.cpp
@@ -72,6 +72,20 @@ auto GarbageCollector::GlobalUplinkBlob(bazel_re::Digest const& digest,
return false;
}
+auto GarbageCollector::GlobalUplinkLargeBlob(
+ bazel_re::Digest const& digest) noexcept -> bool {
+ // Try to find large entry in all generations.
+ auto const& latest_cas = Storage::Generation(0).CAS();
+ for (std::size_t i = 0; i < StorageConfig::NumGenerations(); ++i) {
+ if (Storage::Generation(i)
+ .CAS()
+ .LocalUplinkLargeObject<ObjectType::File>(latest_cas, digest)) {
+ return true;
+ }
+ }
+ return false;
+}
+
auto GarbageCollector::GlobalUplinkTree(bazel_re::Digest const& digest) noexcept
-> bool {
// Try to find tree in all generations.
diff --git a/src/buildtool/storage/garbage_collector.hpp b/src/buildtool/storage/garbage_collector.hpp
index c4ceaf8a..28a5f08b 100644
--- a/src/buildtool/storage/garbage_collector.hpp
+++ b/src/buildtool/storage/garbage_collector.hpp
@@ -42,6 +42,13 @@ class GarbageCollector {
bool is_executable) noexcept
-> bool;
+ /// \brief Uplink large blob entry across LocalCASes from all generations to
+ /// latest. This method does not splice the large object.
+ /// \param digest Digest of the large blob entry to uplink.
+ /// \returns true if large entry was found and successfully uplinked.
+ [[nodiscard]] auto static GlobalUplinkLargeBlob(
+ bazel_re::Digest const& digest) noexcept -> bool;
+
/// \brief Uplink tree across LocalCASes from all generations to latest.
/// Note that the tree will be deeply uplinked, i.e., all entries referenced
/// by this tree will be uplinked before (including sub-trees).
diff --git a/src/buildtool/storage/large_object_cas.hpp b/src/buildtool/storage/large_object_cas.hpp
index 0d658af6..2c8a3348 100644
--- a/src/buildtool/storage/large_object_cas.hpp
+++ b/src/buildtool/storage/large_object_cas.hpp
@@ -133,6 +133,21 @@ class LargeObjectCAS final {
std::vector<bazel_re::Digest> const& parts)
const noexcept -> std::variant<LargeObjectError, LargeObject>;
+ /// \brief Uplink large entry from this generation to latest LocalCAS
+ /// generation. For the large entry its parts get promoted first and then
+ /// the entry itself. This function is only available for instances that are
+ /// used as local GC generations (i.e., disabled global uplink).
+ /// \tparam kIsLocalGeneration True if this instance is a local generation.
+ /// \param latest The latest LocalCAS generation.
+ /// \param latest_large The latest LargeObjectCAS
+ /// \param digest The digest of the large entry to uplink.
+ /// \returns True if the large entry was successfully uplinked.
+ template <bool kIsLocalGeneration = not kDoGlobalUplink>
+ requires(kIsLocalGeneration) [[nodiscard]] auto LocalUplink(
+ LocalCAS<false> const& latest,
+ LargeObjectCAS<false, kType> const& latest_large,
+ bazel_re::Digest const& digest) const noexcept -> bool;
+
private:
// By default, overwrite existing entries. Unless this is a generation
// (disabled global uplink), then we never want to overwrite any entries.
diff --git a/src/buildtool/storage/large_object_cas.tpp b/src/buildtool/storage/large_object_cas.tpp
index 71b4528e..4bab5201 100644
--- a/src/buildtool/storage/large_object_cas.tpp
+++ b/src/buildtool/storage/large_object_cas.tpp
@@ -21,9 +21,11 @@
#include "fmt/core.h"
#include "nlohmann/json.hpp"
+#include "src/buildtool/compatibility/compatibility.hpp"
#include "src/buildtool/compatibility/native_support.hpp"
#include "src/buildtool/file_system/file_system_manager.hpp"
#include "src/buildtool/storage/file_chunker.hpp"
+#include "src/buildtool/storage/garbage_collector.hpp"
#include "src/buildtool/storage/large_object_cas.hpp"
#include "src/buildtool/storage/local_cas.hpp"
@@ -36,6 +38,18 @@ auto LargeObjectCAS<kDoGlobalUplink, kType>::GetEntryPath(
if (FileSystemManager::IsFile(file_path)) {
return file_path;
}
+
+ if constexpr (kDoGlobalUplink) {
+ // To promote parts of the tree properly, regular uplinking logic for
+ // trees is used:
+ bool uplinked =
+ IsTreeObject(kType) and not Compatibility::IsCompatible()
+ ? GarbageCollector::GlobalUplinkTree(digest)
+ : GarbageCollector::GlobalUplinkLargeBlob(digest);
+ if (uplinked and FileSystemManager::IsFile(file_path)) {
+ return file_path;
+ }
+ }
return std::nullopt;
}
@@ -213,4 +227,41 @@ auto LargeObjectCAS<kDoGlobalUplink, kType>::Splice(
return large_object;
}
+template <bool kDoGlobalUplink, ObjectType kType>
+template <bool kIsLocalGeneration>
+requires(kIsLocalGeneration) auto LargeObjectCAS<kDoGlobalUplink, kType>::
+ LocalUplink(LocalCAS<false> const& latest,
+ LargeObjectCAS<false, kType> const& latest_large,
+ bazel_re::Digest const& digest) const noexcept -> bool {
+ // Check the large entry in the youngest generation:
+ if (latest_large.GetEntryPath(digest)) {
+ return true;
+ }
+
+ // Check the large entry in the current generation:
+ auto parts = ReadEntry(digest);
+ if (not parts) {
+ // No large entry or the object is not large
+ return true;
+ }
+
+ // Promoting the parts of the large entry:
+ for (auto const& part : *parts) {
+ static constexpr bool is_executable = false;
+ static constexpr bool skip_sync = true;
+ if (not local_cas_.LocalUplinkBlob(
+ latest, part, is_executable, skip_sync)) {
+ return false;
+ }
+ }
+
+ auto path = GetEntryPath(digest);
+ if (not path) {
+ return false;
+ }
+
+ const auto hash = NativeSupport::Unprefix(digest.hash());
+ return latest_large.file_store_.AddFromFile(hash, *path, /*is_owner=*/true);
+}
+
#endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_TPP
diff --git a/src/buildtool/storage/local_cas.hpp b/src/buildtool/storage/local_cas.hpp
index e4477011..057087ef 100644
--- a/src/buildtool/storage/local_cas.hpp
+++ b/src/buildtool/storage/local_cas.hpp
@@ -206,6 +206,20 @@ class LocalCAS {
LocalGenerationCAS const& latest,
bazel_re::Digest const& digest) const noexcept -> bool;
+ /// \brief Uplink large entry from this generation to latest LocalCAS
+ /// generation. This function is only available for instances that are used
+ /// as local GC generations (i.e., disabled global uplink).
+ /// \tparam kType Type of the large entry to be uplinked.
+ /// \tparam kIsLocalGeneration True if this instance is a local generation.
+ /// \param latest The latest LocalCAS generation.
+ /// \param latest_large The latest LargeObjectCAS
+ /// \param digest The digest of the large entry to uplink.
+ /// \returns True if the large entry was successfully uplinked.
+ template <ObjectType kType, bool kIsLocalGeneration = not kDoGlobalUplink>
+ requires(kIsLocalGeneration) [[nodiscard]] auto LocalUplinkLargeObject(
+ LocalGenerationCAS const& latest,
+ bazel_re::Digest const& digest) const noexcept -> bool;
+
private:
ObjectCAS<ObjectType::File> cas_file_;
ObjectCAS<ObjectType::Executable> cas_exec_;
diff --git a/src/buildtool/storage/local_cas.tpp b/src/buildtool/storage/local_cas.tpp
index 8c20ded9..0c2d794d 100644
--- a/src/buildtool/storage/local_cas.tpp
+++ b/src/buildtool/storage/local_cas.tpp
@@ -294,8 +294,19 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::LocalUplinkBlob(
}
// Uplink blob from older generation to the latest generation.
- return blob_path_latest.has_value() or
- latest.StoreBlob</*kOwner=*/true>(*blob_path, is_executable);
+ bool uplinked =
+ blob_path_latest.has_value() or
+ latest.StoreBlob</*kOwner=*/true>(*blob_path, is_executable);
+
+ if (uplinked) {
+ // The result of uplinking of a large object must not affect the
+ // result of uplinking in general. Otherwise, two sequential calls
+ // to BlobPath might return different results: The first call splices
+ // and uplinks the object, but fails at large entry uplinking. The
+ // second call finds the blob in the youngest generation and returns.
+ std::ignore = LocalUplinkLargeObject<ObjectType::File>(latest, digest);
+ }
+ return uplinked;
}
template <bool kDoGlobalUplink>
@@ -386,10 +397,17 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::LocalUplinkGitTree(
}
// Uplink tree from older generation to the latest generation.
- return latest.cas_tree_
- .StoreBlobFromFile(*tree_path,
- /*is_owner=*/true)
- .has_value();
+ if (latest.cas_tree_.StoreBlobFromFile(*tree_path, /*is owner=*/true)) {
+ // Uplink the large entry afterwards:
+ // The result of uplinking of a large object must not affect the
+ // result of uplinking in general. Otherwise, two sequential calls
+ // to TreePath might return different results: The first call splices
+ // and uplinks the object, but fails at large entry uplinking. The
+ // second call finds the tree in the youngest generation and returns.
+ std::ignore = LocalUplinkLargeObject<ObjectType::Tree>(latest, digest);
+ return true;
+ }
+ return false;
}
template <bool kDoGlobalUplink>
@@ -445,6 +463,16 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::
/*is_owner=*/true)) {
try {
seen->emplace(digest);
+
+ // Uplink the large entry afterwards:
+ // The result of uplinking of a large object must not affect the
+ // result of uplinking in general. Otherwise, two sequential
+ // calls to TreePath might return different results: The first call
+ // splices and uplinks the object, but fails at large entry
+ // uplinking. The second call finds the tree in the youngest
+ // generation and returns.
+ std::ignore =
+ LocalUplinkLargeObject<ObjectType::Tree>(latest, digest);
return true;
} catch (...) {
}
@@ -454,6 +482,22 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::
template <bool kDoGlobalUplink>
template <ObjectType kType, bool kIsLocalGeneration>
+requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::
+ LocalUplinkLargeObject(LocalGenerationCAS const& latest,
+ bazel_re::Digest const& digest) const noexcept
+ -> bool {
+ if constexpr (IsTreeObject(kType)) {
+ return cas_tree_large_.LocalUplink(
+ latest, latest.cas_tree_large_, digest);
+ }
+ else {
+ return cas_file_large_.LocalUplink(
+ latest, latest.cas_file_large_, digest);
+ }
+}
+
+template <bool kDoGlobalUplink>
+template <ObjectType kType, bool kIsLocalGeneration>
requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::TrySplice(
bazel_re::Digest const& digest) const noexcept
-> std::optional<LargeObject> {
diff --git a/test/buildtool/storage/large_object_cas.test.cpp b/test/buildtool/storage/large_object_cas.test.cpp
index 0a070349..c8718baa 100644
--- a/test/buildtool/storage/large_object_cas.test.cpp
+++ b/test/buildtool/storage/large_object_cas.test.cpp
@@ -61,6 +61,8 @@ class Blob final {
-> std::optional<std::filesystem::path>;
};
+using File = Blob<false>;
+
class Tree final {
public:
static constexpr auto kLargeId = std::string_view("tree_256");
@@ -149,6 +151,13 @@ static void TestLarge() noexcept {
CHECK(FileSystemManager::RemoveFile(path));
CHECK_FALSE(FileSystemManager::IsFile(path));
+ // For executables, the non-executable entry must also be deleted.
+ if constexpr (kIsExec) {
+ auto blob_path = cas.BlobPath(digest, /*is_executable=*/false);
+ REQUIRE(blob_path);
+ CHECK(FileSystemManager::RemoveFile(*blob_path));
+ CHECK_FALSE(FileSystemManager::IsFile(*blob_path));
+ }
SECTION("Split short-circuting") {
// Check the second call loads the entry from the large CAS:
@@ -171,6 +180,41 @@ static void TestLarge() noexcept {
// The result must be in the same location:
CHECK(*spliced_path == path);
}
+
+ SECTION("Uplinking") {
+ // Increment generation:
+ CHECK(GarbageCollector::TriggerGarbageCollection());
+
+ // Check implicit splice:
+ auto spliced_path =
+ kIsTree ? cas.TreePath(digest) : cas.BlobPath(digest, kIsExec);
+ REQUIRE(spliced_path);
+
+ // The result must be spliced to the same location:
+ CHECK(*spliced_path == path);
+
+ // Check the large entry was uplinked too:
+ // Remove the spliced result:
+ CHECK(FileSystemManager::RemoveFile(path));
+ CHECK_FALSE(FileSystemManager::IsFile(path));
+
+ // Call split with disabled uplinking:
+ auto pack_3 = kIsTree
+ ? Storage::Generation(0).CAS().SplitTree(digest)
+ : Storage::Generation(0).CAS().SplitBlob(digest);
+ auto* split_3 = std::get_if<std::vector<bazel_re::Digest>>(&pack_3);
+ REQUIRE(split_3);
+ CHECK(split_3->size() == split->size());
+
+ // Check there are no spliced results in all generations:
+ for (std::size_t i = 0; i < StorageConfig::NumGenerations(); ++i) {
+ auto generation_path =
+ kIsTree ? Storage::Generation(i).CAS().TreePath(digest)
+ : Storage::Generation(i).CAS().BlobPath(digest,
+ kIsExec);
+ REQUIRE_FALSE(generation_path);
+ }
+ }
}
}
@@ -309,6 +353,104 @@ TEST_CASE_METHOD(HermeticLocalTestFixture,
}
}
+// Test uplinking of nested large objects:
+// A large tree depends on a number of nested objects:
+//
+// large_tree
+// | - nested_blob
+// | - nested_tree
+// | |- other nested entries
+// | - other entries
+//
+// All large entries are preliminarily split and the spliced results are
+// deleted. The youngest generation is empty. Uplinking must restore the
+// object (and its parts) and uplink them properly.
+TEST_CASE_METHOD(HermeticLocalTestFixture,
+ "LargeObjectCAS: uplink nested large objects",
+ "[storage]") {
+ auto const& cas = Storage::Instance().CAS();
+
+ // Randomize a large directory:
+ auto tree_path = LargeTestUtils::Tree::Generate(
+ std::string("nested_tree"), LargeTestUtils::Tree::kLargeSize);
+ REQUIRE(tree_path);
+
+ // Randomize a large nested tree:
+ auto const nested_tree = (*tree_path) / "nested_tree";
+ REQUIRE(LargeObjectUtils::GenerateDirectory(
+ nested_tree, LargeTestUtils::Tree::kLargeSize));
+
+ // Randomize a large nested blob:
+ auto nested_blob = (*tree_path) / "nested_blob";
+ REQUIRE(LargeObjectUtils::GenerateFile(nested_blob,
+ LargeTestUtils::File::kLargeSize));
+
+ // Add the nested tree to the CAS:
+ auto nested_tree_digest = LargeTestUtils::Tree::StoreRaw(cas, nested_tree);
+ REQUIRE(nested_tree_digest);
+ auto nested_tree_path = cas.TreePath(*nested_tree_digest);
+ REQUIRE(nested_tree_path);
+
+ // Add the nested blob to the CAS:
+ auto nested_blob_digest = cas.StoreBlob(nested_blob, false);
+ REQUIRE(nested_blob_digest);
+ auto nested_blob_path = cas.BlobPath(*nested_blob_digest, false);
+ REQUIRE(nested_blob_path);
+
+ // Add the initial large directory to the CAS:
+ auto large_tree_digest = LargeTestUtils::Tree::StoreRaw(cas, *tree_path);
+ REQUIRE(large_tree_digest);
+ auto large_tree_path = cas.TreePath(*large_tree_digest);
+ REQUIRE(large_tree_path);
+
+ // Split large entries:
+ auto split_nested_tree = cas.SplitTree(*nested_tree_digest);
+ REQUIRE(std::get_if<std::vector<bazel_re::Digest>>(&split_nested_tree));
+
+ auto split_nested_blob = cas.SplitBlob(*nested_blob_digest);
+ REQUIRE(std::get_if<std::vector<bazel_re::Digest>>(&split_nested_blob));
+
+ auto split_large_tree = cas.SplitTree(*large_tree_digest);
+ REQUIRE(std::get_if<std::vector<bazel_re::Digest>>(&split_large_tree));
+
+ // Remove the spliced results:
+ REQUIRE(FileSystemManager::RemoveFile(*nested_tree_path));
+ REQUIRE(FileSystemManager::RemoveFile(*nested_blob_path));
+ REQUIRE(FileSystemManager::RemoveFile(*large_tree_path));
+
+ // Rotate generations:
+ REQUIRE(GarbageCollector::TriggerGarbageCollection());
+
+ // Ask to splice the large tree:
+ auto result_path = cas.TreePath(*large_tree_digest);
+ REQUIRE(result_path);
+
+ // The nested tree and all its large parts must be spliced to the same
+ // locations:
+ CHECK(FileSystemManager::IsFile(*nested_tree_path));
+ CHECK(FileSystemManager::IsFile(*nested_blob_path));
+ CHECK(FileSystemManager::IsFile(*large_tree_path));
+
+ // Check there are no spliced results in old generations:
+ for (std::size_t i = 1; i < StorageConfig::NumGenerations(); ++i) {
+ auto const& generation_cas = Storage::Generation(i).CAS();
+ REQUIRE_FALSE(generation_cas.TreePath(*nested_tree_digest));
+ REQUIRE_FALSE(generation_cas.TreePath(*large_tree_digest));
+ REQUIRE_FALSE(generation_cas.BlobPath(*nested_blob_digest,
+ /*is_executable=*/false));
+ }
+
+ // Check large entries are in the latest generation:
+ auto const& latest_cas = Storage::Generation(0).CAS();
+ auto split_nested_tree_2 = latest_cas.SplitTree(*nested_tree_digest);
+ REQUIRE_FALSE(std::get_if<LargeObjectError>(&split_nested_tree_2));
+
+ auto split_nested_blob_2 = latest_cas.SplitBlob(*nested_blob_digest);
+ REQUIRE_FALSE(std::get_if<LargeObjectError>(&split_nested_blob_2));
+
+ auto split_large_tree_2 = latest_cas.SplitTree(*large_tree_digest);
+ REQUIRE_FALSE(std::get_if<LargeObjectError>(&split_large_tree_2));
+}
namespace {
/// \brief Extends the lifetime of large files for the whole set of tests.