diff options
-rw-r--r-- | src/buildtool/storage/TARGETS | 1 | ||||
-rw-r--r-- | src/buildtool/storage/compactifier.cpp | 82 | ||||
-rw-r--r-- | src/buildtool/storage/compactifier.hpp | 12 | ||||
-rw-r--r-- | src/buildtool/storage/garbage_collector.cpp | 10 | ||||
-rw-r--r-- | src/buildtool/storage/garbage_collector.hpp | 9 | ||||
-rw-r--r-- | test/buildtool/storage/large_object_cas.test.cpp | 15 |
6 files changed, 122 insertions, 7 deletions
diff --git a/src/buildtool/storage/TARGETS b/src/buildtool/storage/TARGETS index d52d6402..9161c398 100644 --- a/src/buildtool/storage/TARGETS +++ b/src/buildtool/storage/TARGETS @@ -69,6 +69,7 @@ , "private-deps": [ ["src/buildtool/execution_api/remote", "config"] , ["src/buildtool/logging", "log_level"] + , ["src/buildtool/execution_api/common", "message_limits"] ] } , "fs_utils": diff --git a/src/buildtool/storage/compactifier.cpp b/src/buildtool/storage/compactifier.cpp index 01c5c7a6..4874e139 100644 --- a/src/buildtool/storage/compactifier.cpp +++ b/src/buildtool/storage/compactifier.cpp @@ -17,9 +17,16 @@ #include <algorithm> #include <array> #include <filesystem> +#include <optional> +#include <variant> +#include <vector> +#include "src/buildtool/common/bazel_types.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" +#include "src/buildtool/file_system/object_cas.hpp" #include "src/buildtool/file_system/object_type.hpp" +#include "src/buildtool/logging/log_level.hpp" +#include "src/buildtool/logging/logger.hpp" #include "src/buildtool/storage/local_cas.hpp" namespace { @@ -32,6 +39,17 @@ template <ObjectType... kType> requires(sizeof...(kType) != 0) [[nodiscard]] auto RemoveSpliced(LocalCAS<false> const& cas) noexcept -> bool; + +/// \brief Split and remove from the kType storage every entry that is larger +/// than the given threshold. Results of splitting are added to the LocalCAS. +/// \tparam kType Type of the storage to inspect. +/// \param cas LocalCAS to store results of splitting. +/// \param threshold Minimum size of an entry to be split. +/// \return True if the kType storage doesn't contain splittable +/// entries larger than the compactification threshold afterwards. 
+template <ObjectType kType> +[[nodiscard]] auto SplitLarge(LocalCAS<false> const& cas, + size_t threshold) noexcept -> bool; } // namespace auto Compactifier::RemoveSpliced(LocalCAS<false> const& cas) noexcept -> bool { @@ -39,6 +57,13 @@ auto Compactifier::RemoveSpliced(LocalCAS<false> const& cas) noexcept -> bool { ::RemoveSpliced<ObjectType::File, ObjectType::Executable>(cas); } +auto Compactifier::SplitLarge(LocalCAS<false> const& cas, + size_t threshold) noexcept -> bool { + return ::SplitLarge<ObjectType::File>(cas, threshold) and + ::SplitLarge<ObjectType::Executable>(cas, threshold) and + ::SplitLarge<ObjectType::Tree>(cas, threshold); +} + namespace { template <ObjectType... kType> requires(sizeof...(kType) != 0) @@ -82,4 +107,61 @@ requires(sizeof...(kType) != 0) return FileSystemManager::ReadDirectoryEntriesRecursive(large_storage, callback); } + +template <ObjectType kType> +[[nodiscard]] auto SplitLarge(LocalCAS<false> const& cas, + size_t threshold) noexcept -> bool { + // Obtain path to the storage root: + auto const& storage_root = cas.StorageRoot(kType); + + // Check there are entries to process: + if (not FileSystemManager::IsDirectory(storage_root)) { + return true; + } + + FileSystemManager::UseDirEntryFunc callback = + [&](std::filesystem::path const& entry_object, bool is_tree) -> bool { + // Use all folders: + if (is_tree) { + return true; + } + + // Filter files by size: + auto const path = storage_root / entry_object; + if (std::filesystem::file_size(path) < threshold) { + return true; + } + + // Calculate the digest for the entry: + auto const digest = ObjectCAS<kType>::CreateDigest(path); + if (not digest) { + Logger::Log(LogLevel::Error, + "Failed to calculate digest for {}", + path.generic_string()); + return false; + } + + // Split the entry: + auto split_result = IsTreeObject(kType) ? 
cas.SplitTree(*digest) + : cas.SplitBlob(*digest); + auto* parts = std::get_if<std::vector<bazel_re::Digest>>(&split_result); + if (parts == nullptr) { + Logger::Log(LogLevel::Error, "Failed to split {}", digest->hash()); + return false; + } + + // If the file cannot actually be split (the threshold is too low), the + // file must not be deleted. + if (parts->size() < 2) { + Logger::Log(LogLevel::Debug, + "{} cannot be compactified. The compactification " + "threshold is too low.", + digest->hash()); + return true; + } + return FileSystemManager::RemoveFile(path); + }; + return FileSystemManager::ReadDirectoryEntriesRecursive(storage_root, + callback); +} } // namespace diff --git a/src/buildtool/storage/compactifier.hpp b/src/buildtool/storage/compactifier.hpp index ced458fe..b8040b51 100644 --- a/src/buildtool/storage/compactifier.hpp +++ b/src/buildtool/storage/compactifier.hpp @@ -15,6 +15,8 @@ #ifndef INCLUDED_SRC_BUILDTOOL_STORAGE_COMPACTIFIER_HPP #define INCLUDED_SRC_BUILDTOOL_STORAGE_COMPACTIFIER_HPP +#include <cstddef> + template <bool> class LocalCAS; @@ -26,6 +28,16 @@ class Compactifier final { /// entries. [[nodiscard]] static auto RemoveSpliced(LocalCAS<false> const& cas) noexcept -> bool; + + /// \brief Split and remove from the storage every entry that is larger than + /// the compactification threshold. Results of splitting are added to the + /// LocalCAS. + /// \param cas LocalCAS to store results of splitting. + /// \param threshold Compactification threshold. + /// \return True if the storage doesn't contain splittable + /// entries larger than the compactification threshold afterwards. 
+ [[nodiscard]] static auto SplitLarge(LocalCAS<false> const& cas, + size_t threshold) noexcept -> bool; }; #endif // INCLUDED_SRC_BUILDTOOL_STORAGE_COMPACTIFIER_HPP diff --git a/src/buildtool/storage/garbage_collector.cpp b/src/buildtool/storage/garbage_collector.cpp index 1b7dc09b..3d077daf 100644 --- a/src/buildtool/storage/garbage_collector.cpp +++ b/src/buildtool/storage/garbage_collector.cpp @@ -26,6 +26,7 @@ #include "src/buildtool/common/bazel_types.hpp" #include "src/buildtool/compatibility/compatibility.hpp" #include "src/buildtool/compatibility/native_support.hpp" +#include "src/buildtool/execution_api/common/message_limits.hpp" #include "src/buildtool/file_system/file_storage.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/file_system/git_repo.hpp" @@ -242,7 +243,7 @@ auto GarbageCollector::TriggerGarbageCollection(bool no_rotation) noexcept // Compactification must take place before rotating generations. // Otherwise, an interruption of the process during compactification // would lead to an invalid old generation. - if (not GarbageCollector::Compactify()) { + if (not GarbageCollector::Compactify(kMaxBatchTransferSize)) { Logger::Log(LogLevel::Error, "Failed to compactify the youngest generation."); return false; @@ -290,7 +291,7 @@ auto GarbageCollector::TriggerGarbageCollection(bool no_rotation) noexcept return success; } -auto GarbageCollector::Compactify() noexcept -> bool { +auto GarbageCollector::Compactify(size_t threshold) noexcept -> bool { const bool mode = Compatibility::IsCompatible(); // Return to the initial compatibility mode once done: @@ -299,12 +300,13 @@ auto GarbageCollector::Compactify() noexcept -> bool { }); // Compactification must be done for both native and compatible storages. 
- auto compactify = [](bool compatible) -> bool { + auto compactify = [threshold](bool compatible) -> bool { auto const storage = ::Generation(StorageConfig::GenerationCacheDir(0, compatible)); Compatibility::SetCompatible(compatible); - return Compactifier::RemoveSpliced(storage.CAS()); + return Compactifier::RemoveSpliced(storage.CAS()) and + Compactifier::SplitLarge(storage.CAS(), threshold); }; return compactify(mode) and compactify(not mode); } diff --git a/src/buildtool/storage/garbage_collector.hpp b/src/buildtool/storage/garbage_collector.hpp index 6bca3f3c..0d80def0 100644 --- a/src/buildtool/storage/garbage_collector.hpp +++ b/src/buildtool/storage/garbage_collector.hpp @@ -15,6 +15,7 @@ #ifndef INCLUDED_SRC_BUILDTOOL_STORAGE_GARBAGE_COLLECTOR_HPP #define INCLUDED_SRC_BUILDTOOL_STORAGE_GARBAGE_COLLECTOR_HPP +#include <cstddef> #include <functional> #include <optional> #include <string> @@ -90,10 +91,12 @@ class GarbageCollector { [[nodiscard]] auto static LockFilePath() noexcept -> std::filesystem::path; - /// \brief Remove spliced objects from the youngest generation. - /// \return True if the youngest generation does not contain spliced + /// \brief Remove spliced objects from the youngest generation and split + /// objects that are larger than the threshold. + /// \param threshold Compactification threshold. + /// \return True if the youngest generation does not contain spliceable /// objects afterwards. 
- [[nodiscard]] auto static Compactify() noexcept -> bool; + [[nodiscard]] auto static Compactify(size_t threshold) noexcept -> bool; }; #endif // INCLUDED_SRC_BUILDTOOL_STORAGE_GARBAGE_COLLECTOR_HPP diff --git a/test/buildtool/storage/large_object_cas.test.cpp b/test/buildtool/storage/large_object_cas.test.cpp index fe3b8e6d..96462329 100644 --- a/test/buildtool/storage/large_object_cas.test.cpp +++ b/test/buildtool/storage/large_object_cas.test.cpp @@ -425,6 +425,7 @@ static void TestExternal() noexcept { // Test compactification of a storage generation. // If there are objects in the storage that have an entry in // the large CAS, they must be deleted during compactification. +// All splittable objects in the generation must be split. template <ObjectType kType> static void TestCompactification() { SECTION("Compactify") { @@ -445,6 +446,17 @@ static void TestCompactification() { auto result = kIsTree ? cas.SplitTree(digest) : cas.SplitBlob(digest); REQUIRE(std::get_if<std::vector<bazel_re::Digest>>(&result) != nullptr); + // For trees the size must be increased to exceed the internal + // compactification threshold: + static constexpr auto ExceedThresholdSize = + kIsTree ? TestType::kLargeSize * 8 : TestType::kLargeSize; + + // Create a large object that is to be split during compactification: + auto object_2 = TestType::Create( + cas, std::string(TestType::kLargeId) + "_2", ExceedThresholdSize); + REQUIRE(object_2); + auto& [digest_2, path_2] = *object_2; + // Ensure all entries are in the storage: auto get_path = [](auto const& cas, bazel_re::Digest const& digest) { return kIsTree ? cas.TreePath(digest) @@ -453,6 +465,7 @@ static void TestCompactification() { auto const& latest = Storage::Generation(0).CAS(); REQUIRE(get_path(latest, digest).has_value()); + REQUIRE(get_path(latest, digest_2).has_value()); // Compactify the youngest generation: // Generation rotation is disabled to exclude uplinking. 
@@ -462,9 +475,11 @@ static void TestCompactification() { // All entries must be deleted during compactification, and for blobs // and executables there are no synchronized entries in the storage: REQUIRE_FALSE(get_path(latest, digest).has_value()); + REQUIRE_FALSE(get_path(latest, digest_2).has_value()); // All entries must be implicitly splicable: REQUIRE(get_path(cas, digest).has_value()); + REQUIRE(get_path(cas, digest_2).has_value()); } } |