From b88adc43bb6ffe914ca9303e3001624e36fd64fa Mon Sep 17 00:00:00 2001 From: Maksim Denisov Date: Tue, 2 Apr 2024 18:37:16 +0200 Subject: Compactification: Split large entries. During garbage collection split and remove from the storage every entry that is larger than a threshold. --- src/buildtool/storage/compactifier.cpp | 82 ++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'src/buildtool/storage/compactifier.cpp') diff --git a/src/buildtool/storage/compactifier.cpp b/src/buildtool/storage/compactifier.cpp index 01c5c7a6..4874e139 100644 --- a/src/buildtool/storage/compactifier.cpp +++ b/src/buildtool/storage/compactifier.cpp @@ -17,9 +17,16 @@ #include #include #include +#include +#include +#include +#include "src/buildtool/common/bazel_types.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" +#include "src/buildtool/file_system/object_cas.hpp" #include "src/buildtool/file_system/object_type.hpp" +#include "src/buildtool/logging/log_level.hpp" +#include "src/buildtool/logging/logger.hpp" #include "src/buildtool/storage/local_cas.hpp" namespace { @@ -32,6 +39,17 @@ template requires(sizeof...(kType) != 0) [[nodiscard]] auto RemoveSpliced(LocalCAS const& cas) noexcept -> bool; + +/// \brief Split and remove from the kType storage every entry that is larger +/// than the given threshold. Results of splitting are added to the LocalCAS. +/// \tparam kType Type of the storage to inspect. +/// \param cas LocalCAS to store results of splitting. +/// \param threshold Minimum size of an entry to be split. +/// \return True if the kType storage doesn't contain splitable +/// entries larger than the compactification threshold afterwards. +template +[[nodiscard]] auto SplitLarge(LocalCAS const& cas, + size_t threshold) noexcept -> bool; } // namespace auto Compactifier::RemoveSpliced(LocalCAS const& cas) noexcept -> bool { @@ -39,6 +57,13 @@ auto Compactifier::RemoveSpliced(LocalCAS const& cas) noexcept -> bool { ::RemoveSpliced(cas); } +auto Compactifier::SplitLarge(LocalCAS const& cas, + size_t threshold) noexcept -> bool { + return ::SplitLarge(cas, threshold) and + ::SplitLarge(cas, threshold) and + ::SplitLarge(cas, threshold); +} + namespace { template requires(sizeof...(kType) != 0) @@ -82,4 +107,61 @@ requires(sizeof...(kType) != 0) return FileSystemManager::ReadDirectoryEntriesRecursive(large_storage, callback); } + +template +[[nodiscard]] auto SplitLarge(LocalCAS const& cas, + size_t threshold) noexcept -> bool { + // Obtain path to the storage root: + auto const& storage_root = cas.StorageRoot(kType); + + // Check there are entries to process: + if (not FileSystemManager::IsDirectory(storage_root)) { + return true; + } + + FileSystemManager::UseDirEntryFunc callback = + [&](std::filesystem::path const& entry_object, bool is_tree) -> bool { + // Use all folders: + if (is_tree) { + return true; + } + + // Filter files by size: + auto const path = storage_root / entry_object; + if (std::filesystem::file_size(path) < threshold) { + return true; + } + + // Calculate the digest for the entry: + auto const digest = ObjectCAS::CreateDigest(path); + if (not digest) { + Logger::Log(LogLevel::Error, + "Failed to calculate digest for {}", + path.generic_string()); + return false; + } + + // Split the entry: + auto split_result = IsTreeObject(kType) ? cas.SplitTree(*digest) + : cas.SplitBlob(*digest); + auto* parts = std::get_if>(&split_result); + if (parts == nullptr) { + Logger::Log(LogLevel::Error, "Failed to split {}", digest->hash()); + return false; + } + + // If the file cannot actually be split (the threshold is too low), the + // file must not be deleted. + if (parts->size() < 2) { + Logger::Log(LogLevel::Debug, + "{} cannot be compactified. The compactification " + "threshold is too low.", + digest->hash()); + return true; + } + return FileSystemManager::RemoveFile(path); + }; + return FileSystemManager::ReadDirectoryEntriesRecursive(storage_root, + callback); +} } // namespace -- cgit v1.2.3