summaryrefslogtreecommitdiff
path: root/src/buildtool/storage/compactifier.cpp
diff options
context:
space:
mode:
authorMaksim Denisov <denisov.maksim@huawei.com>2024-04-02 18:37:16 +0200
committerMaksim Denisov <denisov.maksim@huawei.com>2024-04-17 11:04:08 +0200
commitb88adc43bb6ffe914ca9303e3001624e36fd64fa (patch)
tree845fd9e4c746ae9b86679a0ec21640cccfeea825 /src/buildtool/storage/compactifier.cpp
parentd9bf1d63768f1c3d660c3057d6d77c9b3b4a346d (diff)
downloadjustbuild-b88adc43bb6ffe914ca9303e3001624e36fd64fa.tar.gz
Compactification: Split large entries.
During garbage collection, split and remove from the storage every entry that is larger than a threshold.
Diffstat (limited to 'src/buildtool/storage/compactifier.cpp')
-rw-r--r--src/buildtool/storage/compactifier.cpp82
1 files changed, 82 insertions, 0 deletions
diff --git a/src/buildtool/storage/compactifier.cpp b/src/buildtool/storage/compactifier.cpp
index 01c5c7a6..4874e139 100644
--- a/src/buildtool/storage/compactifier.cpp
+++ b/src/buildtool/storage/compactifier.cpp
@@ -17,9 +17,16 @@
#include <algorithm>
#include <array>
#include <filesystem>
+#include <optional>
+#include <system_error>
+#include <variant>
+#include <vector>
+#include "src/buildtool/common/bazel_types.hpp"
#include "src/buildtool/file_system/file_system_manager.hpp"
+#include "src/buildtool/file_system/object_cas.hpp"
#include "src/buildtool/file_system/object_type.hpp"
+#include "src/buildtool/logging/log_level.hpp"
+#include "src/buildtool/logging/logger.hpp"
#include "src/buildtool/storage/local_cas.hpp"
namespace {
@@ -32,6 +39,17 @@ template <ObjectType... kType>
requires(sizeof...(kType) != 0)
[[nodiscard]] auto RemoveSpliced(LocalCAS<false> const& cas) noexcept
-> bool;
+
+/// \brief Split and remove from the kType storage every entry whose size is
+/// not below the given threshold. Results of splitting are added to the
+/// LocalCAS.
+/// \tparam kType Type of the storage to inspect.
+/// \param cas LocalCAS to store results of splitting.
+/// \param threshold Minimum size of an entry to be split.
+/// \return True if the kType storage doesn't contain splittable
+/// entries of at least the compactification threshold size afterwards.
+template <ObjectType kType>
+[[nodiscard]] auto SplitLarge(LocalCAS<false> const& cas,
+ size_t threshold) noexcept -> bool;
} // namespace
auto Compactifier::RemoveSpliced(LocalCAS<false> const& cas) noexcept -> bool {
@@ -39,6 +57,13 @@ auto Compactifier::RemoveSpliced(LocalCAS<false> const& cas) noexcept -> bool {
::RemoveSpliced<ObjectType::File, ObjectType::Executable>(cas);
}
+/// \brief Run the splitting of large entries over the File, Executable and
+/// Tree storages of the given LocalCAS, in that order.
+/// \param cas LocalCAS whose storages get inspected.
+/// \param threshold Minimum size of an entry to be split.
+/// \return True if all three storages have been processed successfully.
+auto Compactifier::SplitLarge(LocalCAS<false> const& cas,
+                              size_t threshold) noexcept -> bool {
+    // The first storage that fails stops the processing of the rest.
+    if (not ::SplitLarge<ObjectType::File>(cas, threshold)) {
+        return false;
+    }
+    if (not ::SplitLarge<ObjectType::Executable>(cas, threshold)) {
+        return false;
+    }
+    return ::SplitLarge<ObjectType::Tree>(cas, threshold);
+}
+
namespace {
template <ObjectType... kType>
requires(sizeof...(kType) != 0)
@@ -82,4 +107,61 @@ requires(sizeof...(kType) != 0)
return FileSystemManager::ReadDirectoryEntriesRecursive(large_storage,
callback);
}
+
+/// \brief Split and remove from the kType storage every entry whose size is
+/// not below the given threshold. Results of splitting are added to the
+/// LocalCAS.
+/// \tparam kType Type of the storage to inspect.
+/// \param cas LocalCAS to store results of splitting.
+/// \param threshold Minimum size of an entry to be split.
+/// \return True if the storage directory doesn't exist or if the traversal
+/// of the storage finished without a failure; false if obtaining the size,
+/// computing the digest, splitting, or removing of some entry failed.
+template <ObjectType kType>
+[[nodiscard]] auto SplitLarge(LocalCAS<false> const& cas,
+                              size_t threshold) noexcept -> bool {
+    // Obtain path to the storage root:
+    auto const& storage_root = cas.StorageRoot(kType);
+
+    // Check there are entries to process:
+    if (not FileSystemManager::IsDirectory(storage_root)) {
+        return true;
+    }
+
+    FileSystemManager::UseDirEntryFunc callback =
+        [&](std::filesystem::path const& entry_object, bool is_tree) -> bool {
+        // Use all folders:
+        if (is_tree) {
+            return true;
+        }
+
+        // Filter files by size. The non-throwing overload is used since this
+        // code runs inside a noexcept function: a failing size query (e.g.,
+        // the entry has vanished in the meantime) must be reported as a
+        // regular error instead of raising an exception.
+        auto const path = storage_root / entry_object;
+        std::error_code ec{};
+        auto const size = std::filesystem::file_size(path, ec);
+        if (ec) {
+            Logger::Log(LogLevel::Error,
+                        "Failed to obtain the size of {}: {}",
+                        path.generic_string(),
+                        ec.message());
+            return false;
+        }
+        if (size < threshold) {
+            return true;
+        }
+
+        // Calculate the digest for the entry:
+        auto const digest = ObjectCAS<kType>::CreateDigest(path);
+        if (not digest) {
+            Logger::Log(LogLevel::Error,
+                        "Failed to calculate digest for {}",
+                        path.generic_string());
+            return false;
+        }
+
+        // Split the entry. Trees and blobs are split by dedicated routines:
+        constexpr bool kIsTree = IsTreeObject(kType);
+        auto split_result =
+            kIsTree ? cas.SplitTree(*digest) : cas.SplitBlob(*digest);
+        auto const* parts =
+            std::get_if<std::vector<bazel_re::Digest>>(&split_result);
+        if (parts == nullptr) {
+            Logger::Log(LogLevel::Error, "Failed to split {}", digest->hash());
+            return false;
+        }
+
+        // If the file cannot actually be split (the threshold is too low), the
+        // file must not be deleted.
+        if (parts->size() < 2) {
+            Logger::Log(LogLevel::Debug,
+                        "{} cannot be compactified. The compactification "
+                        "threshold is too low.",
+                        digest->hash());
+            return true;
+        }
+
+        // The parts are in the LocalCAS now, so the large entry itself is
+        // not needed anymore:
+        return FileSystemManager::RemoveFile(path);
+    };
+    return FileSystemManager::ReadDirectoryEntriesRecursive(storage_root,
+                                                            callback);
+}
} // namespace