diff options
author | Maksim Denisov <denisov.maksim@huawei.com> | 2024-04-22 13:25:26 +0200 |
---|---|---|
committer | Maksim Denisov <denisov.maksim@huawei.com> | 2024-04-22 15:39:27 +0200 |
commit | 62f506aefc18dd5125b03e6215877b031656d0ed (patch) | |
tree | cc4db8fbba4bd92296510bcdfc649473fe92d6c9 | |
parent | f7296f50103dc9320ffa1127640c843b227adfdd (diff) | |
download | justbuild-62f506aefc18dd5125b03e6215877b031656d0ed.tar.gz |
Compactification: Remove invalid entries from the storage.
During compactification, invalid entries must be deleted.
-rw-r--r-- | src/buildtool/storage/compactifier.cpp | 55 | ||||
-rw-r--r-- | src/buildtool/storage/compactifier.hpp | 8 | ||||
-rw-r--r-- | src/buildtool/storage/garbage_collector.cpp | 3 | ||||
-rw-r--r-- | test/buildtool/storage/large_object_cas.test.cpp | 16 |
4 files changed, 80 insertions, 2 deletions
diff --git a/src/buildtool/storage/compactifier.cpp b/src/buildtool/storage/compactifier.cpp index 4874e139..ecfd3d88 100644 --- a/src/buildtool/storage/compactifier.cpp +++ b/src/buildtool/storage/compactifier.cpp @@ -22,14 +22,25 @@ #include <vector> #include "src/buildtool/common/bazel_types.hpp" +#include "src/buildtool/crypto/hash_function.hpp" +#include "src/buildtool/crypto/hasher.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/file_system/object_cas.hpp" #include "src/buildtool/file_system/object_type.hpp" #include "src/buildtool/logging/log_level.hpp" #include "src/buildtool/logging/logger.hpp" #include "src/buildtool/storage/local_cas.hpp" +#include "src/utils/cpp/hex_string.hpp" namespace { +/// \brief Remove invalid entries from the kType storage. +/// \tparam kType Type of the storage to inspect. +/// \param cas Storage to be inspected. +/// \return True if the kType storage doesn't contain invalid +/// entries. +template <ObjectType kType> +[[nodiscard]] auto RemoveInvalid(LocalCAS<false> const& cas) noexcept -> bool; + /// \brief Remove spliced entries from the kType storage. /// \tparam kType Type of the storage to inspect. /// \param cas Storage to be inspected. 
@@ -52,6 +63,12 @@ template <ObjectType kType> size_t threshold) noexcept -> bool; } // namespace +auto Compactifier::RemoveInvalid(LocalCAS<false> const& cas) noexcept -> bool { + return ::RemoveInvalid<ObjectType::File>(cas) and + ::RemoveInvalid<ObjectType::Executable>(cas) and + ::RemoveInvalid<ObjectType::Tree>(cas); +} + auto Compactifier::RemoveSpliced(LocalCAS<false> const& cas) noexcept -> bool { return ::RemoveSpliced<ObjectType::Tree>(cas) and ::RemoveSpliced<ObjectType::File, ObjectType::Executable>(cas); @@ -65,6 +82,44 @@ auto Compactifier::SplitLarge(LocalCAS<false> const& cas, } namespace { +template <ObjectType kType> +auto RemoveInvalid(LocalCAS<false> const& cas) noexcept -> bool { + auto storage_root = cas.StorageRoot(kType); + + // Check there are entries to process: + if (not FileSystemManager::IsDirectory(storage_root)) { + return true; + } + + // Calculate reference hash size: + auto const kHashSize = HashFunction::Hasher().GetHashLength(); + static constexpr size_t kDirNameSize = 2; + auto const kFileNameSize = kHashSize - kDirNameSize; + + FileSystemManager::UseDirEntryFunc callback = + [&storage_root, kFileNameSize](std::filesystem::path const& path, + bool is_tree) -> bool { + // Use all folders. + if (is_tree) { + return true; + } + + std::string const f_name = path.filename(); + std::string const d_name = path.parent_path().filename(); + + // A file is valid if: + // * it has a hexadecimal name of length kFileNameSize; + // * parent directory has a hexadecimal name of length kDirNameSize. + if (f_name.size() == kFileNameSize and FromHexString(f_name) and + d_name.size() == kDirNameSize and FromHexString(d_name)) { + return true; + } + return FileSystemManager::RemoveFile(storage_root / path); + }; + return FileSystemManager::ReadDirectoryEntriesRecursive(storage_root, + callback); +} + template <ObjectType... 
kType> requires(sizeof...(kType) != 0) [[nodiscard]] auto RemoveSpliced(LocalCAS<false> const& cas) noexcept diff --git a/src/buildtool/storage/compactifier.hpp b/src/buildtool/storage/compactifier.hpp index b8040b51..3f9608f2 100644 --- a/src/buildtool/storage/compactifier.hpp +++ b/src/buildtool/storage/compactifier.hpp @@ -22,6 +22,14 @@ class LocalCAS; class Compactifier final { public: + /// \brief Remove invalid entries from the storage. An entry is valid if the + /// file and its parent directory have a hexadecimal name of the proper + /// size. + /// \param cas Storage to be inspected. + /// \return True if storage does not contain invalid entries. + [[nodiscard]] static auto RemoveInvalid(LocalCAS<false> const& cas) noexcept + -> bool; + /// \brief Remove spliced entries from the storage. /// \param local_cas Storage to be inspected. /// \return True if object storages do not contain spliced diff --git a/src/buildtool/storage/garbage_collector.cpp b/src/buildtool/storage/garbage_collector.cpp index 3d077daf..27c1bc42 100644 --- a/src/buildtool/storage/garbage_collector.cpp +++ b/src/buildtool/storage/garbage_collector.cpp @@ -305,7 +305,8 @@ auto GarbageCollector::Compactify(size_t threshold) noexcept -> bool { ::Generation(StorageConfig::GenerationCacheDir(0, compatible)); Compatibility::SetCompatible(compatible); - return Compactifier::RemoveSpliced(storage.CAS()) and + return Compactifier::RemoveInvalid(storage.CAS()) and + Compactifier::RemoveSpliced(storage.CAS()) and Compactifier::SplitLarge(storage.CAS(), threshold); }; return compactify(mode) and compactify(not mode); diff --git a/test/buildtool/storage/large_object_cas.test.cpp b/test/buildtool/storage/large_object_cas.test.cpp index 96462329..10414748 100644 --- a/test/buildtool/storage/large_object_cas.test.cpp +++ b/test/buildtool/storage/large_object_cas.test.cpp @@ -457,6 +457,18 @@ static void TestCompactification() { REQUIRE(object_2); auto& [digest_2, path_2] = *object_2; + // After an 
interruption of a build process intermediate unique files may + // be present in the storage. To ensure compactification deals with them + // properly, a "unique" file is created: + auto invalid_object = TestType::Create( + cas, std::string(TestType::kLargeId) + "_3", ExceedThresholdSize); + REQUIRE(invalid_object); + auto& [invalid_digest, invalid_path] = *invalid_object; + + auto unique_path = CreateUniquePath(invalid_path); + REQUIRE(unique_path); + REQUIRE(FileSystemManager::Rename(invalid_path, *unique_path)); + // Ensure all entries are in the storage: auto get_path = [](auto const& cas, bazel_re::Digest const& digest) { return kIsTree ? cas.TreePath(digest) @@ -466,6 +478,7 @@ static void TestCompactification() { auto const& latest = Storage::Generation(0).CAS(); REQUIRE(get_path(latest, digest).has_value()); REQUIRE(get_path(latest, digest_2).has_value()); + REQUIRE(FileSystemManager::IsFile(*unique_path)); // Compactify the youngest generation: // Generation rotation is disabled to exclude uplinking. @@ -476,8 +489,9 @@ static void TestCompactification() { // and executables there are no synchronized entries in the storage: REQUIRE_FALSE(get_path(latest, digest).has_value()); REQUIRE_FALSE(get_path(latest, digest_2).has_value()); + REQUIRE_FALSE(FileSystemManager::IsFile(*unique_path)); - // All entries must be implicitly splicable: + // All valid entries must be implicitly spliceable: REQUIRE(get_path(cas, digest).has_value()); REQUIRE(get_path(cas, digest_2).has_value()); } |