summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaksim Denisov <denisov.maksim@huawei.com>2024-04-22 13:25:26 +0200
committerMaksim Denisov <denisov.maksim@huawei.com>2024-04-22 15:39:27 +0200
commit62f506aefc18dd5125b03e6215877b031656d0ed (patch)
treecc4db8fbba4bd92296510bcdfc649473fe92d6c9
parentf7296f50103dc9320ffa1127640c843b227adfdd (diff)
downloadjustbuild-62f506aefc18dd5125b03e6215877b031656d0ed.tar.gz
Compactification: Remove invalid entries from the storage.
During compactification, invalid entries must be deleted.
-rw-r--r--src/buildtool/storage/compactifier.cpp55
-rw-r--r--src/buildtool/storage/compactifier.hpp8
-rw-r--r--src/buildtool/storage/garbage_collector.cpp3
-rw-r--r--test/buildtool/storage/large_object_cas.test.cpp16
4 files changed, 80 insertions, 2 deletions
diff --git a/src/buildtool/storage/compactifier.cpp b/src/buildtool/storage/compactifier.cpp
index 4874e139..ecfd3d88 100644
--- a/src/buildtool/storage/compactifier.cpp
+++ b/src/buildtool/storage/compactifier.cpp
@@ -22,14 +22,25 @@
#include <vector>
#include "src/buildtool/common/bazel_types.hpp"
+#include "src/buildtool/crypto/hash_function.hpp"
+#include "src/buildtool/crypto/hasher.hpp"
#include "src/buildtool/file_system/file_system_manager.hpp"
#include "src/buildtool/file_system/object_cas.hpp"
#include "src/buildtool/file_system/object_type.hpp"
#include "src/buildtool/logging/log_level.hpp"
#include "src/buildtool/logging/logger.hpp"
#include "src/buildtool/storage/local_cas.hpp"
+#include "src/utils/cpp/hex_string.hpp"
namespace {
+/// \brief Remove invalid (not properly hash-named) entries from the kType
+/// storage. \tparam kType Type of the storage to inspect.
+/// \param cas Storage to be inspected.
+/// \return True if the kType storage doesn't contain invalid
+/// entries once the cleanup has finished.
+template <ObjectType kType>
+[[nodiscard]] auto RemoveInvalid(LocalCAS<false> const& cas) noexcept -> bool;
+
/// \brief Remove spliced entries from the kType storage.
/// \tparam kType Type of the storage to inspect.
/// \param cas Storage to be inspected.
@@ -52,6 +63,12 @@ template <ObjectType kType>
size_t threshold) noexcept -> bool;
} // namespace
+auto Compactifier::RemoveInvalid(LocalCAS<false> const& cas) noexcept -> bool {
+    // Clean the storages one after another; stop at the first failure so that
+    // later storages are not touched once an error has been detected.
+    if (not ::RemoveInvalid<ObjectType::File>(cas)) {
+        return false;
+    }
+    if (not ::RemoveInvalid<ObjectType::Executable>(cas)) {
+        return false;
+    }
+    return ::RemoveInvalid<ObjectType::Tree>(cas);
+}
+
auto Compactifier::RemoveSpliced(LocalCAS<false> const& cas) noexcept -> bool {
return ::RemoveSpliced<ObjectType::Tree>(cas) and
::RemoveSpliced<ObjectType::File, ObjectType::Executable>(cas);
@@ -65,6 +82,44 @@ auto Compactifier::SplitLarge(LocalCAS<false> const& cas,
}
namespace {
+/// \brief Walk the kType storage and delete every file whose location does
+/// not look like a hash-addressed entry.
+/// \tparam kType Type of the storage to clean up.
+/// \param cas Storage to be inspected.
+/// \return True if the storage directory is absent, otherwise the result of
+/// the recursive traversal that performs the removals.
+template <ObjectType kType>
+auto RemoveInvalid(LocalCAS<false> const& cas) noexcept -> bool {
+    auto root = cas.StorageRoot(kType);
+
+    // A missing storage directory means there is nothing to clean up.
+    if (not FileSystemManager::IsDirectory(root)) {
+        return true;
+    }
+
+    // A hash is split into a directory part of kPrefixLength characters and
+    // a file part holding the remaining characters.
+    auto const hash_length = HashFunction::Hasher().GetHashLength();
+    static constexpr size_t kPrefixLength = 2;
+    auto const suffix_length = hash_length - kPrefixLength;
+
+    FileSystemManager::UseDirEntryFunc callback =
+        [&root, suffix_length](std::filesystem::path const& entry,
+                               bool is_tree) -> bool {
+        // Directories are never removed; only files get validated.
+        if (is_tree) {
+            return true;
+        }
+
+        std::string const file_name = entry.filename();
+        std::string const dir_name = entry.parent_path().filename();
+
+        // Valid entries consist of a hexadecimal file name of suffix_length
+        // characters located in a directory with a hexadecimal name of
+        // kPrefixLength characters.
+        bool const is_valid =
+            file_name.size() == suffix_length and FromHexString(file_name) and
+            dir_name.size() == kPrefixLength and FromHexString(dir_name);
+
+        // Keep valid entries; everything else gets deleted, and a failed
+        // deletion is reported to the traversal.
+        return is_valid or FileSystemManager::RemoveFile(root / entry);
+    };
+    return FileSystemManager::ReadDirectoryEntriesRecursive(root, callback);
+}
+
template <ObjectType... kType>
requires(sizeof...(kType) != 0)
[[nodiscard]] auto RemoveSpliced(LocalCAS<false> const& cas) noexcept
diff --git a/src/buildtool/storage/compactifier.hpp b/src/buildtool/storage/compactifier.hpp
index b8040b51..3f9608f2 100644
--- a/src/buildtool/storage/compactifier.hpp
+++ b/src/buildtool/storage/compactifier.hpp
@@ -22,6 +22,14 @@ class LocalCAS;
class Compactifier final {
public:
+ /// \brief Remove invalid entries from the file, executable and tree
+ /// storages. An entry is valid if the file and its parent directory have
+ /// a hexadecimal name of the proper size.
+ /// \param cas Storage to be inspected.
+ /// \return True if storage does not contain invalid entries.
+ [[nodiscard]] static auto RemoveInvalid(LocalCAS<false> const& cas) noexcept
+ -> bool;
+
/// \brief Remove spliced entries from the storage.
/// \param local_cas Storage to be inspected.
/// \return True if object storages do not contain spliced
diff --git a/src/buildtool/storage/garbage_collector.cpp b/src/buildtool/storage/garbage_collector.cpp
index 3d077daf..27c1bc42 100644
--- a/src/buildtool/storage/garbage_collector.cpp
+++ b/src/buildtool/storage/garbage_collector.cpp
@@ -305,7 +305,8 @@ auto GarbageCollector::Compactify(size_t threshold) noexcept -> bool {
::Generation(StorageConfig::GenerationCacheDir(0, compatible));
Compatibility::SetCompatible(compatible);
- return Compactifier::RemoveSpliced(storage.CAS()) and
+ return Compactifier::RemoveInvalid(storage.CAS()) and
+ Compactifier::RemoveSpliced(storage.CAS()) and
Compactifier::SplitLarge(storage.CAS(), threshold);
};
return compactify(mode) and compactify(not mode);
diff --git a/test/buildtool/storage/large_object_cas.test.cpp b/test/buildtool/storage/large_object_cas.test.cpp
index 96462329..10414748 100644
--- a/test/buildtool/storage/large_object_cas.test.cpp
+++ b/test/buildtool/storage/large_object_cas.test.cpp
@@ -457,6 +457,18 @@ static void TestCompactification() {
REQUIRE(object_2);
auto& [digest_2, path_2] = *object_2;
+ // After an interruption of a build process intermediate unique files may
+ // be present in the storage. To ensure compactification deals with them
+ // properly, a "unique" file is created:
+ auto invalid_object = TestType::Create(
+ cas, std::string(TestType::kLargeId) + "_3", ExceedThresholdSize);
+ REQUIRE(invalid_object);
+ auto& [invalid_digest, invalid_path] = *invalid_object;
+
+ auto unique_path = CreateUniquePath(invalid_path);
+ REQUIRE(unique_path);
+ REQUIRE(FileSystemManager::Rename(invalid_path, *unique_path));
+
// Ensure all entries are in the storage:
auto get_path = [](auto const& cas, bazel_re::Digest const& digest) {
return kIsTree ? cas.TreePath(digest)
@@ -466,6 +478,7 @@ static void TestCompactification() {
auto const& latest = Storage::Generation(0).CAS();
REQUIRE(get_path(latest, digest).has_value());
REQUIRE(get_path(latest, digest_2).has_value());
+ REQUIRE(FileSystemManager::IsFile(*unique_path));
// Compactify the youngest generation:
// Generation rotation is disabled to exclude uplinking.
@@ -476,8 +489,9 @@ static void TestCompactification() {
// and executables there are no synchronized entries in the storage:
REQUIRE_FALSE(get_path(latest, digest).has_value());
REQUIRE_FALSE(get_path(latest, digest_2).has_value());
+ REQUIRE_FALSE(FileSystemManager::IsFile(*unique_path));
- // All entries must be implicitly splicable:
+ // All valid entries must be implicitly spliceable:
REQUIRE(get_path(cas, digest).has_value());
REQUIRE(get_path(cas, digest_2).has_value());
}