summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaksim Denisov <denisov.maksim@huawei.com>2024-03-22 18:13:23 +0100
committerMaksim Denisov <denisov.maksim@huawei.com>2024-04-02 15:30:03 +0200
commitfb04df06a3586f211532453903f7414907b2a7b0 (patch)
treeab896a5e6c06aba32eb4c129bb8f83ae7c71d88b
parentc2c3ca4b17122f5e93001b26330e97fb00f6c7f3 (diff)
downloadjustbuild-fb04df06a3586f211532453903f7414907b2a7b0.tar.gz
LargeBlobs: Split large objects.
* Add LargeObjectCAS fields for files and trees to LocalCAS; * Add logic for splitting objects located in the main storage. Tested: Splitting of large, small and empty objects.
-rw-r--r--src/buildtool/storage/TARGETS2
-rw-r--r--src/buildtool/storage/large_object_cas.hpp20
-rw-r--r--src/buildtool/storage/large_object_cas.tpp68
-rw-r--r--src/buildtool/storage/local_cas.hpp29
-rw-r--r--test/buildtool/storage/TARGETS28
-rw-r--r--test/buildtool/storage/large_object_cas.test.cpp380
6 files changed, 515 insertions, 12 deletions
diff --git a/src/buildtool/storage/TARGETS b/src/buildtool/storage/TARGETS
index fd6a3ff1..ad846a85 100644
--- a/src/buildtool/storage/TARGETS
+++ b/src/buildtool/storage/TARGETS
@@ -38,6 +38,7 @@
["target_cache_key.cpp", "target_cache_entry.cpp", "garbage_collector.cpp"]
, "deps":
[ "config"
+ , "file_chunker"
, ["src/buildtool/common", "common"]
, ["src/buildtool/file_system", "file_storage"]
, ["src/buildtool/file_system", "object_cas"]
@@ -49,6 +50,7 @@
, ["src/utils/cpp", "gsl"]
, ["@", "gsl", "", "gsl"]
, ["@", "json", "", "json"]
+ , ["@", "fmt", "", "fmt"]
, ["src/buildtool/file_system", "object_type"]
, ["src/buildtool/file_system", "file_system_manager"]
, ["src/buildtool/execution_api/bazel_msg", "bazel_msg_factory"]
diff --git a/src/buildtool/storage/large_object_cas.hpp b/src/buildtool/storage/large_object_cas.hpp
index 881acc15..d7af8e9f 100644
--- a/src/buildtool/storage/large_object_cas.hpp
+++ b/src/buildtool/storage/large_object_cas.hpp
@@ -19,12 +19,16 @@
#include <optional>
#include <string>
#include <utility>
+#include <variant>
#include <vector>
#include "src/buildtool/common/bazel_types.hpp"
#include "src/buildtool/file_system/file_storage.hpp"
#include "src/buildtool/file_system/object_type.hpp"
+template <bool>
+class LocalCAS;
+
enum class LargeObjectErrorCode {
/// \brief An internal error occurred.
Internal = 0,
@@ -58,11 +62,12 @@ class LargeObjectError final {
/// The entries are keyed by the hash of the spliced result and the value of an
/// entry is the concatenation of the hashes of chunks the large object is
/// composed of.
-template <bool kDoGlobalUplink>
+template <bool kDoGlobalUplink, ObjectType kType>
class LargeObjectCAS final {
public:
- explicit LargeObjectCAS(std::filesystem::path const& store_path) noexcept
- : file_store_(store_path) {}
+ LargeObjectCAS(LocalCAS<kDoGlobalUplink> const& local_cas,
+ std::filesystem::path const& store_path) noexcept
+ : local_cas_(local_cas), file_store_(store_path) {}
LargeObjectCAS(LargeObjectCAS const&) = delete;
LargeObjectCAS(LargeObjectCAS&&) = delete;
@@ -76,12 +81,21 @@ class LargeObjectCAS final {
[[nodiscard]] auto GetEntryPath(bazel_re::Digest const& digest)
const noexcept -> std::optional<std::filesystem::path>;
+ /// \brief Split an object from the main CAS into chunks. If the object had
+ /// been split before, it would not get split again.
+ /// \param digest The digest of the object to be split.
+ /// \return A set of chunks the resulting object is composed of
+ /// or an error on failure.
+ [[nodiscard]] auto Split(bazel_re::Digest const& digest) const noexcept
+ -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>>;
+
private:
// By default, overwrite existing entries. Unless this is a generation
// (disabled global uplink), then we never want to overwrite any entries.
static constexpr auto kStoreMode =
kDoGlobalUplink ? StoreMode::LastWins : StoreMode::FirstWins;
+ LocalCAS<kDoGlobalUplink> const& local_cas_;
FileStorage<ObjectType::File, kStoreMode, /*kSetEpochTime=*/false>
file_store_;
diff --git a/src/buildtool/storage/large_object_cas.tpp b/src/buildtool/storage/large_object_cas.tpp
index d5228c72..5adcb6a3 100644
--- a/src/buildtool/storage/large_object_cas.tpp
+++ b/src/buildtool/storage/large_object_cas.tpp
@@ -19,13 +19,16 @@
#include <cstdlib>
#include <fstream>
+#include "fmt/core.h"
#include "nlohmann/json.hpp"
#include "src/buildtool/compatibility/native_support.hpp"
#include "src/buildtool/file_system/file_system_manager.hpp"
+#include "src/buildtool/storage/file_chunker.hpp"
#include "src/buildtool/storage/large_object_cas.hpp"
+#include "src/buildtool/storage/local_cas.hpp"
-template <bool kDoGlobalUplink>
-auto LargeObjectCAS<kDoGlobalUplink>::GetEntryPath(
+template <bool kDoGlobalUplink, ObjectType kType>
+auto LargeObjectCAS<kDoGlobalUplink, kType>::GetEntryPath(
bazel_re::Digest const& digest) const noexcept
-> std::optional<std::filesystem::path> {
const std::string hash = NativeSupport::Unprefix(digest.hash());
@@ -36,9 +39,10 @@ auto LargeObjectCAS<kDoGlobalUplink>::GetEntryPath(
return std::nullopt;
}
-template <bool kDoGlobalUplink>
-auto LargeObjectCAS<kDoGlobalUplink>::ReadEntry(bazel_re::Digest const& digest)
- const noexcept -> std::optional<std::vector<bazel_re::Digest>> {
+template <bool kDoGlobalUplink, ObjectType kType>
+auto LargeObjectCAS<kDoGlobalUplink, kType>::ReadEntry(
+ bazel_re::Digest const& digest) const noexcept
+ -> std::optional<std::vector<bazel_re::Digest>> {
auto const file_path = GetEntryPath(digest);
if (not file_path) {
return std::nullopt;
@@ -63,8 +67,8 @@ auto LargeObjectCAS<kDoGlobalUplink>::ReadEntry(bazel_re::Digest const& digest)
return parts;
}
-template <bool kDoGlobalUplink>
-auto LargeObjectCAS<kDoGlobalUplink>::WriteEntry(
+template <bool kDoGlobalUplink, ObjectType kType>
+auto LargeObjectCAS<kDoGlobalUplink, kType>::WriteEntry(
bazel_re::Digest const& digest,
std::vector<bazel_re::Digest> const& parts) const noexcept -> bool {
if (GetEntryPath(digest)) {
@@ -96,4 +100,54 @@ auto LargeObjectCAS<kDoGlobalUplink>::WriteEntry(
return file_store_.AddFromBytes(hash, j.dump());
}
+/// \brief Split an object of the main CAS into chunks and store every chunk
+/// as a non-executable blob. If a large entry for the digest can already be
+/// read, its parts are returned and no splitting takes place.
+/// \param digest The digest of the object to be split.
+/// \returns The digests of the stored chunks in the order they were produced,
+/// or a LargeObjectError: FileNotFound if the object has no path in the main
+/// CAS, Internal for any failure while chunking or storing.
+template <bool kDoGlobalUplink, ObjectType kType>
+auto LargeObjectCAS<kDoGlobalUplink, kType>::Split(
+    bazel_re::Digest const& digest) const noexcept
+    -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>> {
+    // Reuse the result of a previous split, if one has been recorded.
+    auto known_parts = ReadEntry(digest);
+    if (known_parts) {
+        return std::move(*known_parts);
+    }
+
+    // Builds the error reported whenever the chunking itself fails.
+    auto const split_failure = [&digest]() {
+        return LargeObjectError{
+            LargeObjectErrorCode::Internal,
+            fmt::format("could not split {}", digest.hash())};
+    };
+
+    // Locate the object in the main CAS; trees and blobs are looked up via
+    // different accessors of the local CAS.
+    auto object_path =
+        IsTreeObject(kType)
+            ? local_cas_.TreePath(digest)
+            : local_cas_.BlobPath(digest, /*is_executable=*/false);
+    if (not object_path) {
+        return LargeObjectError{
+            LargeObjectErrorCode::FileNotFound,
+            fmt::format("could not find {}", digest.hash())};
+    }
+
+    FileChunker chunker{*object_path};
+    if (not chunker.IsOpen()) {
+        return split_failure();
+    }
+
+    // Store every chunk as a plain blob and remember its digest.
+    std::vector<bazel_re::Digest> chunk_digests;
+    try {
+        while (auto chunk = chunker.NextChunk()) {
+            auto stored =
+                local_cas_.StoreBlob(*chunk, /*is_executable=*/false);
+            if (stored) {
+                chunk_digests.push_back(std::move(*stored));
+                continue;
+            }
+            return LargeObjectError{LargeObjectErrorCode::Internal,
+                                    "could not store a part."};
+        }
+    } catch (...) {
+        return LargeObjectError{LargeObjectErrorCode::Internal,
+                                "an unknown error occured."};
+    }
+    // A loop that stopped before the chunker reports completion is treated
+    // as a failure.
+    if (not chunker.Finished()) {
+        return split_failure();
+    }
+
+    // Recording the large entry is best effort: the result is ignored.
+    std::ignore = WriteEntry(digest, chunk_digests);
+    return chunk_digests;
+}
+
#endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_TPP
diff --git a/src/buildtool/storage/local_cas.hpp b/src/buildtool/storage/local_cas.hpp
index 382af848..ee7ecdf8 100644
--- a/src/buildtool/storage/local_cas.hpp
+++ b/src/buildtool/storage/local_cas.hpp
@@ -18,11 +18,14 @@
#include <filesystem>
#include <optional>
#include <unordered_set>
+#include <variant>
+#include <vector>
#include "gsl/gsl"
#include "src/buildtool/file_system/git_repo.hpp"
#include "src/buildtool/file_system/object_cas.hpp"
#include "src/buildtool/storage/garbage_collector.hpp"
+#include "src/buildtool/storage/large_object_cas.hpp"
/// \brief The local (logical) CAS for storing blobs and trees.
/// Blobs can be stored/queried as executable or non-executable. Trees might be
@@ -45,7 +48,11 @@ class LocalCAS {
: cas_file_{base.string() + 'f', Uplinker<ObjectType::File>()},
cas_exec_{base.string() + 'x', Uplinker<ObjectType::Executable>()},
cas_tree_{base.string() + (Compatibility::IsCompatible() ? 'f' : 't'),
- Uplinker<ObjectType::Tree>()} {}
+ Uplinker<ObjectType::Tree>()},
+ cas_file_large_{*this, base.string() + "-large-f"},
+ cas_tree_large_{*this,
+ base.string() + "-large-" +
+ (Compatibility::IsCompatible() ? 'f' : 't')} {}
/// \brief Store blob from file path with x-bit.
/// \tparam kOwner Indicates ownership for optimization (hardlink).
@@ -101,6 +108,15 @@ class LocalCAS {
return path ? path : TrySyncBlob(digest, is_executable);
}
+ /// \brief Split a blob into chunks via the large-object CAS for files.
+ /// \param digest The digest of the blob to be split.
+ /// \returns Digests of the parts the blob is composed of, or a
+ /// LargeObjectError describing the failure.
+ [[nodiscard]] auto SplitBlob(bazel_re::Digest const& digest) const noexcept
+ -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>> {
+ return cas_file_large_.Split(digest);
+ }
+
/// \brief Obtain tree path from digest.
/// \param digest Digest of the tree to lookup.
/// \returns Path to the tree if found or nullopt otherwise.
@@ -109,6 +125,15 @@ class LocalCAS {
return cas_tree_.BlobPath(digest);
}
+ /// \brief Split a tree into chunks via the large-object CAS for trees.
+ /// \param digest The digest of the tree to be split.
+ /// \returns Digests of the parts the tree is composed of, or a
+ /// LargeObjectError describing the failure.
+ [[nodiscard]] auto SplitTree(bazel_re::Digest const& digest) const noexcept
+ -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>> {
+ return cas_tree_large_.Split(digest);
+ }
+
/// \brief Traverses a tree recursively and retrieves object infos of all
/// found blobs (leafs). Tree objects are by default not added to the result
/// list, but converted to a path name.
@@ -185,6 +210,8 @@ class LocalCAS {
ObjectCAS<ObjectType::File> cas_file_;
ObjectCAS<ObjectType::Executable> cas_exec_;
ObjectCAS<ObjectType::Tree> cas_tree_;
+ LargeObjectCAS<kDoGlobalUplink, ObjectType::File> cas_file_large_;
+ LargeObjectCAS<kDoGlobalUplink, ObjectType::Tree> cas_tree_large_;
/// \brief Provides uplink via "exists callback" for physical object CAS.
template <ObjectType kType>
diff --git a/test/buildtool/storage/TARGETS b/test/buildtool/storage/TARGETS
index 31e5ceb5..9374bf58 100644
--- a/test/buildtool/storage/TARGETS
+++ b/test/buildtool/storage/TARGETS
@@ -32,6 +32,32 @@
]
, "stage": ["test", "buildtool", "storage"]
}
+, "large_object_cas":
+ { "type": ["@", "rules", "CC/test", "test"]
+ , "name": ["large_object_cas"]
+ , "srcs": ["large_object_cas.test.cpp"]
+ , "private-deps":
+ [ ["@", "catch2", "", "catch2"]
+ , ["", "catch-main"]
+ , ["@", "src", "src/buildtool/file_system", "file_system_manager"]
+ , ["@", "src", "src/buildtool/storage", "storage"]
+ , ["@", "src", "src/buildtool/storage", "config"]
+ , ["utils", "local_hermeticity"]
+ , ["@", "src", "src/buildtool/common", "bazel_types"]
+ , ["utils", "large_object_utils"]
+ , ["@", "src", "src/utils/cpp", "tmp_dir"]
+ , [ "@"
+ , "src"
+ , "src/buildtool/execution_api/bazel_msg"
+ , "bazel_msg_factory"
+ ]
+ , ["@", "src", "src/buildtool/compatibility", "compatibility"]
+ ]
+ , "stage": ["test", "buildtool", "storage"]
+ }
, "TESTS":
- {"type": "install", "tainted": ["test"], "deps": ["local_cas", "local_ac"]}
+ { "type": "install"
+ , "tainted": ["test"]
+ , "deps": ["local_cas", "local_ac", "large_object_cas"]
+ }
}
diff --git a/test/buildtool/storage/large_object_cas.test.cpp b/test/buildtool/storage/large_object_cas.test.cpp
new file mode 100644
index 00000000..61aa25da
--- /dev/null
+++ b/test/buildtool/storage/large_object_cas.test.cpp
@@ -0,0 +1,380 @@
+// Copyright 2024 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <cstdlib>
+#include <filesystem>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <type_traits>
+#include <utility>
+#include <variant>
+#include <vector>
+
+#include "catch2/catch_test_macros.hpp"
+#include "src/buildtool/common/bazel_types.hpp"
+#include "src/buildtool/compatibility/native_support.hpp"
+#include "src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp"
+#include "src/buildtool/file_system/file_system_manager.hpp"
+#include "src/buildtool/storage/config.hpp"
+#include "src/buildtool/storage/large_object_cas.hpp"
+#include "src/buildtool/storage/storage.hpp"
+#include "src/utils/cpp/tmp_dir.hpp"
+#include "test/utils/hermeticity/local.hpp"
+#include "test/utils/large_objects/large_object_utils.hpp"
+
+namespace {
+namespace LargeTestUtils {
+
+/// \brief Test helper describing the blob fixtures (large, small, empty) and
+/// declaring the routines that generate them and add them to a local CAS.
+/// \tparam IsExecutable Whether blobs are stored and looked up as executables.
+template <bool IsExecutable>
+class Blob final {
+  public:
+    // Fixture expected to be split into more than one part.
+    static constexpr std::string_view kLargeId{"bl_8Mb"};
+    static constexpr std::uintmax_t kLargeSize = 8 * 1024 * 1024;
+
+    // Fixture expected to result in exactly one part.
+    static constexpr std::string_view kSmallId{"bl_1kB"};
+    static constexpr std::uintmax_t kSmallSize = 1024;
+
+    // Fixture expected to result in no parts at all.
+    static constexpr std::string_view kEmptyId{"bl_0"};
+    static constexpr std::uintmax_t kEmptySize = 0;
+
+    /// \brief Provide a file for the given id, generating it if it is missing.
+    /// \param id    Identifier used to name the file.
+    /// \param size  Size passed to the file generator.
+    /// \returns Path to the file, or std::nullopt on failure.
+    [[nodiscard]] static auto Generate(std::string const& id,
+                                       std::uintmax_t size) noexcept
+        -> std::optional<std::filesystem::path>;
+
+    /// \brief Generate a blob and store it in the given CAS.
+    /// \param cas   CAS to add the blob to.
+    /// \param id    Identifier used to name the generated file.
+    /// \param size  Size passed to the file generator.
+    /// \returns Digest of the blob and its path in the CAS, or std::nullopt
+    /// on failure.
+    [[nodiscard]] static auto Create(
+        LocalCAS<kDefaultDoGlobalUplink> const& cas,
+        std::string const& id,
+        std::uintmax_t size) noexcept
+        -> std::optional<std::pair<bazel_re::Digest, std::filesystem::path>>;
+};
+
+/// \brief Test helper describing the tree fixtures (large, small, empty) and
+/// declaring the routines that generate them and add them to a local CAS.
+class Tree final {
+  public:
+    // Fixture expected to be split into more than one part.
+    static constexpr std::string_view kLargeId{"tree_256"};
+    static constexpr std::uintmax_t kLargeSize = 256;
+
+    // Fixture expected to result in exactly one part.
+    static constexpr std::string_view kSmallId{"tree_1"};
+    static constexpr std::uintmax_t kSmallSize = 1;
+
+    // Fixture expected to result in no parts at all.
+    static constexpr std::string_view kEmptyId{"tree_0"};
+    static constexpr std::uintmax_t kEmptySize = 0;
+
+    /// \brief Provide a directory for the given id, generating it if it is
+    /// missing.
+    /// \param id             Identifier used to name the directory.
+    /// \param entries_count  Entry count passed to the directory generator.
+    /// \returns Path to the directory, or std::nullopt on failure.
+    [[nodiscard]] static auto Generate(std::string const& id,
+                                       std::uintmax_t entries_count) noexcept
+        -> std::optional<std::filesystem::path>;
+
+    /// \brief Add an existing directory to the given CAS as a tree.
+    /// \param cas        CAS to add the tree to.
+    /// \param directory  Directory to be stored.
+    /// \returns Digest of the stored tree, or std::nullopt on failure.
+    [[nodiscard]] static auto StoreRaw(
+        LocalCAS<kDefaultDoGlobalUplink> const& cas,
+        std::filesystem::path const& directory) noexcept
+        -> std::optional<bazel_re::Digest>;
+
+    /// \brief Generate a directory and store it in the given CAS as a tree.
+    /// \param cas            CAS to add the tree to.
+    /// \param id             Identifier used to name the directory.
+    /// \param entries_count  Entry count passed to the directory generator.
+    /// \returns Digest of the tree and its path in the CAS, or std::nullopt
+    /// on failure.
+    [[nodiscard]] static auto Create(
+        LocalCAS<kDefaultDoGlobalUplink> const& cas,
+        std::string const& id,
+        std::uintmax_t entries_count) noexcept
+        -> std::optional<std::pair<bazel_re::Digest, std::filesystem::path>>;
+};
+
+} // namespace LargeTestUtils
+} // namespace
+
+// Test splitting of a small tree through a standalone LargeObjectCAS that is
+// rooted in a temporary directory and backed by the storage's main CAS.
+TEST_CASE_METHOD(HermeticLocalTestFixture,
+                 "LargeObjectCAS: split a small tree",
+                 "[storage]") {
+    auto temp_dir = StorageConfig::CreateTypedTmpDir("large_object_cas");
+    REQUIRE(temp_dir);
+
+    auto const& cas = Storage::Instance().CAS();
+    // The uplink flag must match the one of the CAS passed in, so use the
+    // same constant the test helpers use instead of a literal.
+    LargeObjectCAS<kDefaultDoGlobalUplink, ObjectType::Tree> const large_cas(
+        cas, temp_dir->GetPath() / "root_1");
+
+    // Create a small tree:
+    using LargeTestUtils::Tree;
+    auto small =
+        Tree::Create(cas, std::string(Tree::kSmallId), Tree::kSmallSize);
+    REQUIRE(small);
+    auto const& [digest, path] = *small;
+
+    // Split must be successful:
+    auto split_pack = large_cas.Split(digest);
+    auto* parts = std::get_if<std::vector<bazel_re::Digest>>(&split_pack);
+    REQUIRE(parts);
+
+    // The result must contain one blob digest:
+    CHECK(parts->size() == 1);
+    CHECK_FALSE(NativeSupport::IsTree(parts->front().hash()));
+}
+
+// Test splitting of a large object. The split must be successful and the entry
+// must be placed to the LargeCAS. The second split of the same object must load
+// the result from the LargeCAS, no actual split must occur.
+// CHECK does not stop the test on failure, so every value that is dereferenced
+// afterwards is guarded explicitly.
+template <ObjectType kType>
+static void TestLarge() noexcept {
+    SECTION("Large") {
+        static constexpr bool kIsTree = IsTreeObject(kType);
+        static constexpr bool kIsExec = IsExecutableObject(kType);
+
+        using TestType = std::conditional_t<kIsTree,
+                                            LargeTestUtils::Tree,
+                                            LargeTestUtils::Blob<kIsExec>>;
+
+        auto const& cas = Storage::Instance().CAS();
+
+        // Create a large object:
+        auto object = TestType::Create(
+            cas, std::string(TestType::kLargeId), TestType::kLargeSize);
+        CHECK(object);
+        if (not object) {
+            return;  // nothing to split; avoid reading an empty optional
+        }
+        auto const& [digest, path] = *object;
+
+        // Split the large object:
+        auto pack_1 = kIsTree ? cas.SplitTree(digest) : cas.SplitBlob(digest);
+        auto* split = std::get_if<std::vector<bazel_re::Digest>>(&pack_1);
+        CHECK(split);
+        if (split == nullptr) {
+            return;  // the split failed; there are no parts to inspect
+        }
+        CHECK(split->size() > 1);
+
+        // Remove the object from the main CAS, so that a second split can
+        // only succeed by reading the large entry:
+        CHECK(FileSystemManager::RemoveFile(path));
+        CHECK_FALSE(FileSystemManager::IsFile(path));
+
+        SECTION("Split short-circuting") {
+            // Check the second call loads the entry from the large CAS:
+            auto pack_2 =
+                kIsTree ? cas.SplitTree(digest) : cas.SplitBlob(digest);
+            auto* split_2 = std::get_if<std::vector<bazel_re::Digest>>(&pack_2);
+            CHECK(split_2);
+            if (split_2 != nullptr) {
+                CHECK(split_2->size() == split->size());
+            }
+
+            // There must be no spliced file:
+            CHECK_FALSE(FileSystemManager::IsFile(path));
+        }
+    }
+}
+
+// Test splitting of a small object. The split must be successful, but the entry
+// must not be placed to the LargeCAS. The result of splitting must contain one
+// blob.
+// CHECK does not stop the test on failure, so every value that is dereferenced
+// afterwards is guarded explicitly.
+template <ObjectType kType>
+static void TestSmall() noexcept {
+    SECTION("Small") {
+        static constexpr bool kIsTree = IsTreeObject(kType);
+        static constexpr bool kIsExec = IsExecutableObject(kType);
+
+        using TestType = std::conditional_t<kIsTree,
+                                            LargeTestUtils::Tree,
+                                            LargeTestUtils::Blob<kIsExec>>;
+
+        auto const& cas = Storage::Instance().CAS();
+
+        // Create a small object:
+        auto object = TestType::Create(
+            cas, std::string(TestType::kSmallId), TestType::kSmallSize);
+        CHECK(object);
+        if (not object) {
+            return;  // nothing to split; avoid reading an empty optional
+        }
+        auto const& [digest, path] = *object;
+
+        // Split the small object:
+        auto pack_1 = kIsTree ? cas.SplitTree(digest) : cas.SplitBlob(digest);
+        auto* split = std::get_if<std::vector<bazel_re::Digest>>(&pack_1);
+        CHECK(split);
+        if (split == nullptr) {
+            return;  // the split failed; there are no parts to inspect
+        }
+        CHECK(split->size() == 1);
+        if (split->empty()) {
+            return;  // front() below requires at least one part
+        }
+        CHECK_FALSE(NativeSupport::IsTree(split->front().hash()));
+
+        // Test that there is no large entry in the storage:
+        // To ensure there is no split of the initial object, it is removed:
+        CHECK(FileSystemManager::RemoveFile(path));
+        CHECK_FALSE(FileSystemManager::IsFile(path));
+
+        // The part of a small executable is the same file but without the
+        // execution permission. It must be deleted too.
+        if constexpr (kIsExec) {
+            auto part_path = cas.BlobPath(split->front(), false);
+            CHECK(part_path);
+            if (part_path) {
+                CHECK(FileSystemManager::RemoveFile(*part_path));
+            }
+        }
+
+        // Split must not find the large entry:
+        auto pack_2 = kIsTree ? cas.SplitTree(digest) : cas.SplitBlob(digest);
+        auto* error_2 = std::get_if<LargeObjectError>(&pack_2);
+        CHECK(error_2);
+        if (error_2 != nullptr) {
+            CHECK(error_2->Code() == LargeObjectErrorCode::FileNotFound);
+        }
+    }
+}
+
+// Test splitting of an empty object. The split must be successful, but the
+// entry must not be placed to the LargeCAS. The result of splitting must be
+// empty.
+// CHECK does not stop the test on failure, so every value that is dereferenced
+// afterwards is guarded explicitly. REQUIRE is avoided because this function is
+// noexcept and a failing REQUIRE throws.
+template <ObjectType kType>
+static void TestEmpty() noexcept {
+    SECTION("Empty") {
+        static constexpr bool kIsTree = IsTreeObject(kType);
+        static constexpr bool kIsExec = IsExecutableObject(kType);
+
+        using TestType = std::conditional_t<kIsTree,
+                                            LargeTestUtils::Tree,
+                                            LargeTestUtils::Blob<kIsExec>>;
+
+        auto const& cas = Storage::Instance().CAS();
+
+        // Create an empty object:
+        auto object = TestType::Create(
+            cas, std::string(TestType::kEmptyId), TestType::kEmptySize);
+        CHECK(object);
+        if (not object) {
+            return;  // nothing to split; avoid reading an empty optional
+        }
+        auto const& [digest, path] = *object;
+
+        // Split the empty object:
+        auto pack_1 = kIsTree ? cas.SplitTree(digest) : cas.SplitBlob(digest);
+        auto* split = std::get_if<std::vector<bazel_re::Digest>>(&pack_1);
+        CHECK(split);
+        if (split == nullptr) {
+            return;  // the split failed; there are no parts to inspect
+        }
+        CHECK(split->empty());
+
+        // Test that there is no large entry in the storage:
+        // To ensure there is no split of the initial object, it is removed:
+        CHECK(FileSystemManager::RemoveFile(path));
+        CHECK_FALSE(FileSystemManager::IsFile(path));
+
+        // For executables the non-executable entry must be also deleted.
+        if constexpr (kIsExec) {
+            auto blob_path = cas.BlobPath(digest, /*is_executable=*/false);
+            CHECK(blob_path);
+            if (not blob_path) {
+                return;  // without the path the entry cannot be removed
+            }
+            CHECK(FileSystemManager::RemoveFile(*blob_path));
+            CHECK_FALSE(FileSystemManager::IsFile(*blob_path));
+        }
+
+        // Split must not find the large entry:
+        auto pack_2 = kIsTree ? cas.SplitTree(digest) : cas.SplitBlob(digest);
+        auto* error_2 = std::get_if<LargeObjectError>(&pack_2);
+        CHECK(error_2);
+        if (error_2 != nullptr) {
+            CHECK(error_2->Code() == LargeObjectErrorCode::FileNotFound);
+        }
+    }
+}
+
+TEST_CASE_METHOD(HermeticLocalTestFixture,  // runs the large/small/empty split scenarios once per object type
+ "LocalCAS: Split-Splice",
+ "[storage]") {
+ SECTION("File") {  // helpers instantiated for ObjectType::File
+ TestLarge<ObjectType::File>();  // each helper opens its own nested SECTION
+ TestSmall<ObjectType::File>();
+ TestEmpty<ObjectType::File>();
+ }
+ SECTION("Tree") {  // helpers instantiated for ObjectType::Tree (split via SplitTree)
+ TestLarge<ObjectType::Tree>();
+ TestSmall<ObjectType::Tree>();
+ TestEmpty<ObjectType::Tree>();
+ }
+ SECTION("Executable") {  // helpers instantiated for ObjectType::Executable (split via SplitBlob)
+ TestLarge<ObjectType::Executable>();
+ TestSmall<ObjectType::Executable>();
+ TestEmpty<ObjectType::Executable>();
+ }
+}
+
+namespace {
+
+/// \brief Holder of the temporary directory the generated test files are
+/// placed in. The single instance is a function-local static, so the directory
+/// handle is shared by all tests that ask for it.
+class TestFilesDirectory final {
+  public:
+    /// \brief Access the shared instance; it is created on first use.
+    [[nodiscard]] static auto Instance() noexcept -> TestFilesDirectory const& {
+        static TestFilesDirectory instance;
+        return instance;
+    }
+
+    /// \brief Path of the temporary directory held by this instance.
+    [[nodiscard]] auto GetPath() const noexcept -> std::filesystem::path {
+        return temp_directory_->GetPath();
+    }
+
+  private:
+    // Handle of the temporary directory, created below "tmp/tmp_space" of
+    // the current working directory.
+    TmpDirPtr temp_directory_;
+
+    explicit TestFilesDirectory() noexcept
+        : temp_directory_(
+              TmpDir::Create(FileSystemManager::GetCurrentDirectory() / "tmp" /
+                             "tmp_space")) {}
+};
+
+namespace LargeTestUtils {
+/// \brief Generate a blob file, store it in the CAS and look up its CAS path.
+/// \returns Digest and CAS path of the blob, or std::nullopt if any of the
+/// three steps fails.
+template <bool IsExecutable>
+auto Blob<IsExecutable>::Create(LocalCAS<kDefaultDoGlobalUplink> const& cas,
+                                std::string const& id,
+                                std::uintmax_t size) noexcept
+    -> std::optional<std::pair<bazel_re::Digest, std::filesystem::path>> {
+    auto source = Generate(id, size);
+    if (not source) {
+        return std::nullopt;
+    }
+
+    auto digest = cas.StoreBlob(*source, IsExecutable);
+    if (not digest) {
+        return std::nullopt;
+    }
+
+    auto stored_path = cas.BlobPath(*digest, IsExecutable);
+    if (not stored_path) {
+        return std::nullopt;
+    }
+    return std::make_pair(std::move(*digest), std::move(*stored_path));
+}
+
+/// \brief Provide the file "blob<id>" in the shared test files directory. An
+/// already existing file is reused, otherwise a new one is generated.
+/// \returns Path to the file, or std::nullopt if it could not be generated.
+template <bool IsExecutable>
+auto Blob<IsExecutable>::Generate(std::string const& id,
+                                  std::uintmax_t size) noexcept
+    -> std::optional<std::filesystem::path> {
+    std::string const file_name = "blob" + id;
+    auto target = TestFilesDirectory::Instance().GetPath() / file_name;
+
+    // Generation is attempted only if the file is not there yet.
+    bool const available = FileSystemManager::IsFile(target) or
+                           LargeObjectUtils::GenerateFile(target, size);
+    if (not available) {
+        return std::nullopt;
+    }
+    return target;
+}
+
+/// \brief Generate a directory, store it in the CAS as a tree and look up the
+/// CAS path of that tree.
+/// \returns Digest and CAS path of the tree, or std::nullopt if any of the
+/// three steps fails.
+auto Tree::Create(LocalCAS<kDefaultDoGlobalUplink> const& cas,
+                  std::string const& id,
+                  std::uintmax_t entries_count) noexcept
+    -> std::optional<std::pair<bazel_re::Digest, std::filesystem::path>> {
+    auto directory = Generate(id, entries_count);
+    if (not directory) {
+        return std::nullopt;
+    }
+
+    auto digest = StoreRaw(cas, *directory);
+    if (not digest) {
+        return std::nullopt;
+    }
+
+    auto stored_path = cas.TreePath(*digest);
+    if (not stored_path) {
+        return std::nullopt;
+    }
+    return std::make_pair(std::move(*digest), std::move(*stored_path));
+}
+
+/// \brief Provide the directory "tree<id>" in the shared test files directory.
+/// An already existing directory is reused, otherwise a new one is generated.
+/// \returns Path to the directory, or std::nullopt if it could not be
+/// generated.
+auto Tree::Generate(std::string const& id,
+                    std::uintmax_t entries_count) noexcept
+    -> std::optional<std::filesystem::path> {
+    std::string const directory_name = "tree" + id;
+    auto target = TestFilesDirectory::Instance().GetPath() / directory_name;
+
+    // Generation is attempted only if the directory is not there yet.
+    bool const available =
+        FileSystemManager::IsDirectory(target) or
+        LargeObjectUtils::GenerateDirectory(target, entries_count);
+    if (not available) {
+        return std::nullopt;
+    }
+    return target;
+}
+
+/// \brief Store a local directory in the CAS as a tree. Blobs, trees and
+/// symlink contents encountered by the tree digest factory are forwarded to
+/// the corresponding store operations of the CAS.
+/// \returns Digest of the tree, or std::nullopt if the given path is not a
+/// directory or the digest could not be created.
+auto Tree::StoreRaw(LocalCAS<kDefaultDoGlobalUplink> const& cas,
+                    std::filesystem::path const& directory) noexcept
+    -> std::optional<bazel_re::Digest> {
+    if (FileSystemManager::IsDirectory(directory)) {
+        // Callbacks handed to the factory; each one writes to the CAS.
+        auto blob_storer = [&cas](auto const& path, auto is_exec) {
+            return cas.StoreBlob(path, is_exec);
+        };
+        auto tree_storer = [&cas](auto const& bytes, auto const& /*dir*/) {
+            return cas.StoreTree(bytes);
+        };
+        auto symlink_storer = [&cas](auto const& content) {
+            return cas.StoreBlob(content);
+        };
+
+        return BazelMsgFactory::CreateGitTreeDigestFromLocalTree(
+            directory, blob_storer, tree_storer, symlink_storer);
+    }
+    return std::nullopt;
+}
+} // namespace LargeTestUtils
+
+} // namespace