diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/buildtool/storage/TARGETS | 2 | ||||
-rw-r--r-- | src/buildtool/storage/large_object_cas.hpp | 20 | ||||
-rw-r--r-- | src/buildtool/storage/large_object_cas.tpp | 68 | ||||
-rw-r--r-- | src/buildtool/storage/local_cas.hpp | 29 |
4 files changed, 108 insertions, 11 deletions
diff --git a/src/buildtool/storage/TARGETS b/src/buildtool/storage/TARGETS index fd6a3ff1..ad846a85 100644 --- a/src/buildtool/storage/TARGETS +++ b/src/buildtool/storage/TARGETS @@ -38,6 +38,7 @@ ["target_cache_key.cpp", "target_cache_entry.cpp", "garbage_collector.cpp"] , "deps": [ "config" + , "file_chunker" , ["src/buildtool/common", "common"] , ["src/buildtool/file_system", "file_storage"] , ["src/buildtool/file_system", "object_cas"] @@ -49,6 +50,7 @@ , ["src/utils/cpp", "gsl"] , ["@", "gsl", "", "gsl"] , ["@", "json", "", "json"] + , ["@", "fmt", "", "fmt"] , ["src/buildtool/file_system", "object_type"] , ["src/buildtool/file_system", "file_system_manager"] , ["src/buildtool/execution_api/bazel_msg", "bazel_msg_factory"] diff --git a/src/buildtool/storage/large_object_cas.hpp b/src/buildtool/storage/large_object_cas.hpp index 881acc15..d7af8e9f 100644 --- a/src/buildtool/storage/large_object_cas.hpp +++ b/src/buildtool/storage/large_object_cas.hpp @@ -19,12 +19,16 @@ #include <optional> #include <string> #include <utility> +#include <variant> #include <vector> #include "src/buildtool/common/bazel_types.hpp" #include "src/buildtool/file_system/file_storage.hpp" #include "src/buildtool/file_system/object_type.hpp" +template <bool> +class LocalCAS; + enum class LargeObjectErrorCode { /// \brief An internal error occured. Internal = 0, @@ -58,11 +62,12 @@ class LargeObjectError final { /// The entries are keyed by the hash of the spliced result and the value of an /// entry is the concatenation of the hashes of chunks the large object is /// composed of. -template <bool kDoGlobalUplink> +template <bool kDoGlobalUplink, ObjectType kType> class LargeObjectCAS final { public: - explicit LargeObjectCAS(std::filesystem::path const& store_path) noexcept - : file_store_(store_path) {} + LargeObjectCAS(LocalCAS<kDoGlobalUplink> const& local_cas, + std::filesystem::path const& store_path) noexcept + : local_cas_(local_cas), file_store_(store_path) {} LargeObjectCAS(LargeObjectCAS const&) = delete; LargeObjectCAS(LargeObjectCAS&&) = delete; @@ -76,12 +81,21 @@ class LargeObjectCAS final { [[nodiscard]] auto GetEntryPath(bazel_re::Digest const& digest) const noexcept -> std::optional<std::filesystem::path>; + /// \brief Split an object from the main CAS into chunks. If the object had + /// been split before, it would not get split again. + /// \param digest The digest of the object to be split. + /// \return A set of chunks the resulting object is composed of + /// or an error on failure. + [[nodiscard]] auto Split(bazel_re::Digest const& digest) const noexcept + -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>>; + private: // By default, overwrite existing entries. Unless this is a generation // (disabled global uplink), then we never want to overwrite any entries. static constexpr auto kStoreMode = kDoGlobalUplink ? StoreMode::LastWins : StoreMode::FirstWins; + LocalCAS<kDoGlobalUplink> const& local_cas_; FileStorage<ObjectType::File, kStoreMode, /*kSetEpochTime=*/false> file_store_; diff --git a/src/buildtool/storage/large_object_cas.tpp b/src/buildtool/storage/large_object_cas.tpp index d5228c72..5adcb6a3 100644 --- a/src/buildtool/storage/large_object_cas.tpp +++ b/src/buildtool/storage/large_object_cas.tpp @@ -19,13 +19,16 @@ #include <cstdlib> #include <fstream> +#include "fmt/core.h" #include "nlohmann/json.hpp" #include "src/buildtool/compatibility/native_support.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" +#include "src/buildtool/storage/file_chunker.hpp" #include "src/buildtool/storage/large_object_cas.hpp" +#include "src/buildtool/storage/local_cas.hpp" -template <bool kDoGlobalUplink> -auto LargeObjectCAS<kDoGlobalUplink>::GetEntryPath( +template <bool kDoGlobalUplink, ObjectType kType> +auto LargeObjectCAS<kDoGlobalUplink, kType>::GetEntryPath( bazel_re::Digest const& digest) const noexcept -> std::optional<std::filesystem::path> { const std::string hash = NativeSupport::Unprefix(digest.hash()); @@ -36,9 +39,10 @@ auto LargeObjectCAS<kDoGlobalUplink>::GetEntryPath( return std::nullopt; } -template <bool kDoGlobalUplink> -auto LargeObjectCAS<kDoGlobalUplink>::ReadEntry(bazel_re::Digest const& digest) - const noexcept -> std::optional<std::vector<bazel_re::Digest>> { +template <bool kDoGlobalUplink, ObjectType kType> +auto LargeObjectCAS<kDoGlobalUplink, kType>::ReadEntry( + bazel_re::Digest const& digest) const noexcept + -> std::optional<std::vector<bazel_re::Digest>> { auto const file_path = GetEntryPath(digest); if (not file_path) { return std::nullopt; @@ -63,8 +67,8 @@ auto LargeObjectCAS<kDoGlobalUplink>::ReadEntry(bazel_re::Digest const& digest) return parts; } -template <bool kDoGlobalUplink> -auto LargeObjectCAS<kDoGlobalUplink>::WriteEntry( +template <bool kDoGlobalUplink, ObjectType kType> +auto LargeObjectCAS<kDoGlobalUplink, kType>::WriteEntry( bazel_re::Digest const& digest, std::vector<bazel_re::Digest> const& parts) const noexcept -> bool { if (GetEntryPath(digest)) { @@ -96,4 +100,54 @@ auto LargeObjectCAS<kDoGlobalUplink>::WriteEntry( return file_store_.AddFromBytes(hash, j.dump()); } +template <bool kDoGlobalUplink, ObjectType kType> +auto LargeObjectCAS<kDoGlobalUplink, kType>::Split( + bazel_re::Digest const& digest) const noexcept + -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>> { + if (auto large_entry = ReadEntry(digest)) { + return std::move(*large_entry); + } + + // Get path to the file: + auto file_path = IsTreeObject(kType) + ? local_cas_.TreePath(digest) + : local_cas_.BlobPath(digest, /*is_executable=*/false); + if (not file_path) { + return LargeObjectError{ + LargeObjectErrorCode::FileNotFound, + fmt::format("could not find {}", digest.hash())}; + } + + // Split file into chunks: + FileChunker chunker{*file_path}; + if (not chunker.IsOpen()) { + return LargeObjectError{ + LargeObjectErrorCode::Internal, + fmt::format("could not split {}", digest.hash())}; + } + + std::vector<bazel_re::Digest> parts; + try { + while (auto chunk = chunker.NextChunk()) { + auto part = local_cas_.StoreBlob(*chunk, /*is_executable=*/false); + if (not part) { + return LargeObjectError{LargeObjectErrorCode::Internal, + "could not store a part."}; + } + parts.push_back(std::move(*part)); + } + } catch (...) { + return LargeObjectError{LargeObjectErrorCode::Internal, + "an unknown error occured."}; + } + if (not chunker.Finished()) { + return LargeObjectError{ + LargeObjectErrorCode::Internal, + fmt::format("could not split {}", digest.hash())}; + } + + std::ignore = WriteEntry(digest, parts); + return parts; +} + #endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_TPP diff --git a/src/buildtool/storage/local_cas.hpp b/src/buildtool/storage/local_cas.hpp index 382af848..ee7ecdf8 100644 --- a/src/buildtool/storage/local_cas.hpp +++ b/src/buildtool/storage/local_cas.hpp @@ -18,11 +18,14 @@ #include <filesystem> #include <optional> #include <unordered_set> +#include <variant> +#include <vector> #include "gsl/gsl" #include "src/buildtool/file_system/git_repo.hpp" #include "src/buildtool/file_system/object_cas.hpp" #include "src/buildtool/storage/garbage_collector.hpp" +#include "src/buildtool/storage/large_object_cas.hpp" /// \brief The local (logical) CAS for storing blobs and trees. /// Blobs can be stored/queried as executable or non-executable. Trees might be @@ -45,7 +48,11 @@ class LocalCAS { : cas_file_{base.string() + 'f', Uplinker<ObjectType::File>()}, cas_exec_{base.string() + 'x', Uplinker<ObjectType::Executable>()}, cas_tree_{base.string() + (Compatibility::IsCompatible() ? 'f' : 't'), - Uplinker<ObjectType::Tree>()} {} + Uplinker<ObjectType::Tree>()}, + cas_file_large_{*this, base.string() + "-large-f"}, + cas_tree_large_{*this, + base.string() + "-large-" + + (Compatibility::IsCompatible() ? 'f' : 't')} {} /// \brief Store blob from file path with x-bit. /// \tparam kOwner Indicates ownership for optimization (hardlink). @@ -101,6 +108,15 @@ class LocalCAS { return path ? path : TrySyncBlob(digest, is_executable); } + /// \brief Split a blob into chunks. + /// \param digest The digest of a blob to be split. + /// \returns Digests of the parts of the large object or an + /// error code on failure. + [[nodiscard]] auto SplitBlob(bazel_re::Digest const& digest) const noexcept + -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>> { + return cas_file_large_.Split(digest); + } + /// \brief Obtain tree path from digest. /// \param digest Digest of the tree to lookup. /// \returns Path to the tree if found or nullopt otherwise. @@ -109,6 +125,15 @@ class LocalCAS { return cas_tree_.BlobPath(digest); } + /// \brief Split a tree into chunks. + /// \param digest The digest of a tree to be split. + /// \returns Digests of the parts of the large object or an + /// error code on failure. + [[nodiscard]] auto SplitTree(bazel_re::Digest const& digest) const noexcept + -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>> { + return cas_tree_large_.Split(digest); + } + /// \brief Traverses a tree recursively and retrieves object infos of all /// found blobs (leafs). Tree objects are by default not added to the result /// list, but converted to a path name. @@ -185,6 +210,8 @@ class LocalCAS { ObjectCAS<ObjectType::File> cas_file_; ObjectCAS<ObjectType::Executable> cas_exec_; ObjectCAS<ObjectType::Tree> cas_tree_; + LargeObjectCAS<kDoGlobalUplink, ObjectType::File> cas_file_large_; + LargeObjectCAS<kDoGlobalUplink, ObjectType::Tree> cas_tree_large_; /// \brief Provides uplink via "exists callback" for physical object CAS. template <ObjectType kType> |