summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMaksim Denisov <denisov.maksim@huawei.com>2024-03-22 18:13:23 +0100
committerMaksim Denisov <denisov.maksim@huawei.com>2024-04-02 15:30:03 +0200
commitfb04df06a3586f211532453903f7414907b2a7b0 (patch)
treeab896a5e6c06aba32eb4c129bb8f83ae7c71d88b /src
parentc2c3ca4b17122f5e93001b26330e97fb00f6c7f3 (diff)
downloadjustbuild-fb04df06a3586f211532453903f7414907b2a7b0.tar.gz
LargeBlobs: Split large objects.
* Add LargeObjectCAS fields for files and trees to LocalCAS; * Add logic for splitting objects located in the main storage. Tested: Splitting of large, small and empty objects.
Diffstat (limited to 'src')
-rw-r--r--src/buildtool/storage/TARGETS2
-rw-r--r--src/buildtool/storage/large_object_cas.hpp20
-rw-r--r--src/buildtool/storage/large_object_cas.tpp68
-rw-r--r--src/buildtool/storage/local_cas.hpp29
4 files changed, 108 insertions, 11 deletions
diff --git a/src/buildtool/storage/TARGETS b/src/buildtool/storage/TARGETS
index fd6a3ff1..ad846a85 100644
--- a/src/buildtool/storage/TARGETS
+++ b/src/buildtool/storage/TARGETS
@@ -38,6 +38,7 @@
["target_cache_key.cpp", "target_cache_entry.cpp", "garbage_collector.cpp"]
, "deps":
[ "config"
+ , "file_chunker"
, ["src/buildtool/common", "common"]
, ["src/buildtool/file_system", "file_storage"]
, ["src/buildtool/file_system", "object_cas"]
@@ -49,6 +50,7 @@
, ["src/utils/cpp", "gsl"]
, ["@", "gsl", "", "gsl"]
, ["@", "json", "", "json"]
+ , ["@", "fmt", "", "fmt"]
, ["src/buildtool/file_system", "object_type"]
, ["src/buildtool/file_system", "file_system_manager"]
, ["src/buildtool/execution_api/bazel_msg", "bazel_msg_factory"]
diff --git a/src/buildtool/storage/large_object_cas.hpp b/src/buildtool/storage/large_object_cas.hpp
index 881acc15..d7af8e9f 100644
--- a/src/buildtool/storage/large_object_cas.hpp
+++ b/src/buildtool/storage/large_object_cas.hpp
@@ -19,12 +19,16 @@
#include <optional>
#include <string>
#include <utility>
+#include <variant>
#include <vector>
#include "src/buildtool/common/bazel_types.hpp"
#include "src/buildtool/file_system/file_storage.hpp"
#include "src/buildtool/file_system/object_type.hpp"
+template <bool>
+class LocalCAS;
+
enum class LargeObjectErrorCode {
/// \brief An internal error occured.
Internal = 0,
@@ -58,11 +62,12 @@ class LargeObjectError final {
/// The entries are keyed by the hash of the spliced result and the value of an
/// entry is the concatenation of the hashes of chunks the large object is
/// composed of.
-template <bool kDoGlobalUplink>
+template <bool kDoGlobalUplink, ObjectType kType>
class LargeObjectCAS final {
public:
- explicit LargeObjectCAS(std::filesystem::path const& store_path) noexcept
- : file_store_(store_path) {}
+ LargeObjectCAS(LocalCAS<kDoGlobalUplink> const& local_cas,
+ std::filesystem::path const& store_path) noexcept
+ : local_cas_(local_cas), file_store_(store_path) {}
LargeObjectCAS(LargeObjectCAS const&) = delete;
LargeObjectCAS(LargeObjectCAS&&) = delete;
@@ -76,12 +81,21 @@ class LargeObjectCAS final {
[[nodiscard]] auto GetEntryPath(bazel_re::Digest const& digest)
const noexcept -> std::optional<std::filesystem::path>;
+ /// \brief Split an object from the main CAS into chunks. If the object has
+ /// been split before, it is not split again.
+ /// \param digest The digest of the object to be split.
+ /// \return Digests of the chunks the object is composed of
+ /// or an error on failure.
+ [[nodiscard]] auto Split(bazel_re::Digest const& digest) const noexcept
+ -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>>;
+
private:
// By default, overwrite existing entries. Unless this is a generation
// (disabled global uplink), then we never want to overwrite any entries.
static constexpr auto kStoreMode =
kDoGlobalUplink ? StoreMode::LastWins : StoreMode::FirstWins;
+ LocalCAS<kDoGlobalUplink> const& local_cas_;
FileStorage<ObjectType::File, kStoreMode, /*kSetEpochTime=*/false>
file_store_;
diff --git a/src/buildtool/storage/large_object_cas.tpp b/src/buildtool/storage/large_object_cas.tpp
index d5228c72..5adcb6a3 100644
--- a/src/buildtool/storage/large_object_cas.tpp
+++ b/src/buildtool/storage/large_object_cas.tpp
@@ -19,13 +19,16 @@
#include <cstdlib>
#include <fstream>
+#include "fmt/core.h"
#include "nlohmann/json.hpp"
#include "src/buildtool/compatibility/native_support.hpp"
#include "src/buildtool/file_system/file_system_manager.hpp"
+#include "src/buildtool/storage/file_chunker.hpp"
#include "src/buildtool/storage/large_object_cas.hpp"
+#include "src/buildtool/storage/local_cas.hpp"
-template <bool kDoGlobalUplink>
-auto LargeObjectCAS<kDoGlobalUplink>::GetEntryPath(
+template <bool kDoGlobalUplink, ObjectType kType>
+auto LargeObjectCAS<kDoGlobalUplink, kType>::GetEntryPath(
bazel_re::Digest const& digest) const noexcept
-> std::optional<std::filesystem::path> {
const std::string hash = NativeSupport::Unprefix(digest.hash());
@@ -36,9 +39,10 @@ auto LargeObjectCAS<kDoGlobalUplink>::GetEntryPath(
return std::nullopt;
}
-template <bool kDoGlobalUplink>
-auto LargeObjectCAS<kDoGlobalUplink>::ReadEntry(bazel_re::Digest const& digest)
- const noexcept -> std::optional<std::vector<bazel_re::Digest>> {
+template <bool kDoGlobalUplink, ObjectType kType>
+auto LargeObjectCAS<kDoGlobalUplink, kType>::ReadEntry(
+ bazel_re::Digest const& digest) const noexcept
+ -> std::optional<std::vector<bazel_re::Digest>> {
auto const file_path = GetEntryPath(digest);
if (not file_path) {
return std::nullopt;
@@ -63,8 +67,8 @@ auto LargeObjectCAS<kDoGlobalUplink>::ReadEntry(bazel_re::Digest const& digest)
return parts;
}
-template <bool kDoGlobalUplink>
-auto LargeObjectCAS<kDoGlobalUplink>::WriteEntry(
+template <bool kDoGlobalUplink, ObjectType kType>
+auto LargeObjectCAS<kDoGlobalUplink, kType>::WriteEntry(
bazel_re::Digest const& digest,
std::vector<bazel_re::Digest> const& parts) const noexcept -> bool {
if (GetEntryPath(digest)) {
@@ -96,4 +100,54 @@ auto LargeObjectCAS<kDoGlobalUplink>::WriteEntry(
return file_store_.AddFromBytes(hash, j.dump());
}
+template <bool kDoGlobalUplink, ObjectType kType>
+auto LargeObjectCAS<kDoGlobalUplink, kType>::Split(
+ bazel_re::Digest const& digest) const noexcept
+ -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>> {
+ if (auto large_entry = ReadEntry(digest)) {  // entry exists: reuse it
+ return std::move(*large_entry);
+ }
+
+ // Locate the object in the local CAS (tree or blob, chosen by kType):
+ auto file_path = IsTreeObject(kType)
+ ? local_cas_.TreePath(digest)
+ : local_cas_.BlobPath(digest, /*is_executable=*/false);
+ if (not file_path) {
+ return LargeObjectError{
+ LargeObjectErrorCode::FileNotFound,
+ fmt::format("could not find {}", digest.hash())};
+ }
+
+ // Split file into chunks; each chunk is stored as a non-executable blob:
+ FileChunker chunker{*file_path};
+ if (not chunker.IsOpen()) {
+ return LargeObjectError{
+ LargeObjectErrorCode::Internal,
+ fmt::format("could not split {}", digest.hash())};
+ }
+
+ std::vector<bazel_re::Digest> parts;
+ try {  // noexcept function: exceptions from the loop must not escape
+ while (auto chunk = chunker.NextChunk()) {
+ auto part = local_cas_.StoreBlob(*chunk, /*is_executable=*/false);
+ if (not part) {
+ return LargeObjectError{LargeObjectErrorCode::Internal,
+ "could not store a part."};
+ }
+ parts.push_back(std::move(*part));
+ }
+ } catch (...) {
+ return LargeObjectError{LargeObjectErrorCode::Internal,
+ "an unknown error occured."};
+ }
+ if (not chunker.Finished()) {  // presumably chunking ended early - confirm
+ return LargeObjectError{
+ LargeObjectErrorCode::Internal,
+ fmt::format("could not split {}", digest.hash())};
+ }
+
+ std::ignore = WriteEntry(digest, parts);  // result deliberately ignored
+ return parts;
+}
+
#endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_TPP
diff --git a/src/buildtool/storage/local_cas.hpp b/src/buildtool/storage/local_cas.hpp
index 382af848..ee7ecdf8 100644
--- a/src/buildtool/storage/local_cas.hpp
+++ b/src/buildtool/storage/local_cas.hpp
@@ -18,11 +18,14 @@
#include <filesystem>
#include <optional>
#include <unordered_set>
+#include <variant>
+#include <vector>
#include "gsl/gsl"
#include "src/buildtool/file_system/git_repo.hpp"
#include "src/buildtool/file_system/object_cas.hpp"
#include "src/buildtool/storage/garbage_collector.hpp"
+#include "src/buildtool/storage/large_object_cas.hpp"
/// \brief The local (logical) CAS for storing blobs and trees.
/// Blobs can be stored/queried as executable or non-executable. Trees might be
@@ -45,7 +48,11 @@ class LocalCAS {
: cas_file_{base.string() + 'f', Uplinker<ObjectType::File>()},
cas_exec_{base.string() + 'x', Uplinker<ObjectType::Executable>()},
cas_tree_{base.string() + (Compatibility::IsCompatible() ? 'f' : 't'),
- Uplinker<ObjectType::Tree>()} {}
+ Uplinker<ObjectType::Tree>()},
+ cas_file_large_{*this, base.string() + "-large-f"},
+ cas_tree_large_{*this,
+ base.string() + "-large-" +
+ (Compatibility::IsCompatible() ? 'f' : 't')} {}
/// \brief Store blob from file path with x-bit.
/// \tparam kOwner Indicates ownership for optimization (hardlink).
@@ -101,6 +108,15 @@ class LocalCAS {
return path ? path : TrySyncBlob(digest, is_executable);
}
+ /// \brief Split a blob into chunks via the large-object CAS for files.
+ /// \param digest The digest of the blob to be split.
+ /// \returns Digests of the parts the blob is composed of, or a
+ /// LargeObjectError on failure.
+ [[nodiscard]] auto SplitBlob(bazel_re::Digest const& digest) const noexcept
+ -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>> {
+ return cas_file_large_.Split(digest);
+ }
+
/// \brief Obtain tree path from digest.
/// \param digest Digest of the tree to lookup.
/// \returns Path to the tree if found or nullopt otherwise.
@@ -109,6 +125,15 @@ class LocalCAS {
return cas_tree_.BlobPath(digest);
}
+ /// \brief Split a tree into chunks via the large-object CAS for trees.
+ /// \param digest The digest of the tree to be split.
+ /// \returns Digests of the parts the tree is composed of, or a
+ /// LargeObjectError on failure.
+ [[nodiscard]] auto SplitTree(bazel_re::Digest const& digest) const noexcept
+ -> std::variant<LargeObjectError, std::vector<bazel_re::Digest>> {
+ return cas_tree_large_.Split(digest);
+ }
+
/// \brief Traverses a tree recursively and retrieves object infos of all
/// found blobs (leafs). Tree objects are by default not added to the result
/// list, but converted to a path name.
@@ -185,6 +210,8 @@ class LocalCAS {
ObjectCAS<ObjectType::File> cas_file_;
ObjectCAS<ObjectType::Executable> cas_exec_;
ObjectCAS<ObjectType::Tree> cas_tree_;
+ LargeObjectCAS<kDoGlobalUplink, ObjectType::File> cas_file_large_;
+ LargeObjectCAS<kDoGlobalUplink, ObjectType::Tree> cas_tree_large_;
/// \brief Provides uplink via "exists callback" for physical object CAS.
template <ObjectType kType>