diff options
author | Maksim Denisov <denisov.maksim@huawei.com> | 2024-03-22 17:14:01 +0100 |
---|---|---|
committer | Maksim Denisov <denisov.maksim@huawei.com> | 2024-04-02 15:30:03 +0200 |
commit | 2fd9e21ac7e29e83e535cc33a0d5429d89613057 (patch) | |
tree | 55c56d5b82fb19e9d96f8d75359735235d089dd3 /src | |
parent | fbd7eb02efc6a541a79360490e940bad4387c12c (diff) | |
download | justbuild-2fd9e21ac7e29e83e535cc33a0d5429d89613057.tar.gz |
LargeObjectCAS: Store large objects.
Every large object is keyed by the hash of the result and contains hashes of the parts from which the result can be reconstructed.
Diffstat (limited to 'src')
-rw-r--r-- | src/buildtool/storage/TARGETS | 3 | ||||
-rw-r--r-- | src/buildtool/storage/large_object_cas.hpp | 75 | ||||
-rw-r--r-- | src/buildtool/storage/large_object_cas.tpp | 100 |
3 files changed, 178 insertions, 0 deletions
diff --git a/src/buildtool/storage/TARGETS b/src/buildtool/storage/TARGETS index 38180055..fd6a3ff1 100644 --- a/src/buildtool/storage/TARGETS +++ b/src/buildtool/storage/TARGETS @@ -31,6 +31,8 @@ , "target_cache_key.hpp" , "target_cache_entry.hpp" , "garbage_collector.hpp" + , "large_object_cas.hpp" + , "large_object_cas.tpp" ] , "srcs": ["target_cache_key.cpp", "target_cache_entry.cpp", "garbage_collector.cpp"] @@ -54,6 +56,7 @@ , ["src/buildtool/common", "bazel_types"] , ["src/buildtool/file_system", "git_repo"] , ["src/buildtool/common", "artifact_description"] + , ["src/buildtool/compatibility", "compatibility"] ] , "stage": ["src", "buildtool", "storage"] , "private-deps": diff --git a/src/buildtool/storage/large_object_cas.hpp b/src/buildtool/storage/large_object_cas.hpp new file mode 100644 index 00000000..a1a93606 --- /dev/null +++ b/src/buildtool/storage/large_object_cas.hpp @@ -0,0 +1,75 @@ +// Copyright 2022 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_HPP +#define INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_HPP + +#include <filesystem> +#include <optional> +#include <vector> + +#include "src/buildtool/common/bazel_types.hpp" +#include "src/buildtool/file_system/file_storage.hpp" +#include "src/buildtool/file_system/object_type.hpp" + +/// \brief Stores auxiliary information for reconstructing large objects. 
+/// The entries are keyed by the hash of the spliced result and the value of an +/// entry is a JSON description listing the digests (hash and size) of the +/// chunks the large object is composed of. +template <bool kDoGlobalUplink> +class LargeObjectCAS final { + public: + explicit LargeObjectCAS(std::filesystem::path const& store_path) noexcept + : file_store_(store_path) {} + + LargeObjectCAS(LargeObjectCAS const&) = delete; + LargeObjectCAS(LargeObjectCAS&&) = delete; + auto operator=(LargeObjectCAS const&) -> LargeObjectCAS& = delete; + auto operator=(LargeObjectCAS&&) -> LargeObjectCAS& = delete; + ~LargeObjectCAS() noexcept = default; + + /// \brief Get the path to a large entry in the storage. + /// \param digest The digest of a large object. + /// \returns Path to the large entry if it exists as a file in the storage, + /// std::nullopt otherwise. + [[nodiscard]] auto GetEntryPath(bazel_re::Digest const& digest) + const noexcept -> std::optional<std::filesystem::path>; + + private: + // By default, overwrite existing entries. Unless this is a generation + // (disabled global uplink), then we never want to overwrite any entries. + static constexpr auto kStoreMode = + kDoGlobalUplink ? StoreMode::LastWins : StoreMode::FirstWins; + + FileStorage<ObjectType::File, kStoreMode, /*kSetEpochTime=*/false> + file_store_; + + /// \brief Obtain the information for reconstructing a large object. + /// \param digest The digest of a large object. + /// \returns Parts the large object is composed of, if present in + /// the storage; std::nullopt if the entry is missing or cannot be parsed. + [[nodiscard]] auto ReadEntry(bazel_re::Digest const& digest) const noexcept + -> std::optional<std::vector<bazel_re::Digest>>; + + /// \brief Create a new entry description and add it to the storage. + /// \param digest The digest of the result. + /// \param parts Parts the resulting object is composed of. + /// \returns True if the entry exists afterwards; false if fewer than two + /// parts are given or if building or adding the entry fails. 
+ [[nodiscard]] auto WriteEntry( + bazel_re::Digest const& digest, + std::vector<bazel_re::Digest> const& parts) const noexcept -> bool; +}; + +#include "src/buildtool/storage/large_object_cas.tpp" + +#endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_HPP diff --git a/src/buildtool/storage/large_object_cas.tpp b/src/buildtool/storage/large_object_cas.tpp new file mode 100644 index 00000000..12894992 --- /dev/null +++ b/src/buildtool/storage/large_object_cas.tpp @@ -0,0 +1,100 @@ +// Copyright 2022 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_TPP +#define INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_TPP + +#include <cstdint> +#include <cstdlib> +#include <fstream> +#include <string> + +#include "nlohmann/json.hpp" +#include "src/buildtool/compatibility/native_support.hpp" +#include "src/buildtool/file_system/file_system_manager.hpp" +#include "src/buildtool/storage/large_object_cas.hpp" + +template <bool kDoGlobalUplink> +auto LargeObjectCAS<kDoGlobalUplink>::GetEntryPath( + bazel_re::Digest const& digest) const noexcept + -> std::optional<std::filesystem::path> { + const std::string hash = NativeSupport::Unprefix(digest.hash()); + const std::filesystem::path file_path = file_store_.GetPath(hash); + if (FileSystemManager::IsFile(file_path)) { + return file_path; + } + return std::nullopt; +} + +template <bool kDoGlobalUplink> +auto LargeObjectCAS<kDoGlobalUplink>::ReadEntry(bazel_re::Digest const& digest) + const noexcept -> std::optional<std::vector<bazel_re::Digest>> { + auto const file_path = GetEntryPath(digest); + if (not file_path) { + return std::nullopt; + } + + std::vector<bazel_re::Digest> parts; + try { + std::ifstream stream(*file_path); + nlohmann::json j = nlohmann::json::parse(stream); + const size_t size = j.at("size").template get<size_t>(); + parts.reserve(size); + + auto const& j_parts = j.at("parts"); + for (size_t i = 0; i < size; ++i) { + bazel_re::Digest& d = parts.emplace_back(); + d.set_hash(j_parts.at(i).at("hash").template get<std::string>()); + d.set_size_bytes(j_parts.at(i).at("size").template get<int64_t>()); + } + } catch (...) { + return std::nullopt; + } + return parts; +} + +template <bool kDoGlobalUplink> +auto LargeObjectCAS<kDoGlobalUplink>::WriteEntry( + bazel_re::Digest const& digest, + std::vector<bazel_re::Digest> const& parts) const noexcept -> bool { + if (GetEntryPath(digest)) { + return true; + } + + // The large entry cannot refer itself or be empty. 
+ // Otherwise, the digest in the main CAS would be removed during GC. + // It would bring the LargeObjectCAS to an invalid state: the + // large entry exists, but the parts do not. + if (parts.size() < 2) { + return false; + } + + nlohmann::json j; + try { + j["size"] = parts.size(); + auto& j_parts = j["parts"]; + for (auto const& part : parts) { + auto& j = j_parts.emplace_back(); + j["hash"] = part.hash(); + j["size"] = part.size_bytes(); + } + } catch (...) { + return false; + } + + const auto hash = NativeSupport::Unprefix(digest.hash()); + return file_store_.AddFromBytes(hash, j.dump()); +} + +#endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_TPP |