diff options
-rw-r--r-- | src/buildtool/storage/TARGETS | 3 | ||||
-rw-r--r-- | src/buildtool/storage/large_object_cas.hpp | 75 | ||||
-rw-r--r-- | src/buildtool/storage/large_object_cas.tpp | 100 |
3 files changed, 178 insertions, 0 deletions
diff --git a/src/buildtool/storage/TARGETS b/src/buildtool/storage/TARGETS index 38180055..fd6a3ff1 100644 --- a/src/buildtool/storage/TARGETS +++ b/src/buildtool/storage/TARGETS @@ -31,6 +31,8 @@ , "target_cache_key.hpp" , "target_cache_entry.hpp" , "garbage_collector.hpp" + , "large_object_cas.hpp" + , "large_object_cas.tpp" ] , "srcs": ["target_cache_key.cpp", "target_cache_entry.cpp", "garbage_collector.cpp"] @@ -54,6 +56,7 @@ , ["src/buildtool/common", "bazel_types"] , ["src/buildtool/file_system", "git_repo"] , ["src/buildtool/common", "artifact_description"] + , ["src/buildtool/compatibility", "compatibility"] ] , "stage": ["src", "buildtool", "storage"] , "private-deps": diff --git a/src/buildtool/storage/large_object_cas.hpp b/src/buildtool/storage/large_object_cas.hpp new file mode 100644 index 00000000..a1a93606 --- /dev/null +++ b/src/buildtool/storage/large_object_cas.hpp @@ -0,0 +1,75 @@ +// Copyright 2022 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_HPP +#define INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_HPP + +#include <filesystem> +#include <optional> +#include <vector> + +#include "src/buildtool/common/bazel_types.hpp" +#include "src/buildtool/file_system/file_storage.hpp" +#include "src/buildtool/file_system/object_type.hpp" + +/// \brief Stores auxiliary information for reconstructing large objects. +/// The entries are keyed by the hash of the spliced result and the value of an +/// entry is the concatenation of the hashes of chunks the large object is +/// composed of. +template <bool kDoGlobalUplink> +class LargeObjectCAS final { + public: + explicit LargeObjectCAS(std::filesystem::path const& store_path) noexcept + : file_store_(store_path) {} + + LargeObjectCAS(LargeObjectCAS const&) = delete; + LargeObjectCAS(LargeObjectCAS&&) = delete; + auto operator=(LargeObjectCAS const&) -> LargeObjectCAS& = delete; + auto operator=(LargeObjectCAS&&) -> LargeObjectCAS& = delete; + ~LargeObjectCAS() noexcept = default; + + /// \brief Get the path to a large entry in the storage. + /// \param digest The digest of a large object. + /// \returns Path to the large entry if in the storage. + [[nodiscard]] auto GetEntryPath(bazel_re::Digest const& digest) + const noexcept -> std::optional<std::filesystem::path>; + + private: + // By default, overwrite existing entries. Unless this is a generation + // (disabled global uplink), then we never want to overwrite any entries. + static constexpr auto kStoreMode = + kDoGlobalUplink ? StoreMode::LastWins : StoreMode::FirstWins; + + FileStorage<ObjectType::File, kStoreMode, /*kSetEpochTime=*/false> + file_store_; + + /// \brief Obtain the information for reconstructing a large object. + /// \param digest The digest of a large object. + /// \returns Parts the large object is composed of, if present in + /// the storage. + [[nodiscard]] auto ReadEntry(bazel_re::Digest const& digest) const noexcept + -> std::optional<std::vector<bazel_re::Digest>>; + + /// \brief Create a new entry description and add it to the storage. + /// \param digest The digest of the result. + /// \param parts Parts the resulting object is composed of. + /// \returns True if the entry exists afterwards. + [[nodiscard]] auto WriteEntry( + bazel_re::Digest const& digest, + std::vector<bazel_re::Digest> const& parts) const noexcept -> bool; +}; + +#include "src/buildtool/storage/large_object_cas.tpp" + +#endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_HPP diff --git a/src/buildtool/storage/large_object_cas.tpp b/src/buildtool/storage/large_object_cas.tpp new file mode 100644 index 00000000..12894992 --- /dev/null +++ b/src/buildtool/storage/large_object_cas.tpp @@ -0,0 +1,100 @@ +// Copyright 2022 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_TPP +#define INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_TPP + +#include <cstdint> +#include <cstdlib> +#include <fstream> +#include <string> + +#include "nlohmann/json.hpp" +#include "src/buildtool/compatibility/native_support.hpp" +#include "src/buildtool/file_system/file_system_manager.hpp" +#include "src/buildtool/storage/large_object_cas.hpp" + +template <bool kDoGlobalUplink> +auto LargeObjectCAS<kDoGlobalUplink>::GetEntryPath( + bazel_re::Digest const& digest) const noexcept + -> std::optional<std::filesystem::path> { + const std::string hash = NativeSupport::Unprefix(digest.hash()); + const std::filesystem::path file_path = file_store_.GetPath(hash); + if (FileSystemManager::IsFile(file_path)) { + return file_path; + } + return std::nullopt; +} + +template <bool kDoGlobalUplink> +auto LargeObjectCAS<kDoGlobalUplink>::ReadEntry(bazel_re::Digest const& digest) + const noexcept -> std::optional<std::vector<bazel_re::Digest>> { + auto const file_path = GetEntryPath(digest); + if (not file_path) { + return std::nullopt; + } + + std::vector<bazel_re::Digest> parts; + try { + std::ifstream stream(*file_path); + nlohmann::json j = nlohmann::json::parse(stream); + const size_t size = j.at("size").template get<size_t>(); + parts.reserve(size); + + auto const& j_parts = j.at("parts"); + for (size_t i = 0; i < size; ++i) { + bazel_re::Digest& d = parts.emplace_back(); + d.set_hash(j_parts.at(i).at("hash").template get<std::string>()); + d.set_size_bytes(j_parts.at(i).at("size").template get<int64_t>()); + } + } catch (...) { + return std::nullopt; + } + return parts; +} + +template <bool kDoGlobalUplink> +auto LargeObjectCAS<kDoGlobalUplink>::WriteEntry( + bazel_re::Digest const& digest, + std::vector<bazel_re::Digest> const& parts) const noexcept -> bool { + if (GetEntryPath(digest)) { + return true; + } + + // The large entry cannot refer itself or be empty. + // Otherwise, the digest in the main CAS would be removed during GC. + // It would bring the LargeObjectCAS to an invalid state: the + // large entry exists, but the parts do not. + if (parts.size() < 2) { + return false; + } + + nlohmann::json j; + try { + j["size"] = parts.size(); + auto& j_parts = j["parts"]; + for (auto const& part : parts) { + auto& j = j_parts.emplace_back(); + j["hash"] = part.hash(); + j["size"] = part.size_bytes(); + } + } catch (...) { + return false; + } + + const auto hash = NativeSupport::Unprefix(digest.hash()); + return file_store_.AddFromBytes(hash, j.dump()); +} + +#endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LARGE_OBJECT_CAS_TPP |