diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/other_tools/just_mr/TARGETS | 3 | ||||
-rw-r--r-- | src/other_tools/just_mr/utils.cpp | 26 | ||||
-rw-r--r-- | src/other_tools/just_mr/utils.hpp | 9 | ||||
-rw-r--r-- | src/other_tools/ops_maps/TARGETS | 18 | ||||
-rw-r--r-- | src/other_tools/ops_maps/content_cas_map.cpp | 127 | ||||
-rw-r--r-- | src/other_tools/ops_maps/content_cas_map.hpp | 73 |
6 files changed, 255 insertions, 1 deletions
diff --git a/src/other_tools/just_mr/TARGETS b/src/other_tools/just_mr/TARGETS index 181e563c..77fe8d75 100644 --- a/src/other_tools/just_mr/TARGETS +++ b/src/other_tools/just_mr/TARGETS @@ -9,6 +9,7 @@ , ["src/buildtool/main", "constants"] ] , "stage": ["src", "other_tools", "just_mr"] - , "private-deps": [["src/utils/cpp", "path"]] + , "private-deps": + [["src/utils/cpp", "path"], ["src/buildtool/execution_api/local", "local"]] } } diff --git a/src/other_tools/just_mr/utils.cpp b/src/other_tools/just_mr/utils.cpp index 3470b7db..4d668bc6 100644 --- a/src/other_tools/just_mr/utils.cpp +++ b/src/other_tools/just_mr/utils.cpp @@ -14,6 +14,8 @@ #include "src/other_tools/just_mr/utils.hpp" +#include "src/buildtool/execution_api/local/file_storage.hpp" +#include "src/buildtool/execution_api/local/local_cas.hpp" #include "src/utils/cpp/path.hpp" namespace JustMR::Utils { @@ -74,4 +76,28 @@ auto WriteTreeIDFile(std::filesystem::path const& tree_id_file, return FileSystemManager::Rename(tmp_file.string(), tree_id_file); } +auto AddToCAS(std::string const& data) noexcept + -> std::optional<std::filesystem::path> { + // get file CAS instance + auto const& casf = LocalCAS<ObjectType::File>::Instance(); + // write to casf + auto digest = casf.StoreBlobFromBytes(data); + if (digest) { + return casf.BlobPath(*digest); + } + return std::nullopt; +} + +void AddDistfileToCAS(std::filesystem::path const& distfile, + JustMR::PathsPtr const& just_mr_paths) noexcept { + auto const& casf = LocalCAS<ObjectType::File>::Instance(); + for (auto const& dirpath : just_mr_paths->distdirs) { + auto candidate = dirpath / distfile; + if (FileSystemManager::Exists(candidate)) { + // try to add to CAS + [[maybe_unused]] auto digest = casf.StoreBlobFromFile(candidate); + } + } +} + } // namespace JustMR::Utils diff --git a/src/other_tools/just_mr/utils.hpp b/src/other_tools/just_mr/utils.hpp index 4e5e69c8..2da0155a 100644 --- a/src/other_tools/just_mr/utils.hpp +++ b/src/other_tools/just_mr/utils.hpp @@ -143,6 +143,15 @@ namespace Utils { [[nodiscard]] auto WriteTreeIDFile(std::filesystem::path const& tree_id_file, std::string const& tree_id) noexcept -> bool; +/// \brief Add data to file CAS. +/// Returns the path to the file added to CAS, or nullopt if not added. +[[nodiscard]] auto AddToCAS(std::string const& data) noexcept + -> std::optional<std::filesystem::path>; + +/// \brief Try to add distfile to CAS. +void AddDistfileToCAS(std::filesystem::path const& distfile, + JustMR::PathsPtr const& just_mr_paths) noexcept; + } // namespace Utils } // namespace JustMR diff --git a/src/other_tools/ops_maps/TARGETS b/src/other_tools/ops_maps/TARGETS index a46d63c3..b265ba72 100644 --- a/src/other_tools/ops_maps/TARGETS +++ b/src/other_tools/ops_maps/TARGETS @@ -46,4 +46,22 @@ , ["src/utils/cpp", "tmp_dir"] ] } +, "content_cas_map": + { "type": ["@", "rules", "CC", "library"] + , "name": ["content_cas_map"] + , "hdrs": ["content_cas_map.hpp"] + , "srcs": ["content_cas_map.cpp"] + , "deps": + [ ["src/other_tools/just_mr", "utils"] + , ["src/buildtool/multithreading", "async_map_consumer"] + , ["src/utils/cpp", "hash_combine"] + , ["@", "json", "", "json"] + ] + , "stage": ["src", "other_tools", "ops_maps"] + , "private-deps": + [ ["src/utils/cpp", "curl_easy_handle"] + , ["src/buildtool/crypto", "hasher"] + , ["src/buildtool/execution_api/local", "local"] + ] + } } diff --git a/src/other_tools/ops_maps/content_cas_map.cpp b/src/other_tools/ops_maps/content_cas_map.cpp new file mode 100644 index 00000000..28b2e4bc --- /dev/null +++ b/src/other_tools/ops_maps/content_cas_map.cpp @@ -0,0 +1,127 @@ +// Copyright 2022 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/other_tools/ops_maps/content_cas_map.hpp" + +#include "src/buildtool/crypto/hasher.hpp" +#include "src/buildtool/execution_api/local/file_storage.hpp" +#include "src/buildtool/execution_api/local/local_cas.hpp" +#include "src/utils/cpp/curl_easy_handle.hpp" + +namespace { + +/// \brief Fetches a file from the internet and stores its content in memory. +/// Returns the content. +[[nodiscard]] auto NetworkFetch(std::string const& fetch_url) noexcept + -> std::optional<std::string> { + auto curl_handle = CurlEasyHandle::Create(); + if (not curl_handle) { + return std::nullopt; + } + return curl_handle->DownloadToString(fetch_url); +} + +template <Hasher::HashType type> +[[nodiscard]] auto GetContentHash(std::string const& data) noexcept + -> std::string { + Hasher hasher{type}; + hasher.Update(data); + auto digest = std::move(hasher).Finalize(); + return digest.HexString(); +} + +} // namespace + +auto CreateContentCASMap(JustMR::PathsPtr const& just_mr_paths, + std::size_t jobs) -> ContentCASMap { + auto ensure_in_cas = [just_mr_paths](auto /*unused*/, + auto setter, + auto logger, + auto /*unused*/, + auto const& key) { + // check if content already in CAS + auto const& casf = LocalCAS<ObjectType::File>::Instance(); + auto digest = ArtifactDigest(key.content, 0, false); + if (casf.BlobPath(digest)) { + (*setter)(true); + return; + } + // add distfile to CAS + auto repo_distfile = + (key.distfile + ? key.distfile.value() + : std::filesystem::path(key.fetch_url).filename().string()); + JustMR::Utils::AddDistfileToCAS(repo_distfile, just_mr_paths); + // check if content is in CAS now + if (casf.BlobPath(digest)) { + (*setter)(true); + return; + } + // archive needs fetching + // before any network fetching, check that mandatory fields are provided + if (key.fetch_url.empty()) { + (*logger)("Failed to provide archive fetch url!", + /*fatal=*/true); + return; + } + // now do the actual fetch + auto data = NetworkFetch(key.fetch_url); + if (data == std::nullopt) { + (*logger)(fmt::format("Failed to fetch a file with id {} from {}", + key.content, + key.fetch_url), + /*fatal=*/true); + return; + } + // check content wrt checksums + if (key.sha256) { + auto actual_sha256 = + GetContentHash<Hasher::HashType::SHA256>(*data); + if (actual_sha256 != key.sha256.value()) { + (*logger)( + fmt::format("SHA256 mismatch for {}: expected {}, got {}", + key.fetch_url, + key.sha256.value(), + actual_sha256), + /*fatal=*/true); + return; + } + } + if (key.sha512) { + auto actual_sha512 = + GetContentHash<Hasher::HashType::SHA512>(*data); + if (actual_sha512 != key.sha512.value()) { + (*logger)( + fmt::format("SHA512 mismatch for {}: expected {}, got {}", + key.fetch_url, + key.sha512.value(), + actual_sha512), + /*fatal=*/true); + return; + } + } + // add the fetched data to CAS + auto path = JustMR::Utils::AddToCAS(*data); + // check one last time if content is in CAS now + if (not path) { + (*logger)(fmt::format("Failed to fetch a file with id {} from {}", + key.content, + key.fetch_url), + /*fatal=*/true); + return; + } + (*setter)(true); + }; + return AsyncMapConsumer<ArchiveContent, bool>(ensure_in_cas, jobs); +}
\ No newline at end of file diff --git a/src/other_tools/ops_maps/content_cas_map.hpp b/src/other_tools/ops_maps/content_cas_map.hpp new file mode 100644 index 00000000..9d4954d5 --- /dev/null +++ b/src/other_tools/ops_maps/content_cas_map.hpp @@ -0,0 +1,73 @@ +// Copyright 2022 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INCLUDED_SRC_OTHER_TOOLS_OPS_MAPS_CONTENT_CAS_MAP_HPP +#define INCLUDED_SRC_OTHER_TOOLS_OPS_MAPS_CONTENT_CAS_MAP_HPP + +#include <string> + +#include "nlohmann/json.hpp" +#include "src/buildtool/multithreading/async_map_consumer.hpp" +#include "src/other_tools/just_mr/utils.hpp" +#include "src/utils/cpp/hash_combine.hpp" + +struct ArchiveContent { + std::string content; /* key */ + std::optional<std::string> distfile; + std::string fetch_url; + std::optional<std::string> sha256; + std::optional<std::string> sha512; + + [[nodiscard]] auto operator==(const ArchiveContent& other) const -> bool { + return content == other.content; + } +}; + +// Used in callers of ContentCASMap which need extra fields +struct ArchiveRepoInfo { + ArchiveContent archive; /* key (see ArchiveContent) */ + std::string repo_type; + std::string subdir; + + [[nodiscard]] auto operator==(const ArchiveRepoInfo& other) const -> bool { + return archive.content == other.archive.content; + } +}; + +/// \brief Maps the content hash of an archive to an "exists" status flag. +using ContentCASMap = AsyncMapConsumer<ArchiveContent, bool>; + +[[nodiscard]] auto CreateContentCASMap(JustMR::PathsPtr const& just_mr_paths, + std::size_t jobs) -> ContentCASMap; + +namespace std { +template <> +struct hash<ArchiveContent> { + [[nodiscard]] auto operator()(const ArchiveContent& ct) const noexcept + -> std::size_t { + return std::hash<std::string>{}(ct.content); + } +}; + +// Used in callers of ContentCASMap which need extra fields +template <> +struct hash<ArchiveRepoInfo> { + [[nodiscard]] auto operator()(const ArchiveRepoInfo& ct) const noexcept + -> std::size_t { + return std::hash<ArchiveContent>{}(ct.archive); + } +}; +} // namespace std + +#endif // INCLUDED_SRC_OTHER_TOOLS_OPS_MAPS_CONTENT_CAS_MAP_HPP
\ No newline at end of file |