summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/other_tools/just_mr/TARGETS 3
-rw-r--r-- src/other_tools/just_mr/utils.cpp 26
-rw-r--r-- src/other_tools/just_mr/utils.hpp 9
-rw-r--r-- src/other_tools/ops_maps/TARGETS 18
-rw-r--r-- src/other_tools/ops_maps/content_cas_map.cpp 127
-rw-r--r-- src/other_tools/ops_maps/content_cas_map.hpp 73
6 files changed, 255 insertions, 1 deletions
diff --git a/src/other_tools/just_mr/TARGETS b/src/other_tools/just_mr/TARGETS
index 181e563c..77fe8d75 100644
--- a/src/other_tools/just_mr/TARGETS
+++ b/src/other_tools/just_mr/TARGETS
@@ -9,6 +9,7 @@
, ["src/buildtool/main", "constants"]
]
, "stage": ["src", "other_tools", "just_mr"]
- , "private-deps": [["src/utils/cpp", "path"]]
+ , "private-deps":
+ [["src/utils/cpp", "path"], ["src/buildtool/execution_api/local", "local"]]
}
}
diff --git a/src/other_tools/just_mr/utils.cpp b/src/other_tools/just_mr/utils.cpp
index 3470b7db..4d668bc6 100644
--- a/src/other_tools/just_mr/utils.cpp
+++ b/src/other_tools/just_mr/utils.cpp
@@ -14,6 +14,8 @@
#include "src/other_tools/just_mr/utils.hpp"
+#include "src/buildtool/execution_api/local/file_storage.hpp"
+#include "src/buildtool/execution_api/local/local_cas.hpp"
#include "src/utils/cpp/path.hpp"
namespace JustMR::Utils {
@@ -74,4 +76,28 @@ auto WriteTreeIDFile(std::filesystem::path const& tree_id_file,
return FileSystemManager::Rename(tmp_file.string(), tree_id_file);
}
+/// \brief Store the given bytes as a blob in the local file CAS.
+/// \param data  Content to be stored.
+/// \returns The blob path reported by the CAS for the stored content, or
+/// std::nullopt if storing the bytes did not yield a digest.
+auto AddToCAS(std::string const& data) noexcept
+    -> std::optional<std::filesystem::path> {
+    auto const& file_cas = LocalCAS<ObjectType::File>::Instance();
+    auto stored = file_cas.StoreBlobFromBytes(data);
+    if (not stored) {
+        return std::nullopt;
+    }
+    // the bytes were accepted; hand back whatever location the CAS reports
+    return file_cas.BlobPath(*stored);
+}
+
+/// \brief Best-effort import of a distfile into the local file CAS.
+/// Every directory listed in just_mr_paths->distdirs is probed for an entry
+/// named distfile; each existing candidate is handed to the CAS. The outcome
+/// of storing is deliberately not reported to the caller.
+void AddDistfileToCAS(std::filesystem::path const& distfile,
+                      JustMR::PathsPtr const& just_mr_paths) noexcept {
+    auto const& file_cas = LocalCAS<ObjectType::File>::Instance();
+    for (auto const& distdir : just_mr_paths->distdirs) {
+        auto candidate = distdir / distfile;
+        if (not FileSystemManager::Exists(candidate)) {
+            continue;  // nothing by that name in this distdir
+        }
+        // result intentionally ignored; callers re-check the CAS afterwards
+        [[maybe_unused]] auto digest = file_cas.StoreBlobFromFile(candidate);
+    }
+}
+
} // namespace JustMR::Utils
diff --git a/src/other_tools/just_mr/utils.hpp b/src/other_tools/just_mr/utils.hpp
index 4e5e69c8..2da0155a 100644
--- a/src/other_tools/just_mr/utils.hpp
+++ b/src/other_tools/just_mr/utils.hpp
@@ -143,6 +143,15 @@ namespace Utils {
[[nodiscard]] auto WriteTreeIDFile(std::filesystem::path const& tree_id_file,
std::string const& tree_id) noexcept -> bool;
+/// \brief Add data to file CAS.
+/// The given bytes are stored as a blob in the local file CAS.
+/// \param data  Content to be stored.
+/// \returns The path to the file added to CAS, or nullopt if not added.
+[[nodiscard]] auto AddToCAS(std::string const& data) noexcept
+ -> std::optional<std::filesystem::path>;
+
+/// \brief Try to add distfile to CAS.
+/// Each directory in just_mr_paths->distdirs is checked for an entry named
+/// distfile; every existing candidate is passed to the local file CAS.
+/// Failures to store are not reported, so callers have to check the CAS
+/// themselves afterwards.
+/// \param distfile       Name of the file, relative to each distdir.
+/// \param just_mr_paths  Provides the list of distdirs to search.
+void AddDistfileToCAS(std::filesystem::path const& distfile,
+ JustMR::PathsPtr const& just_mr_paths) noexcept;
+
} // namespace Utils
} // namespace JustMR
diff --git a/src/other_tools/ops_maps/TARGETS b/src/other_tools/ops_maps/TARGETS
index a46d63c3..b265ba72 100644
--- a/src/other_tools/ops_maps/TARGETS
+++ b/src/other_tools/ops_maps/TARGETS
@@ -46,4 +46,22 @@
, ["src/utils/cpp", "tmp_dir"]
]
}
+, "content_cas_map":
+ { "type": ["@", "rules", "CC", "library"]
+ , "name": ["content_cas_map"]
+ , "hdrs": ["content_cas_map.hpp"]
+ , "srcs": ["content_cas_map.cpp"]
+ , "deps":
+ [ ["src/other_tools/just_mr", "utils"]
+ , ["src/buildtool/multithreading", "async_map_consumer"]
+ , ["src/utils/cpp", "hash_combine"]
+ , ["@", "json", "", "json"]
+ ]
+ , "stage": ["src", "other_tools", "ops_maps"]
+ , "private-deps":
+ [ ["src/utils/cpp", "curl_easy_handle"]
+ , ["src/buildtool/crypto", "hasher"]
+ , ["src/buildtool/execution_api/local", "local"]
+ ]
+ }
}
diff --git a/src/other_tools/ops_maps/content_cas_map.cpp b/src/other_tools/ops_maps/content_cas_map.cpp
new file mode 100644
index 00000000..28b2e4bc
--- /dev/null
+++ b/src/other_tools/ops_maps/content_cas_map.cpp
@@ -0,0 +1,127 @@
+// Copyright 2022 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/other_tools/ops_maps/content_cas_map.hpp"
+
+#include "src/buildtool/crypto/hasher.hpp"
+#include "src/buildtool/execution_api/local/file_storage.hpp"
+#include "src/buildtool/execution_api/local/local_cas.hpp"
+#include "src/utils/cpp/curl_easy_handle.hpp"
+
+namespace {
+
+/// \brief Download the resource at the given URL into memory.
+/// \param fetch_url  Location to download from.
+/// \returns Whatever the curl handle's DownloadToString yields for the URL,
+/// or std::nullopt if no curl handle could be created.
+[[nodiscard]] auto NetworkFetch(std::string const& fetch_url) noexcept
+    -> std::optional<std::string> {
+    if (auto handle = CurlEasyHandle::Create()) {
+        return handle->DownloadToString(fetch_url);
+    }
+    return std::nullopt;
+}
+
+/// \brief Compute the hex-encoded hash of a string.
+/// \tparam type  Hash function to be used by the Hasher.
+/// \param data   Bytes to be hashed.
+/// \returns Hex string of the finalized digest.
+template <Hasher::HashType type>
+[[nodiscard]] auto GetContentHash(std::string const& data) noexcept
+    -> std::string {
+    Hasher content_hasher{type};
+    content_hasher.Update(data);
+    // Finalize consumes the hasher, hence the move
+    return std::move(content_hasher).Finalize().HexString();
+}
+
+} // namespace
+
+/// \brief Create the map that ensures an archive's content is in the local
+/// file CAS.
+/// For every key the following is tried, in order, stopping at the first
+/// success: (1) the content is already in CAS; (2) a matching distfile from
+/// one of the distdirs can be added to CAS; (3) the archive is fetched from
+/// key.fetch_url, validated against the optional SHA256/SHA512 checksums,
+/// added to CAS, and confirmed to be retrievable under the requested content
+/// id. On success the value is set to true; every failure is reported as
+/// fatal through the logger and no value is set.
+/// \param just_mr_paths  Provides the distdirs searched in step (2).
+/// \param jobs           Passed on to the AsyncMapConsumer.
+auto CreateContentCASMap(JustMR::PathsPtr const& just_mr_paths,
+                         std::size_t jobs) -> ContentCASMap {
+    auto ensure_in_cas = [just_mr_paths](auto /*unused*/,
+                                         auto setter,
+                                         auto logger,
+                                         auto /*unused*/,
+                                         auto const& key) {
+        // check if content already in CAS
+        auto const& casf = LocalCAS<ObjectType::File>::Instance();
+        auto digest = ArtifactDigest(key.content, 0, false);
+        if (casf.BlobPath(digest)) {
+            (*setter)(true);
+            return;
+        }
+        // add distfile to CAS; without an explicit distfile name, fall back
+        // to the last path component of the fetch url
+        auto repo_distfile =
+            (key.distfile
+                 ? key.distfile.value()
+                 : std::filesystem::path(key.fetch_url).filename().string());
+        // an empty name would make the lookup probe the distdirs themselves
+        // rather than a file inside them, so skip the lookup in that case
+        if (not repo_distfile.empty()) {
+            JustMR::Utils::AddDistfileToCAS(repo_distfile, just_mr_paths);
+            // check if content is in CAS now
+            if (casf.BlobPath(digest)) {
+                (*setter)(true);
+                return;
+            }
+        }
+        // archive needs fetching
+        // before any network fetching, check that mandatory fields are provided
+        if (key.fetch_url.empty()) {
+            (*logger)("Failed to provide archive fetch url!",
+                      /*fatal=*/true);
+            return;
+        }
+        // now do the actual fetch
+        auto data = NetworkFetch(key.fetch_url);
+        if (data == std::nullopt) {
+            (*logger)(fmt::format("Failed to fetch a file with id {} from {}",
+                                  key.content,
+                                  key.fetch_url),
+                      /*fatal=*/true);
+            return;
+        }
+        // check content wrt checksums
+        if (key.sha256) {
+            auto actual_sha256 =
+                GetContentHash<Hasher::HashType::SHA256>(*data);
+            if (actual_sha256 != key.sha256.value()) {
+                (*logger)(
+                    fmt::format("SHA256 mismatch for {}: expected {}, got {}",
+                                key.fetch_url,
+                                key.sha256.value(),
+                                actual_sha256),
+                    /*fatal=*/true);
+                return;
+            }
+        }
+        if (key.sha512) {
+            auto actual_sha512 =
+                GetContentHash<Hasher::HashType::SHA512>(*data);
+            if (actual_sha512 != key.sha512.value()) {
+                (*logger)(
+                    fmt::format("SHA512 mismatch for {}: expected {}, got {}",
+                                key.fetch_url,
+                                key.sha512.value(),
+                                actual_sha512),
+                    /*fatal=*/true);
+                return;
+            }
+        }
+        // add the fetched data to CAS
+        auto path = JustMR::Utils::AddToCAS(*data);
+        if (not path) {
+            (*logger)(fmt::format("Failed to store in CAS the data fetched "
+                                  "for id {} from {}",
+                                  key.content,
+                                  key.fetch_url),
+                      /*fatal=*/true);
+            return;
+        }
+        // The download was stored under the digest of what was actually
+        // received. Only report success if the requested content can now be
+        // found; otherwise the fetch location served something else.
+        if (not casf.BlobPath(digest)) {
+            (*logger)(fmt::format("Content {} was not found at given fetch "
+                                  "location {}",
+                                  key.content,
+                                  key.fetch_url),
+                      /*fatal=*/true);
+            return;
+        }
+        (*setter)(true);
+    };
+    return AsyncMapConsumer<ArchiveContent, bool>(ensure_in_cas, jobs);
+} \ No newline at end of file
diff --git a/src/other_tools/ops_maps/content_cas_map.hpp b/src/other_tools/ops_maps/content_cas_map.hpp
new file mode 100644
index 00000000..9d4954d5
--- /dev/null
+++ b/src/other_tools/ops_maps/content_cas_map.hpp
@@ -0,0 +1,73 @@
+// Copyright 2022 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef INCLUDED_SRC_OTHER_TOOLS_OPS_MAPS_CONTENT_CAS_MAP_HPP
+#define INCLUDED_SRC_OTHER_TOOLS_OPS_MAPS_CONTENT_CAS_MAP_HPP
+
+#include <string>
+
+#include "nlohmann/json.hpp"
+#include "src/buildtool/multithreading/async_map_consumer.hpp"
+#include "src/other_tools/just_mr/utils.hpp"
+#include "src/utils/cpp/hash_combine.hpp"
+
+/// \brief Description of an archive whose content should end up in CAS.
+/// Two descriptions are considered equal as soon as their content ids agree;
+/// all other fields are ignored by the comparison.
+struct ArchiveContent {
+    std::string content;  // content id; the only field used as key
+    std::optional<std::string> distfile;  // name to look for in the distdirs
+    std::string fetch_url;                // where to download the archive
+    std::optional<std::string> sha256;    // expected checksum of a download
+    std::optional<std::string> sha512;    // expected checksum of a download
+
+    [[nodiscard]] auto operator==(ArchiveContent const& other) const -> bool {
+        return other.content == content;
+    }
+};
+
+/// \brief Archive description extended by fields needed by callers of
+/// ContentCASMap. Identity is still determined solely by the content id of
+/// the wrapped archive.
+struct ArchiveRepoInfo {
+    ArchiveContent archive;  // carries the key (see ArchiveContent)
+    std::string repo_type;
+    std::string subdir;
+
+    [[nodiscard]] auto operator==(ArchiveRepoInfo const& other) const -> bool {
+        return other.archive.content == archive.content;
+    }
+};
+
+/// \brief Maps the content hash of an archive to an "exists" status flag.
+/// Keys are compared and hashed by their content field only.
+using ContentCASMap = AsyncMapConsumer<ArchiveContent, bool>;
+
+/// \brief Create a ContentCASMap.
+/// \param just_mr_paths  Paths configuration handed to the map's value creator.
+/// \param jobs           Passed on to the underlying AsyncMapConsumer.
+[[nodiscard]] auto CreateContentCASMap(JustMR::PathsPtr const& just_mr_paths,
+ std::size_t jobs) -> ContentCASMap;
+
+namespace std {
+/// \brief Hash of an ArchiveContent. Only the content id contributes, in line
+/// with the type's equality operator.
+template <>
+struct hash<ArchiveContent> {
+    [[nodiscard]] auto operator()(ArchiveContent const& key) const noexcept
+        -> std::size_t {
+        return std::hash<std::string>{}(key.content);
+    }
+};
+
+/// \brief Hash of an ArchiveRepoInfo, as needed by callers of ContentCASMap
+/// that carry extra fields. It is the hash of the wrapped archive's content
+/// id.
+template <>
+struct hash<ArchiveRepoInfo> {
+    [[nodiscard]] auto operator()(ArchiveRepoInfo const& info) const noexcept
+        -> std::size_t {
+        return std::hash<std::string>{}(info.archive.content);
+    }
+};
+}  // namespace std
+
+#endif // INCLUDED_SRC_OTHER_TOOLS_OPS_MAPS_CONTENT_CAS_MAP_HPP \ No newline at end of file