Diffstat (limited to 'src/buildtool/execution_api/git')
-rw-r--r-- | src/buildtool/execution_api/git/TARGETS     |   9
-rw-r--r-- | src/buildtool/execution_api/git/git_api.cpp | 293
-rw-r--r-- | src/buildtool/execution_api/git/git_api.hpp | 285
3 files changed, 321 insertions, 266 deletions
diff --git a/src/buildtool/execution_api/git/TARGETS b/src/buildtool/execution_api/git/TARGETS
index adc3c908..6bb4bf46 100644
--- a/src/buildtool/execution_api/git/TARGETS
+++ b/src/buildtool/execution_api/git/TARGETS
@@ -2,15 +2,18 @@
   { "type": ["@", "rules", "CC", "library"]
   , "name": ["git"]
   , "hdrs": ["git_api.hpp"]
+  , "srcs": ["git_api.cpp"]
   , "deps":
     [ ["@", "gsl", "", "gsl"]
-    , ["@", "json", "", "json"]
-    , ["src/buildtool/common", "artifact_digest_factory"]
     , ["src/buildtool/common", "common"]
     , ["src/buildtool/common", "config"]
+    , ["src/buildtool/execution_api/common", "common"]
+    ]
+  , "private-deps":
+    [ ["@", "json", "", "json"]
+    , ["src/buildtool/common", "artifact_digest_factory"]
     , ["src/buildtool/crypto", "hash_function"]
     , ["src/buildtool/execution_api/common", "artifact_blob"]
-    , ["src/buildtool/execution_api/common", "common"]
     , ["src/buildtool/execution_api/common", "common_api"]
     , ["src/buildtool/file_system", "file_system_manager"]
     , ["src/buildtool/file_system", "git_tree"]
diff --git a/src/buildtool/execution_api/git/git_api.cpp b/src/buildtool/execution_api/git/git_api.cpp
new file mode 100644
index 00000000..f58df64d
--- /dev/null
+++ b/src/buildtool/execution_api/git/git_api.cpp
@@ -0,0 +1,293 @@
+// Copyright 2025 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/buildtool/execution_api/git/git_api.hpp"
+
+#include <cstddef>
+#include <cstdio>
+#include <functional>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+
+#include "nlohmann/json.hpp"
+#include "src/buildtool/common/artifact_digest_factory.hpp"
+#include "src/buildtool/crypto/hash_function.hpp"
+#include "src/buildtool/execution_api/common/artifact_blob.hpp"
+#include "src/buildtool/execution_api/common/common_api.hpp"
+#include "src/buildtool/file_system/file_system_manager.hpp"
+#include "src/buildtool/file_system/git_tree.hpp"
+#include "src/buildtool/file_system/object_type.hpp"
+#include "src/buildtool/logging/log_level.hpp"
+#include "src/buildtool/logging/logger.hpp"
+#include "src/utils/cpp/expected.hpp"
+
+namespace {
+[[nodiscard]] auto ToArtifactDigest(GitTreeEntry const& entry) noexcept
+    -> std::optional<ArtifactDigest> {
+    auto digest = ArtifactDigestFactory::Create(HashFunction::Type::GitSHA1,
+                                                entry.Hash(),
+                                                /*size=*/0,
+                                                entry.IsTree());
+    if (not digest) {
+        return std::nullopt;
+    }
+    return *std::move(digest);
+}
+}  // namespace
+
+GitApi::GitApi(gsl::not_null<RepositoryConfig const*> const& repo_config)
+    : repo_config_{repo_config} {}
+
+auto GitApi::RetrieveToPaths(
+    std::vector<Artifact::ObjectInfo> const& artifacts_info,
+    std::vector<std::filesystem::path> const& output_paths) const noexcept
+    -> bool {
+    if (artifacts_info.size() != output_paths.size()) {
+        Logger::Log(LogLevel::Error,
+                    "different number of digests and output paths.");
+        return false;
+    }
+    for (std::size_t i{}; i < artifacts_info.size(); ++i) {
+        auto const& info = artifacts_info[i];
+        if (IsTreeObject(info.type)) {
+            auto tree = repo_config_->ReadTreeFromGitCAS(info.digest.hash());
+            if (not tree) {
+                return false;
+            }
+            for (auto const& [path, entry] : *tree) {
+                auto digest = ToArtifactDigest(*entry);
+                if (not digest or
+                    not RetrieveToPaths(
+                        {Artifact::ObjectInfo{.digest = *std::move(digest),
+                                              .type = entry->Type(),
+                                              .failed = false}},
+                        {output_paths[i] / path})) {
+                    return false;
+                }
+            }
+        }
+        else {
+            auto blob = repo_config_->ReadBlobFromGitCAS(info.digest.hash());
+            if (not blob) {
+                return false;
+            }
+            if (not FileSystemManager::CreateDirectory(
+                    output_paths[i].parent_path()) or
+                not FileSystemManager::WriteFileAs</*kSetEpochTime=*/true,
+                                                   /*kSetWritable=*/true>(
+                    *blob, output_paths[i], info.type)) {
+                Logger::Log(LogLevel::Error,
+                            "staging to output path {} failed.",
+                            output_paths[i].string());
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+auto GitApi::RetrieveToFds(
+    std::vector<Artifact::ObjectInfo> const& artifacts_info,
+    std::vector<int> const& fds,
+    bool raw_tree) const noexcept -> bool {
+    if (artifacts_info.size() != fds.size()) {
+        Logger::Log(LogLevel::Error,
+                    "different number of digests and file descriptors.");
+        return false;
+    }
+    for (std::size_t i{}; i < artifacts_info.size(); ++i) {
+        auto const& info = artifacts_info[i];
+
+        std::string content;
+        if (IsTreeObject(info.type) and not raw_tree) {
+            auto tree = repo_config_->ReadTreeFromGitCAS(info.digest.hash());
+            if (not tree) {
+                Logger::Log(LogLevel::Debug,
+                            "Tree {} not known to git",
+                            info.digest.hash());
+                return false;
+            }
+
+            try {
+                auto json = nlohmann::json::object();
+                for (auto const& [path, entry] : *tree) {
+                    auto digest = ToArtifactDigest(*entry);
+                    if (not digest) {
+                        return false;
+                    }
+                    json[path] =
+                        Artifact::ObjectInfo{.digest = *std::move(digest),
+                                             .type = entry->Type(),
+                                             .failed = false}
+                            .ToString(/*size_unknown*/ true);
+                }
+                content = json.dump(2) + "\n";
+            } catch (...) {
+                return false;
+            }
+        }
+        else {
+            auto blob = repo_config_->ReadBlobFromGitCAS(info.digest.hash());
+            if (not blob) {
+                Logger::Log(LogLevel::Debug,
+                            "Blob {} not known to git",
+                            info.digest.hash());
+                return false;
+            }
+            content = *std::move(blob);
+        }
+
+        if (gsl::owner<FILE*> out = fdopen(fds[i], "wb")) {  // NOLINT
+            std::fwrite(content.data(), 1, content.size(), out);
+            std::fclose(out);
+        }
+        else {
+            Logger::Log(LogLevel::Error,
+                        "dumping to file descriptor {} failed.",
+                        fds[i]);
+            return false;
+        }
+    }
+    return true;
+}
+
+auto GitApi::RetrieveToCas(
+    std::vector<Artifact::ObjectInfo> const& artifacts_info,
+    IExecutionApi const& api) const noexcept -> bool {
+    // Determine missing artifacts in other CAS.
+    auto missing_artifacts_info = GetMissingArtifactsInfo<Artifact::ObjectInfo>(
+        api,
+        artifacts_info.begin(),
+        artifacts_info.end(),
+        [](Artifact::ObjectInfo const& info) { return info.digest; });
+    if (not missing_artifacts_info) {
+        Logger::Log(LogLevel::Error,
+                    "GitApi: Failed to retrieve the missing artifacts");
+        return false;
+    }
+
+    // GitApi works in the native mode only.
+    HashFunction const hash_function{HashFunction::Type::GitSHA1};
+
+    // Collect blobs of missing artifacts from local CAS. Trees are
+    // processed recursively before any blob is uploaded.
+    std::unordered_set<ArtifactBlob> container;
+    for (auto const& dgst : missing_artifacts_info->digests) {
+        auto const& info = missing_artifacts_info->back_map[dgst];
+        std::optional<std::string> content;
+        // Recursively process trees.
+        if (IsTreeObject(info.type)) {
+            auto tree = repo_config_->ReadTreeFromGitCAS(info.digest.hash());
+            if (not tree) {
+                return false;
+            }
+            std::unordered_set<ArtifactBlob> tree_deps_only_blobs;
+            for (auto const& [path, entry] : *tree) {
+                if (entry->IsTree()) {
+                    auto digest = ToArtifactDigest(*entry);
+                    if (not digest or
+                        not RetrieveToCas(
+                            {Artifact::ObjectInfo{.digest = *std::move(digest),
+                                                  .type = entry->Type(),
+                                                  .failed = false}},
+                            api)) {
+                        return false;
+                    }
+                }
+                else {
+                    auto const& entry_content = entry->RawData();
+                    if (not entry_content) {
+                        return false;
+                    }
+                    auto digest =
+                        ArtifactDigestFactory::HashDataAs<ObjectType::File>(
+                            hash_function, *entry_content);
+                    // Collect blob and upload to remote CAS if transfer
+                    // size reached.
+                    if (not UpdateContainerAndUpload(
+                            &tree_deps_only_blobs,
+                            ArtifactBlob{std::move(digest),
+                                         *entry_content,
+                                         IsExecutableObject(entry->Type())},
+                            /*exception_is_fatal=*/true,
+                            [&api](std::unordered_set<ArtifactBlob>&& blobs)
+                                -> bool {
+                                return api.Upload(std::move(blobs));
+                            })) {
+                        return false;
+                    }
+                }
+            }
+            // Upload remaining blobs.
+            if (not api.Upload(std::move(tree_deps_only_blobs))) {
+                return false;
+            }
+            content = tree->RawData();
+        }
+        else {
+            content = repo_config_->ReadBlobFromGitCAS(info.digest.hash());
+        }
+        if (not content) {
+            return false;
+        }
+
+        ArtifactDigest digest =
+            IsTreeObject(info.type)
+                ? ArtifactDigestFactory::HashDataAs<ObjectType::Tree>(
+                      hash_function, *content)
+                : ArtifactDigestFactory::HashDataAs<ObjectType::File>(
+                      hash_function, *content);
+
+        // Collect blob and upload to remote CAS if transfer size reached.
+        if (not UpdateContainerAndUpload(
+                &container,
+                ArtifactBlob{std::move(digest),
+                             std::move(*content),
+                             IsExecutableObject(info.type)},
+                /*exception_is_fatal=*/true,
+                [&api](std::unordered_set<ArtifactBlob>&& blobs) {
+                    return api.Upload(std::move(blobs),
+                                      /*skip_find_missing=*/true);
+                })) {
+            return false;
+        }
+    }
+
+    // Upload remaining blobs to remote CAS.
+    return api.Upload(std::move(container), /*skip_find_missing=*/true);
+}
+
+auto GitApi::RetrieveToMemory(Artifact::ObjectInfo const& artifact_info)
+    const noexcept -> std::optional<std::string> {
+    return repo_config_->ReadBlobFromGitCAS(artifact_info.digest.hash());
+}
+
+auto GitApi::IsAvailable(ArtifactDigest const& digest) const noexcept -> bool {
+    return repo_config_->ReadBlobFromGitCAS(digest.hash(), LogLevel::Trace)
+        .has_value();
+}
+
+auto GitApi::GetMissingDigests(
+    std::unordered_set<ArtifactDigest> const& digests) const noexcept
+    -> std::unordered_set<ArtifactDigest> {
+    std::unordered_set<ArtifactDigest> result;
+    result.reserve(digests.size());
+    for (auto const& digest : digests) {
+        if (not IsAvailable(digest)) {
+            result.emplace(digest);
+        }
+    }
+    return result;
+}
diff --git a/src/buildtool/execution_api/git/git_api.hpp b/src/buildtool/execution_api/git/git_api.hpp
index 22834007..bf73c71b 100644
--- a/src/buildtool/execution_api/git/git_api.hpp
+++ b/src/buildtool/execution_api/git/git_api.hpp
@@ -15,304 +15,63 @@
 #ifndef INCLUDED_SRC_BUILDTOOL_EXECUTION_API_GIT_GIT_API_HPP
 #define INCLUDED_SRC_BUILDTOOL_EXECUTION_API_GIT_GIT_API_HPP
 
-#include <cstddef>
-#include <cstdio>
 #include <filesystem>
-#include <functional>
-#include <memory>
 #include <optional>
 #include <string>
-#include <unordered_map>
 #include <unordered_set>
-#include <utility>
 #include <vector>
 
 #include "gsl/gsl"
-#include "nlohmann/json.hpp"
 #include "src/buildtool/common/artifact.hpp"
 #include "src/buildtool/common/artifact_digest.hpp"
-#include "src/buildtool/common/artifact_digest_factory.hpp"
 #include "src/buildtool/common/repository_config.hpp"
-#include "src/buildtool/crypto/hash_function.hpp"
-#include "src/buildtool/execution_api/common/artifact_blob.hpp"
-#include "src/buildtool/execution_api/common/common_api.hpp"
 #include "src/buildtool/execution_api/common/execution_api.hpp"
-#include "src/buildtool/file_system/file_system_manager.hpp"
-#include "src/buildtool/file_system/git_tree.hpp"
-#include "src/buildtool/file_system/object_type.hpp"
-#include "src/buildtool/logging/log_level.hpp"
-#include "src/buildtool/logging/logger.hpp"
-#include "src/utils/cpp/expected.hpp"
 
 class GitApi final {
   public:
-    explicit GitApi(gsl::not_null<const RepositoryConfig*> const& repo_config)
-        : repo_config_{repo_config} {}
+    explicit GitApi(gsl::not_null<RepositoryConfig const*> const& repo_config);
 
+    /// \brief Retrieve artifacts from git and store to specified paths.
+    /// Tree artifacts are resolved and its containing file artifacts are
+    /// recursively retrieved.
     [[nodiscard]] auto RetrieveToPaths(
         std::vector<Artifact::ObjectInfo> const& artifacts_info,
         std::vector<std::filesystem::path> const& output_paths) const noexcept
-        -> bool {
-        if (artifacts_info.size() != output_paths.size()) {
-            Logger::Log(LogLevel::Error,
-                        "different number of digests and output paths.");
-            return false;
-        }
-        for (std::size_t i{}; i < artifacts_info.size(); ++i) {
-            auto const& info = artifacts_info[i];
-            if (IsTreeObject(info.type)) {
-                auto tree =
-                    repo_config_->ReadTreeFromGitCAS(info.digest.hash());
-                if (not tree) {
-                    return false;
-                }
-                for (auto const& [path, entry] : *tree) {
-                    auto digest = ToArtifactDigest(*entry);
-                    if (not digest or
-                        not RetrieveToPaths(
-                            {Artifact::ObjectInfo{.digest = *std::move(digest),
-                                                  .type = entry->Type(),
-                                                  .failed = false}},
-                            {output_paths[i] / path})) {
-                        return false;
-                    }
-                }
-            }
-            else {
-                auto blob =
-                    repo_config_->ReadBlobFromGitCAS(info.digest.hash());
-                if (not blob) {
-                    return false;
-                }
-                if (not FileSystemManager::CreateDirectory(
-                        output_paths[i].parent_path()) or
-                    not FileSystemManager::WriteFileAs</*kSetEpochTime=*/true,
-                                                       /*kSetWritable=*/true>(
-                        *blob, output_paths[i], info.type)) {
-                    Logger::Log(LogLevel::Error,
-                                "staging to output path {} failed.",
-                                output_paths[i].string());
-                    return false;
-                }
-            }
-        }
-        return true;
-    }
+        -> bool;
 
+    /// \brief Retrieve artifacts from git and write to file descriptors.
+    /// Tree artifacts are not resolved and instead the tree object will be
+    /// pretty-printed before writing to fd. If `raw_tree` is set, pretty
+    /// printing will be omitted and the raw tree object will be written
+    /// instead.
     [[nodiscard]] auto RetrieveToFds(
         std::vector<Artifact::ObjectInfo> const& artifacts_info,
        std::vector<int> const& fds,
-        bool raw_tree) const noexcept -> bool {
-        if (artifacts_info.size() != fds.size()) {
-            Logger::Log(LogLevel::Error,
-                        "different number of digests and file descriptors.");
-            return false;
-        }
-        for (std::size_t i{}; i < artifacts_info.size(); ++i) {
-            auto const& info = artifacts_info[i];
-
-            std::string content;
-            if (IsTreeObject(info.type) and not raw_tree) {
-                auto tree =
-                    repo_config_->ReadTreeFromGitCAS(info.digest.hash());
-                if (not tree) {
-                    Logger::Log(LogLevel::Debug,
-                                "Tree {} not known to git",
-                                info.digest.hash());
-                    return false;
-                }
-
-                try {
-                    auto json = nlohmann::json::object();
-                    for (auto const& [path, entry] : *tree) {
-                        auto digest = ToArtifactDigest(*entry);
-                        if (not digest) {
-                            return false;
-                        }
-                        json[path] =
-                            Artifact::ObjectInfo{.digest = *std::move(digest),
-                                                 .type = entry->Type(),
-                                                 .failed = false}
-                                .ToString(/*size_unknown*/ true);
-                    }
-                    content = json.dump(2) + "\n";
-                } catch (...) {
-                    return false;
-                }
-            }
-            else {
-                auto blob =
-                    repo_config_->ReadBlobFromGitCAS(info.digest.hash());
-                if (not blob) {
-                    Logger::Log(LogLevel::Debug,
-                                "Blob {} not known to git",
-                                info.digest.hash());
-                    return false;
-                }
-                content = *std::move(blob);
-            }
-
-            if (gsl::owner<FILE*> out = fdopen(fds[i], "wb")) {  // NOLINT
-                std::fwrite(content.data(), 1, content.size(), out);
-                std::fclose(out);
-            }
-            else {
-                Logger::Log(LogLevel::Error,
-                            "dumping to file descriptor {} failed.",
-                            fds[i]);
-                return false;
-            }
-        }
-        return true;
-    }
+        bool raw_tree) const noexcept -> bool;
 
+    /// \brief Synchronization of artifacts between api and git. Retrieves
+    /// artifacts from git and uploads to api. Tree artifacts are resolved and
+    /// its containing file artifacts are recursively retrieved.
     [[nodiscard]] auto RetrieveToCas(
         std::vector<Artifact::ObjectInfo> const& artifacts_info,
-        IExecutionApi const& api) const noexcept -> bool {
-        // Determine missing artifacts in other CAS.
-        auto missing_artifacts_info =
-            GetMissingArtifactsInfo<Artifact::ObjectInfo>(
-                api,
-                artifacts_info.begin(),
-                artifacts_info.end(),
-                [](Artifact::ObjectInfo const& info) { return info.digest; });
-        if (not missing_artifacts_info) {
-            Logger::Log(LogLevel::Error,
-                        "GitApi: Failed to retrieve the missing artifacts");
-            return false;
-        }
-
-        // GitApi works in the native mode only.
-        HashFunction const hash_function{HashFunction::Type::GitSHA1};
-
-        // Collect blobs of missing artifacts from local CAS. Trees are
-        // processed recursively before any blob is uploaded.
-        std::unordered_set<ArtifactBlob> container;
-        for (auto const& dgst : missing_artifacts_info->digests) {
-            auto const& info = missing_artifacts_info->back_map[dgst];
-            std::optional<std::string> content;
-            // Recursively process trees.
-            if (IsTreeObject(info.type)) {
-                auto tree =
-                    repo_config_->ReadTreeFromGitCAS(info.digest.hash());
-                if (not tree) {
-                    return false;
-                }
-                std::unordered_set<ArtifactBlob> tree_deps_only_blobs;
-                for (auto const& [path, entry] : *tree) {
-                    if (entry->IsTree()) {
-                        auto digest = ToArtifactDigest(*entry);
-                        if (not digest or
-                            not RetrieveToCas({Artifact::ObjectInfo{
-                                                  .digest = *std::move(digest),
-                                                  .type = entry->Type(),
-                                                  .failed = false}},
-                                              api)) {
-                            return false;
-                        }
-                    }
-                    else {
-                        auto const& entry_content = entry->RawData();
-                        if (not entry_content) {
-                            return false;
-                        }
-                        auto digest =
-                            ArtifactDigestFactory::HashDataAs<ObjectType::File>(
-                                hash_function, *entry_content);
-                        // Collect blob and upload to remote CAS if transfer
-                        // size reached.
-                        if (not UpdateContainerAndUpload(
-                                &tree_deps_only_blobs,
-                                ArtifactBlob{std::move(digest),
-                                             *entry_content,
-                                             IsExecutableObject(entry->Type())},
-                                /*exception_is_fatal=*/true,
-                                [&api](std::unordered_set<ArtifactBlob>&& blobs)
-                                    -> bool {
-                                    return api.Upload(std::move(blobs));
-                                })) {
-                            return false;
-                        }
-                    }
-                }
-                // Upload remaining blobs.
-                if (not api.Upload(std::move(tree_deps_only_blobs))) {
-                    return false;
-                }
-                content = tree->RawData();
-            }
-            else {
-                content = repo_config_->ReadBlobFromGitCAS(info.digest.hash());
-            }
-            if (not content) {
-                return false;
-            }
-
-            ArtifactDigest digest =
-                IsTreeObject(info.type)
-                    ? ArtifactDigestFactory::HashDataAs<ObjectType::Tree>(
-                          hash_function, *content)
-                    : ArtifactDigestFactory::HashDataAs<ObjectType::File>(
-                          hash_function, *content);
-
-            // Collect blob and upload to remote CAS if transfer size reached.
-            if (not UpdateContainerAndUpload(
-                    &container,
-                    ArtifactBlob{std::move(digest),
-                                 std::move(*content),
-                                 IsExecutableObject(info.type)},
-                    /*exception_is_fatal=*/true,
-                    [&api](std::unordered_set<ArtifactBlob>&& blobs) {
-                        return api.Upload(std::move(blobs),
-                                          /*skip_find_missing=*/true);
-                    })) {
-                return false;
-            }
-        }
-
-        // Upload remaining blobs to remote CAS.
-        return api.Upload(std::move(container), /*skip_find_missing=*/true);
-    }
+        IExecutionApi const& api) const noexcept -> bool;
 
+    /// \brief Retrieve one artifact from git and make it available for further
+    /// in-memory processing
     [[nodiscard]] auto RetrieveToMemory(
         Artifact::ObjectInfo const& artifact_info) const noexcept
-        -> std::optional<std::string> {
-        return repo_config_->ReadBlobFromGitCAS(artifact_info.digest.hash());
-    }
+        -> std::optional<std::string>;
 
+    /// \brief Check if the given digest is available in git.
     [[nodiscard]] auto IsAvailable(ArtifactDigest const& digest) const noexcept
-        -> bool {
-        return repo_config_->ReadBlobFromGitCAS(digest.hash(), LogLevel::Trace)
-            .has_value();
-    }
+        -> bool;
 
     [[nodiscard]] auto GetMissingDigests(
         std::unordered_set<ArtifactDigest> const& digests) const noexcept
-        -> std::unordered_set<ArtifactDigest> {
-        std::unordered_set<ArtifactDigest> result;
-        result.reserve(digests.size());
-        for (auto const& digest : digests) {
-            if (not IsAvailable(digest)) {
-                result.emplace(digest);
-            }
-        }
-        return result;
-    }
+        -> std::unordered_set<ArtifactDigest>;
 
   private:
-    gsl::not_null<const RepositoryConfig*> repo_config_;
-
-    [[nodiscard]] static auto ToArtifactDigest(
-        GitTreeEntry const& entry) noexcept -> std::optional<ArtifactDigest> {
-        auto digest = ArtifactDigestFactory::Create(HashFunction::Type::GitSHA1,
-                                                    entry.Hash(),
-                                                    /*size=*/0,
-                                                    entry.IsTree());
-        if (not digest) {
-            return std::nullopt;
-        }
-        return *std::move(digest);
-    }
+    gsl::not_null<RepositoryConfig const*> repo_config_;
 };
 
 #endif  // INCLUDED_SRC_BUILDTOOL_EXECUTION_API_GIT_GIT_API_HPP
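
For orientation, a minimal caller sketch against the refactored interface follows. It is illustrative only and not part of this commit; the function name StageAndSyncTree, the remote_api argument, and the "out" staging directory are assumptions.

#include <filesystem>

#include "src/buildtool/common/artifact.hpp"
#include "src/buildtool/common/repository_config.hpp"
#include "src/buildtool/execution_api/common/execution_api.hpp"
#include "src/buildtool/execution_api/git/git_api.hpp"

// Hypothetical caller (not part of this change): stage one tree artifact
// from the git CAS to disk, then mirror it to another CAS via IExecutionApi.
auto StageAndSyncTree(RepositoryConfig const& repo_config,
                      IExecutionApi const& remote_api,
                      Artifact::ObjectInfo const& tree_info) -> bool {
    GitApi const git_api{&repo_config};
    // Trees are resolved recursively; contained files land under "out".
    if (not git_api.RetrieveToPaths({tree_info},
                                    {std::filesystem::path{"out"}})) {
        return false;
    }
    // Upload the same artifacts to the other CAS (missing ones only).
    return git_api.RetrieveToCas({tree_info}, remote_api);
}

With the implementation moved to git_api.cpp, such callers only pull in the declarations above, while the JSON, hashing, and file-system dependencies become private-deps of the library.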