diff options
Diffstat (limited to 'src/buildtool/execution_api/git/git_api.hpp')
-rw-r--r-- | src/buildtool/execution_api/git/git_api.hpp | 285 |
1 files changed, 22 insertions, 263 deletions
diff --git a/src/buildtool/execution_api/git/git_api.hpp b/src/buildtool/execution_api/git/git_api.hpp index 22834007..bf73c71b 100644 --- a/src/buildtool/execution_api/git/git_api.hpp +++ b/src/buildtool/execution_api/git/git_api.hpp @@ -15,304 +15,63 @@ #ifndef INCLUDED_SRC_BUILDTOOL_EXECUTION_API_GIT_GIT_API_HPP #define INCLUDED_SRC_BUILDTOOL_EXECUTION_API_GIT_GIT_API_HPP -#include <cstddef> -#include <cstdio> #include <filesystem> -#include <functional> -#include <memory> #include <optional> #include <string> -#include <unordered_map> #include <unordered_set> -#include <utility> #include <vector> #include "gsl/gsl" -#include "nlohmann/json.hpp" #include "src/buildtool/common/artifact.hpp" #include "src/buildtool/common/artifact_digest.hpp" -#include "src/buildtool/common/artifact_digest_factory.hpp" #include "src/buildtool/common/repository_config.hpp" -#include "src/buildtool/crypto/hash_function.hpp" -#include "src/buildtool/execution_api/common/artifact_blob.hpp" -#include "src/buildtool/execution_api/common/common_api.hpp" #include "src/buildtool/execution_api/common/execution_api.hpp" -#include "src/buildtool/file_system/file_system_manager.hpp" -#include "src/buildtool/file_system/git_tree.hpp" -#include "src/buildtool/file_system/object_type.hpp" -#include "src/buildtool/logging/log_level.hpp" -#include "src/buildtool/logging/logger.hpp" -#include "src/utils/cpp/expected.hpp" class GitApi final { public: - explicit GitApi(gsl::not_null<const RepositoryConfig*> const& repo_config) - : repo_config_{repo_config} {} + explicit GitApi(gsl::not_null<RepositoryConfig const*> const& repo_config); + /// \brief Retrieve artifacts from git and store to specified paths. + /// Tree artifacts are resolved and its containing file artifacts are + /// recursively retrieved. [[nodiscard]] auto RetrieveToPaths( std::vector<Artifact::ObjectInfo> const& artifacts_info, std::vector<std::filesystem::path> const& output_paths) const noexcept - -> bool { - if (artifacts_info.size() != output_paths.size()) { - Logger::Log(LogLevel::Error, - "different number of digests and output paths."); - return false; - } - for (std::size_t i{}; i < artifacts_info.size(); ++i) { - auto const& info = artifacts_info[i]; - if (IsTreeObject(info.type)) { - auto tree = - repo_config_->ReadTreeFromGitCAS(info.digest.hash()); - if (not tree) { - return false; - } - for (auto const& [path, entry] : *tree) { - auto digest = ToArtifactDigest(*entry); - if (not digest or - not RetrieveToPaths( - {Artifact::ObjectInfo{.digest = *std::move(digest), - .type = entry->Type(), - .failed = false}}, - {output_paths[i] / path})) { - return false; - } - } - } - else { - auto blob = - repo_config_->ReadBlobFromGitCAS(info.digest.hash()); - if (not blob) { - return false; - } - if (not FileSystemManager::CreateDirectory( - output_paths[i].parent_path()) or - not FileSystemManager::WriteFileAs</*kSetEpochTime=*/true, - /*kSetWritable=*/true>( - *blob, output_paths[i], info.type)) { - Logger::Log(LogLevel::Error, - "staging to output path {} failed.", - output_paths[i].string()); - return false; - } - } - } - return true; - } + -> bool; + /// \brief Retrieve artifacts from git and write to file descriptors. + /// Tree artifacts are not resolved and instead the tree object will be + /// pretty-printed before writing to fd. If `raw_tree` is set, pretty + /// printing will be omitted and the raw tree object will be written + /// instead. [[nodiscard]] auto RetrieveToFds( std::vector<Artifact::ObjectInfo> const& artifacts_info, std::vector<int> const& fds, - bool raw_tree) const noexcept -> bool { - if (artifacts_info.size() != fds.size()) { - Logger::Log(LogLevel::Error, - "different number of digests and file descriptors."); - return false; - } - for (std::size_t i{}; i < artifacts_info.size(); ++i) { - auto const& info = artifacts_info[i]; - - std::string content; - if (IsTreeObject(info.type) and not raw_tree) { - auto tree = - repo_config_->ReadTreeFromGitCAS(info.digest.hash()); - if (not tree) { - Logger::Log(LogLevel::Debug, - "Tree {} not known to git", - info.digest.hash()); - return false; - } - - try { - auto json = nlohmann::json::object(); - for (auto const& [path, entry] : *tree) { - auto digest = ToArtifactDigest(*entry); - if (not digest) { - return false; - } - json[path] = - Artifact::ObjectInfo{.digest = *std::move(digest), - .type = entry->Type(), - .failed = false} - .ToString(/*size_unknown*/ true); - } - content = json.dump(2) + "\n"; - } catch (...) { - return false; - } - } - else { - auto blob = - repo_config_->ReadBlobFromGitCAS(info.digest.hash()); - if (not blob) { - Logger::Log(LogLevel::Debug, - "Blob {} not known to git", - info.digest.hash()); - return false; - } - content = *std::move(blob); - } - - if (gsl::owner<FILE*> out = fdopen(fds[i], "wb")) { // NOLINT - std::fwrite(content.data(), 1, content.size(), out); - std::fclose(out); - } - else { - Logger::Log(LogLevel::Error, - "dumping to file descriptor {} failed.", - fds[i]); - return false; - } - } - return true; - } + bool raw_tree) const noexcept -> bool; + /// \brief Synchronization of artifacts between api and git. Retrieves + /// artifacts from git and uploads to api. Tree artifacts are resolved and + /// its containing file artifacts are recursively retrieved. [[nodiscard]] auto RetrieveToCas( std::vector<Artifact::ObjectInfo> const& artifacts_info, - IExecutionApi const& api) const noexcept -> bool { - // Determine missing artifacts in other CAS. - auto missing_artifacts_info = - GetMissingArtifactsInfo<Artifact::ObjectInfo>( - api, - artifacts_info.begin(), - artifacts_info.end(), - [](Artifact::ObjectInfo const& info) { return info.digest; }); - if (not missing_artifacts_info) { - Logger::Log(LogLevel::Error, - "GitApi: Failed to retrieve the missing artifacts"); - return false; - } - - // GitApi works in the native mode only. - HashFunction const hash_function{HashFunction::Type::GitSHA1}; - - // Collect blobs of missing artifacts from local CAS. Trees are - // processed recursively before any blob is uploaded. - std::unordered_set<ArtifactBlob> container; - for (auto const& dgst : missing_artifacts_info->digests) { - auto const& info = missing_artifacts_info->back_map[dgst]; - std::optional<std::string> content; - // Recursively process trees. - if (IsTreeObject(info.type)) { - auto tree = - repo_config_->ReadTreeFromGitCAS(info.digest.hash()); - if (not tree) { - return false; - } - std::unordered_set<ArtifactBlob> tree_deps_only_blobs; - for (auto const& [path, entry] : *tree) { - if (entry->IsTree()) { - auto digest = ToArtifactDigest(*entry); - if (not digest or - not RetrieveToCas({Artifact::ObjectInfo{ - .digest = *std::move(digest), - .type = entry->Type(), - .failed = false}}, - api)) { - return false; - } - } - else { - auto const& entry_content = entry->RawData(); - if (not entry_content) { - return false; - } - auto digest = - ArtifactDigestFactory::HashDataAs<ObjectType::File>( - hash_function, *entry_content); - // Collect blob and upload to remote CAS if transfer - // size reached. - if (not UpdateContainerAndUpload( - &tree_deps_only_blobs, - ArtifactBlob{std::move(digest), - *entry_content, - IsExecutableObject(entry->Type())}, - /*exception_is_fatal=*/true, - [&api](std::unordered_set<ArtifactBlob>&& blobs) - -> bool { - return api.Upload(std::move(blobs)); - })) { - return false; - } - } - } - // Upload remaining blobs. - if (not api.Upload(std::move(tree_deps_only_blobs))) { - return false; - } - content = tree->RawData(); - } - else { - content = repo_config_->ReadBlobFromGitCAS(info.digest.hash()); - } - if (not content) { - return false; - } - - ArtifactDigest digest = - IsTreeObject(info.type) - ? ArtifactDigestFactory::HashDataAs<ObjectType::Tree>( - hash_function, *content) - : ArtifactDigestFactory::HashDataAs<ObjectType::File>( - hash_function, *content); - - // Collect blob and upload to remote CAS if transfer size reached. - if (not UpdateContainerAndUpload( - &container, - ArtifactBlob{std::move(digest), - std::move(*content), - IsExecutableObject(info.type)}, - /*exception_is_fatal=*/true, - [&api](std::unordered_set<ArtifactBlob>&& blobs) { - return api.Upload(std::move(blobs), - /*skip_find_missing=*/true); - })) { - return false; - } - } - - // Upload remaining blobs to remote CAS. - return api.Upload(std::move(container), /*skip_find_missing=*/true); - } + IExecutionApi const& api) const noexcept -> bool; + /// \brief Retrieve one artifact from git and make it available for further + /// in-memory processing [[nodiscard]] auto RetrieveToMemory( Artifact::ObjectInfo const& artifact_info) const noexcept - -> std::optional<std::string> { - return repo_config_->ReadBlobFromGitCAS(artifact_info.digest.hash()); - } + -> std::optional<std::string>; + /// \brief Check if the given digest is available in git. [[nodiscard]] auto IsAvailable(ArtifactDigest const& digest) const noexcept - -> bool { - return repo_config_->ReadBlobFromGitCAS(digest.hash(), LogLevel::Trace) - .has_value(); - } + -> bool; [[nodiscard]] auto GetMissingDigests( std::unordered_set<ArtifactDigest> const& digests) const noexcept - -> std::unordered_set<ArtifactDigest> { - std::unordered_set<ArtifactDigest> result; - result.reserve(digests.size()); - for (auto const& digest : digests) { - if (not IsAvailable(digest)) { - result.emplace(digest); - } - } - return result; - } + -> std::unordered_set<ArtifactDigest>; private: - gsl::not_null<const RepositoryConfig*> repo_config_; - - [[nodiscard]] static auto ToArtifactDigest( - GitTreeEntry const& entry) noexcept -> std::optional<ArtifactDigest> { - auto digest = ArtifactDigestFactory::Create(HashFunction::Type::GitSHA1, - entry.Hash(), - /*size=*/0, - entry.IsTree()); - if (not digest) { - return std::nullopt; - } - return *std::move(digest); - } + gsl::not_null<RepositoryConfig const*> repo_config_; }; #endif // INCLUDED_SRC_BUILDTOOL_EXECUTION_API_GIT_GIT_API_HPP |