diff options
author | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2024-09-24 11:52:07 +0200 |
---|---|---|
committer | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2024-10-25 13:00:43 +0200 |
commit | 7419d9b0b7ebf5ab3a7ab0f3217f92ad06cb7ffb (patch) | |
tree | a97a9a859d7dd44e037d86e146bd4d264c3d817a | |
parent | 7571f74dbc53e9b4fe3b9000662ca686960db78e (diff) | |
download | justbuild-7419d9b0b7ebf5ab3a7ab0f3217f92ad06cb7ffb.tar.gz |
BazelMsgFactory: Add method to create bazel Directory from Git tree
3 files changed, 247 insertions, 1 deletion
diff --git a/src/buildtool/execution_api/bazel_msg/TARGETS b/src/buildtool/execution_api/bazel_msg/TARGETS index 98b1c736..5c69e161 100644 --- a/src/buildtool/execution_api/bazel_msg/TARGETS +++ b/src/buildtool/execution_api/bazel_msg/TARGETS @@ -29,11 +29,13 @@ , ["src/buildtool/crypto", "hash_function"] , ["src/buildtool/execution_api/common", "artifact_blob_container"] , ["src/buildtool/execution_engine/dag", "dag"] + , ["src/buildtool/file_system", "object_type"] , ["src/buildtool/logging", "log_level"] , ["src/buildtool/logging", "logging"] + , ["src/utils/cpp", "expected"] ] , "private-deps": - [ ["src/buildtool/common", "artifact_digest_factory"] + [ ["@", "fmt", "", "fmt"] , ["src/buildtool/file_system", "file_system_manager"] , ["src/buildtool/file_system", "git_repo"] , ["src/utils/cpp", "hex_string"] diff --git a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp index 9eaecd7a..0d5aa8b0 100644 --- a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp +++ b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp @@ -25,6 +25,7 @@ #include <utility> // std::move #include <vector> +#include "fmt/core.h" #include "src/buildtool/common/artifact_digest_factory.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/file_system/git_repo.hpp" @@ -292,6 +293,31 @@ struct DirectoryNodeBundle final { return std::nullopt; } +[[nodiscard]] auto GetContentFromGitEntry( + BazelMsgFactory::GitReadFunc const& read_git, + ArtifactDigest const& digest, + ObjectType entry_type) -> expected<std::string, std::string> { + auto read_git_res = read_git(digest, entry_type); + if (not read_git_res) { + return unexpected{ + fmt::format("failed reading Git entry {}", digest.hash())}; + } + if (std::holds_alternative<std::string>(read_git_res.value())) { + return std::get<std::string>(std::move(read_git_res).value()); + } + if 
(std::holds_alternative<std::filesystem::path>(read_git_res.value())) { + auto content = FileSystemManager::ReadFile( + std::get<std::filesystem::path>(std::move(read_git_res).value())); + if (not content) { + return unexpected{fmt::format("failed reading content of tree {}", + digest.hash())}; + } + return *std::move(content); + } + return unexpected{ + fmt::format("unexpected failure reading Git entry {}", digest.hash())}; +} + } // namespace auto BazelMsgFactory::CreateDirectoryDigestFromTree( @@ -315,6 +341,183 @@ auto BazelMsgFactory::CreateDirectoryDigestFromTree( return digest; } +auto BazelMsgFactory::CreateDirectoryDigestFromGitTree( + ArtifactDigest const& digest, + GitReadFunc const& read_git, + BlobStoreFunc const& store_file, + TreeStoreFunc const& store_dir, + SymlinkStoreFunc const& store_symlink, + RehashedDigestReadFunc const& read_rehashed, + RehashedDigestStoreFunc const& store_rehashed) noexcept + -> expected<ArtifactDigest, std::string> { + std::vector<bazel_re::FileNode> files{}; + std::vector<bazel_re::DirectoryNode> dirs{}; + std::vector<bazel_re::SymlinkNode> symlinks{}; + + try { + // read tree object + auto const tree_content = + GetContentFromGitEntry(read_git, digest, ObjectType::Tree); + if (not tree_content) { + return unexpected{tree_content.error()}; + } + auto const check_symlinks = + [&read_git](std::vector<ArtifactDigest> const& ids) { + return std::all_of(ids.begin(), + ids.end(), + [&read_git](auto const& id) -> bool { + auto content = GetContentFromGitEntry( + read_git, id, ObjectType::Symlink); + return content and + PathIsNonUpwards(*content); + }); + }; + + // Git-SHA1 hashing is used for reading from git + HashFunction const hash_function{HashFunction::Type::GitSHA1}; + // the tree digest is in native mode, so no need for rehashing content + auto const entries = GitRepo::ReadTreeData( + *tree_content, digest.hash(), check_symlinks, /*is_hex_id=*/true); + if (not entries) { + return unexpected{fmt::format("failed reading 
entries of tree {}", + digest.hash())}; + } + + // handle tree entries + for (auto const& [raw_id, es] : *entries) { + auto const hex_id = ToHexString(raw_id); + for (auto const& entry : es) { + // get native digest of entry + auto const git_digest = + ArtifactDigestFactory::Create(HashFunction::Type::GitSHA1, + hex_id, + /*size is unknown*/ 0, + IsTreeObject(entry.type)); + if (not git_digest) { + return unexpected{git_digest.error()}; + } + // get any cached digest mapping, to avoid unnecessary work + auto const cached_obj = read_rehashed(*git_digest); + if (not cached_obj) { + return unexpected{cached_obj.error()}; + } + // create and store the directory entry + switch (entry.type) { + case ObjectType::Tree: { + if (cached_obj.value()) { + // no work to be done if we already know the digest + dirs.emplace_back(CreateDirectoryNode( + entry.name, cached_obj.value()->digest)); + } + else { + // create and store sub directory + auto const dir_digest = + CreateDirectoryDigestFromGitTree( + *git_digest, + read_git, + store_file, + store_dir, + store_symlink, + read_rehashed, + store_rehashed); + if (not dir_digest) { + return unexpected{dir_digest.error()}; + } + dirs.emplace_back( + CreateDirectoryNode(entry.name, *dir_digest)); + // no need to cache the digest mapping, as this was + // done in the recursive call + } + } break; + case ObjectType::Symlink: { + // create and store symlink; for this entry type the + // cached digest is ignored because we always need the + // target (i.e., the symlink content) + auto const sym_target = GetContentFromGitEntry( + read_git, *git_digest, ObjectType::Symlink); + if (not sym_target) { + return unexpected{sym_target.error()}; + } + auto const sym_digest = store_symlink(*sym_target); + if (not sym_digest) { + return unexpected{fmt::format( + "failed storing symlink {}", hex_id)}; + } + symlinks.emplace_back( + CreateSymlinkNode(entry.name, *sym_target)); + // while useless for future symlinks, cache digest + // mapping for 
file-type blobs with same content + if (auto error_msg = + store_rehashed(*git_digest, + *sym_digest, + ObjectType::Symlink)) { + return unexpected{*std::move(error_msg)}; + } + } break; + default: { + if (cached_obj.value()) { + // no work to be done if we already know the digest + files.emplace_back( + CreateFileNode(entry.name, + entry.type, + cached_obj.value()->digest)); + } + else { + // create and store file; here we want to NOT read + // the content if from CAS, where we can rehash via + // streams! + auto const read_git_file = + read_git(*git_digest, entry.type); + if (not read_git_file) { + return unexpected{ + fmt::format("failed reading Git entry ")}; + } + auto const file_digest = store_file( + *read_git_file, IsExecutableObject(entry.type)); + if (not file_digest) { + return unexpected{fmt::format( + "failed storing file {}", hex_id)}; + } + files.emplace_back(CreateFileNode( + entry.name, entry.type, *file_digest)); + // cache digest mapping + if (auto error_msg = store_rehashed( + *git_digest, *file_digest, entry.type)) { + return unexpected{*std::move(error_msg)}; + } + } + } + } + } + } + + // create and store tree + auto const bytes = + SerializeMessage(CreateDirectory(files, dirs, symlinks)); + if (not bytes) { + return unexpected{ + fmt::format("failed serializing bazel Directory for tree {}", + digest.hash())}; + } + auto const tree_digest = store_dir(*bytes); + if (not tree_digest) { + return unexpected{fmt::format( + "failed storing bazel Directory for tree {}", digest.hash())}; + } + // cache digest mapping + if (auto error_msg = + store_rehashed(digest, *tree_digest, ObjectType::Tree)) { + return unexpected{*std::move(error_msg)}; + } + // return digest + return *tree_digest; + } catch (std::exception const& ex) { + return unexpected{fmt::format( + "creating bazel Directory digest unexpectedly failed with:\n{}", + ex.what())}; + } +} + auto BazelMsgFactory::CreateDirectoryDigestFromLocalTree( std::filesystem::path const& root, 
FileStoreFunc const& store_file, diff --git a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp index 51f1c5f0..540aaa39 100644 --- a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp +++ b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp @@ -21,6 +21,7 @@ #include <memory> #include <optional> #include <string> +#include <utility> #include <vector> #include "gsl/gsl" @@ -32,8 +33,10 @@ #include "src/buildtool/execution_api/bazel_msg/directory_tree.hpp" #include "src/buildtool/execution_api/common/artifact_blob_container.hpp" #include "src/buildtool/execution_engine/dag/dag.hpp" +#include "src/buildtool/file_system/object_type.hpp" #include "src/buildtool/logging/log_level.hpp" #include "src/buildtool/logging/logger.hpp" +#include "src/utils/cpp/expected.hpp" /// \brief Factory for creating Bazel API protobuf messages. /// Responsible for creating protobuf messages necessary for Bazel API server @@ -45,12 +48,25 @@ class BazelMsgFactory { using LinkDigestResolveFunc = std::function<void(std::vector<ArtifactDigest> const&, gsl::not_null<std::vector<std::string>*> const&)>; + using GitReadFunc = std::function<std::optional< + std::variant<std::filesystem::path, std::string>>(ArtifactDigest const&, + ObjectType)>; + using BlobStoreFunc = std::function<std::optional<ArtifactDigest>( + std::variant<std::filesystem::path, std::string> const&, + bool)>; using FileStoreFunc = std::function< std::optional<ArtifactDigest>(std::filesystem::path const&, bool)>; using SymlinkStoreFunc = std::function<std::optional<ArtifactDigest>(std::string const&)>; using TreeStoreFunc = std::function<std::optional<ArtifactDigest>(std::string const&)>; + using RehashedDigestReadFunc = + std::function<expected<std::optional<Artifact::ObjectInfo>, + std::string>(ArtifactDigest const&)>; + using RehashedDigestStoreFunc = + std::function<std::optional<std::string>(ArtifactDigest const&, + ArtifactDigest 
const&, + ObjectType)>; /// \brief Create Directory digest from artifact tree structure. Uses /// compatible HashFunction for hashing. Recursively traverse entire tree @@ -65,6 +81,31 @@ class BazelMsgFactory { BlobProcessFunc const& process_blob) noexcept -> std::optional<ArtifactDigest>; + /// \brief Create Directory digest from an owned Git tree. + /// Recursively traverse entire tree and store files and directories. + /// Used to convert from native to compatible representation of trees. + /// \param digest Digest of a Git tree. + /// \param read_git Function for reading Git tree entries. Reading from + /// CAS returns the CAS path, while reading from Git CAS + /// returns content directly. This differentiation is + /// made to avoid unnecessary storing blobs in memory. + /// \param store_file Function for storing file via path or content. + /// \param store_dir Function for storing Directory blobs. + /// \param store_symlink Function for storing symlink via content. + /// \param read_rehashed Function to read mapping between digests. + /// \param store_rehashed Function to store mapping between digests. + /// \returns Digest representing the entire tree directory, or error string + /// on failure. + [[nodiscard]] static auto CreateDirectoryDigestFromGitTree( + ArtifactDigest const& digest, + GitReadFunc const& read_git, + BlobStoreFunc const& store_file, + TreeStoreFunc const& store_dir, + SymlinkStoreFunc const& store_symlink, + RehashedDigestReadFunc const& read_rehashed, + RehashedDigestStoreFunc const& store_rehashed) noexcept + -> expected<ArtifactDigest, std::string>; + /// \brief Create Directory digest from local file root. /// Recursively traverse entire root and store files and directories. /// \param root Path to local file root. |