summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPaul Cristian Sarbu <paul.cristian.sarbu@huawei.com>2024-09-24 11:52:07 +0200
committerPaul Cristian Sarbu <paul.cristian.sarbu@huawei.com>2024-10-25 13:00:43 +0200
commit7419d9b0b7ebf5ab3a7ab0f3217f92ad06cb7ffb (patch)
treea97a9a859d7dd44e037d86e146bd4d264c3d817a /src
parent7571f74dbc53e9b4fe3b9000662ca686960db78e (diff)
downloadjustbuild-7419d9b0b7ebf5ab3a7ab0f3217f92ad06cb7ffb.tar.gz
BazelMsgFactory: Add method to create bazel Directory from Git tree
Diffstat (limited to 'src')
-rw-r--r--src/buildtool/execution_api/bazel_msg/TARGETS4
-rw-r--r--src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp203
-rw-r--r--src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp41
3 files changed, 247 insertions, 1 deletions
diff --git a/src/buildtool/execution_api/bazel_msg/TARGETS b/src/buildtool/execution_api/bazel_msg/TARGETS
index 98b1c736..5c69e161 100644
--- a/src/buildtool/execution_api/bazel_msg/TARGETS
+++ b/src/buildtool/execution_api/bazel_msg/TARGETS
@@ -29,11 +29,13 @@
, ["src/buildtool/crypto", "hash_function"]
, ["src/buildtool/execution_api/common", "artifact_blob_container"]
, ["src/buildtool/execution_engine/dag", "dag"]
+ , ["src/buildtool/file_system", "object_type"]
, ["src/buildtool/logging", "log_level"]
, ["src/buildtool/logging", "logging"]
+ , ["src/utils/cpp", "expected"]
]
, "private-deps":
- [ ["src/buildtool/common", "artifact_digest_factory"]
+ [ ["@", "fmt", "", "fmt"]
, ["src/buildtool/file_system", "file_system_manager"]
, ["src/buildtool/file_system", "git_repo"]
, ["src/utils/cpp", "hex_string"]
diff --git a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp
index 9eaecd7a..0d5aa8b0 100644
--- a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp
+++ b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp
@@ -25,6 +25,7 @@
#include <utility> // std::move
#include <vector>
+#include "fmt/core.h"
#include "src/buildtool/common/artifact_digest_factory.hpp"
#include "src/buildtool/file_system/file_system_manager.hpp"
#include "src/buildtool/file_system/git_repo.hpp"
@@ -292,6 +293,31 @@ struct DirectoryNodeBundle final {
return std::nullopt;
}
+/// \brief Obtain the content of a Git entry as a string.
+/// The reader may hand back either the content itself or a filesystem path;
+/// in the latter case the file at that path is read into memory.
+/// \param read_git     Function used to read the Git entry.
+/// \param digest       Digest of the entry to read.
+/// \param entry_type   Type of the entry; forwarded unchanged to read_git.
+/// \returns The entry content, or an error message on failure.
+[[nodiscard]] auto GetContentFromGitEntry(
+    BazelMsgFactory::GitReadFunc const& read_git,
+    ArtifactDigest const& digest,
+    ObjectType entry_type) -> expected<std::string, std::string> {
+    auto read_git_res = read_git(digest, entry_type);
+    if (not read_git_res) {
+        return unexpected{
+            fmt::format("failed reading Git entry {}", digest.hash())};
+    }
+    // the reader returned the content directly
+    if (std::holds_alternative<std::string>(read_git_res.value())) {
+        return std::get<std::string>(std::move(read_git_res).value());
+    }
+    // the reader returned a path; the content has to be read from that file
+    if (std::holds_alternative<std::filesystem::path>(read_git_res.value())) {
+        auto content = FileSystemManager::ReadFile(
+            std::get<std::filesystem::path>(std::move(read_git_res).value()));
+        if (not content) {
+            // this helper is used for symlinks as well as trees, so the
+            // message must not claim the entry is a tree
+            return unexpected{fmt::format(
+                "failed reading content of Git entry {}", digest.hash())};
+        }
+        return *std::move(content);
+    }
+    // only reachable if the variant holds neither alternative
+    return unexpected{
+        fmt::format("unexpected failure reading Git entry {}", digest.hash())};
+}
+
} // namespace
auto BazelMsgFactory::CreateDirectoryDigestFromTree(
@@ -315,6 +341,183 @@ auto BazelMsgFactory::CreateDirectoryDigestFromTree(
return digest;
}
+/// \brief Create a bazel Directory from a Git tree and return its digest.
+/// Reads the tree via read_git, parses its entries, and for every entry adds
+/// a file, directory, or symlink node. Subtrees are handled by recursion.
+/// Known digest mappings are looked up via read_rehashed and reused for files
+/// and trees; newly computed mappings are recorded via store_rehashed.
+/// Exceptions derived from std::exception are caught and reported as error
+/// strings.
+/// \param digest           Digest of the Git tree to convert.
+/// \param read_git         Function for reading Git entries.
+/// \param store_file       Function for storing a file (path or content).
+/// \param store_dir        Function for storing a serialized Directory.
+/// \param store_symlink    Function for storing a symlink target.
+/// \param read_rehashed    Function for reading a cached digest mapping.
+/// \param store_rehashed   Function for storing a digest mapping.
+/// \returns Digest of the stored Directory, or an error message on failure.
+auto BazelMsgFactory::CreateDirectoryDigestFromGitTree(
+    ArtifactDigest const& digest,
+    GitReadFunc const& read_git,
+    BlobStoreFunc const& store_file,
+    TreeStoreFunc const& store_dir,
+    SymlinkStoreFunc const& store_symlink,
+    RehashedDigestReadFunc const& read_rehashed,
+    RehashedDigestStoreFunc const& store_rehashed) noexcept
+    -> expected<ArtifactDigest, std::string> {
+    std::vector<bazel_re::FileNode> files{};
+    std::vector<bazel_re::DirectoryNode> dirs{};
+    std::vector<bazel_re::SymlinkNode> symlinks{};
+
+    try {
+        // read tree object
+        auto const tree_content =
+            GetContentFromGitEntry(read_git, digest, ObjectType::Tree);
+        if (not tree_content) {
+            return unexpected{tree_content.error()};
+        }
+        // a symlink is accepted only if its target can be read and is
+        // non-upwards
+        auto const check_symlinks =
+            [&read_git](std::vector<ArtifactDigest> const& ids) {
+                return std::all_of(ids.begin(),
+                                   ids.end(),
+                                   [&read_git](auto const& id) -> bool {
+                                       auto content = GetContentFromGitEntry(
+                                           read_git, id, ObjectType::Symlink);
+                                       return content and
+                                              PathIsNonUpwards(*content);
+                                   });
+            };
+
+        // the tree digest is in native mode, so no need for rehashing content
+        auto const entries = GitRepo::ReadTreeData(
+            *tree_content, digest.hash(), check_symlinks, /*is_hex_id=*/true);
+        if (not entries) {
+            return unexpected{fmt::format("failed reading entries of tree {}",
+                                          digest.hash())};
+        }
+
+        // handle tree entries
+        for (auto const& [raw_id, es] : *entries) {
+            auto const hex_id = ToHexString(raw_id);
+            for (auto const& entry : es) {
+                // get native (Git-SHA1) digest of entry
+                auto const git_digest =
+                    ArtifactDigestFactory::Create(HashFunction::Type::GitSHA1,
+                                                  hex_id,
+                                                  /*size is unknown*/ 0,
+                                                  IsTreeObject(entry.type));
+                if (not git_digest) {
+                    return unexpected{git_digest.error()};
+                }
+                // get any cached digest mapping, to avoid unnecessary work
+                auto const cached_obj = read_rehashed(*git_digest);
+                if (not cached_obj) {
+                    return unexpected{cached_obj.error()};
+                }
+                // create and store the directory entry
+                switch (entry.type) {
+                    case ObjectType::Tree: {
+                        if (cached_obj.value()) {
+                            // no work to be done if we already know the digest
+                            dirs.emplace_back(CreateDirectoryNode(
+                                entry.name, cached_obj.value()->digest));
+                        }
+                        else {
+                            // create and store sub directory
+                            auto const dir_digest =
+                                CreateDirectoryDigestFromGitTree(
+                                    *git_digest,
+                                    read_git,
+                                    store_file,
+                                    store_dir,
+                                    store_symlink,
+                                    read_rehashed,
+                                    store_rehashed);
+                            if (not dir_digest) {
+                                return unexpected{dir_digest.error()};
+                            }
+                            dirs.emplace_back(
+                                CreateDirectoryNode(entry.name, *dir_digest));
+                            // no need to cache the digest mapping, as this was
+                            // done in the recursive call
+                        }
+                    } break;
+                    case ObjectType::Symlink: {
+                        // create and store symlink; for this entry type the
+                        // cached digest is ignored because we always need the
+                        // target (i.e., the symlink content)
+                        auto const sym_target = GetContentFromGitEntry(
+                            read_git, *git_digest, ObjectType::Symlink);
+                        if (not sym_target) {
+                            return unexpected{sym_target.error()};
+                        }
+                        auto const sym_digest = store_symlink(*sym_target);
+                        if (not sym_digest) {
+                            return unexpected{fmt::format(
+                                "failed storing symlink {}", hex_id)};
+                        }
+                        symlinks.emplace_back(
+                            CreateSymlinkNode(entry.name, *sym_target));
+                        // while useless for future symlinks, cache digest
+                        // mapping for file-type blobs with same content
+                        if (auto error_msg =
+                                store_rehashed(*git_digest,
+                                               *sym_digest,
+                                               ObjectType::Symlink)) {
+                            return unexpected{*std::move(error_msg)};
+                        }
+                    } break;
+                    default: {
+                        if (cached_obj.value()) {
+                            // no work to be done if we already know the digest
+                            files.emplace_back(
+                                CreateFileNode(entry.name,
+                                               entry.type,
+                                               cached_obj.value()->digest));
+                        }
+                        else {
+                            // create and store file; here we want to NOT read
+                            // the content if from CAS, where we can rehash via
+                            // streams!
+                            auto const read_git_file =
+                                read_git(*git_digest, entry.type);
+                            if (not read_git_file) {
+                                // name the entry, as the sibling error
+                                // messages do
+                                return unexpected{fmt::format(
+                                    "failed reading Git entry {}", hex_id)};
+                            }
+                            auto const file_digest = store_file(
+                                *read_git_file, IsExecutableObject(entry.type));
+                            if (not file_digest) {
+                                return unexpected{fmt::format(
+                                    "failed storing file {}", hex_id)};
+                            }
+                            files.emplace_back(CreateFileNode(
+                                entry.name, entry.type, *file_digest));
+                            // cache digest mapping
+                            if (auto error_msg = store_rehashed(
+                                    *git_digest, *file_digest, entry.type)) {
+                                return unexpected{*std::move(error_msg)};
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // create and store tree
+        auto const bytes =
+            SerializeMessage(CreateDirectory(files, dirs, symlinks));
+        if (not bytes) {
+            return unexpected{
+                fmt::format("failed serializing bazel Directory for tree {}",
+                            digest.hash())};
+        }
+        auto const tree_digest = store_dir(*bytes);
+        if (not tree_digest) {
+            return unexpected{fmt::format(
+                "failed storing bazel Directory for tree {}", digest.hash())};
+        }
+        // cache digest mapping
+        if (auto error_msg =
+                store_rehashed(digest, *tree_digest, ObjectType::Tree)) {
+            return unexpected{*std::move(error_msg)};
+        }
+        // return digest
+        return *tree_digest;
+    } catch (std::exception const& ex) {
+        return unexpected{fmt::format(
+            "creating bazel Directory digest unexpectedly failed with:\n{}",
+            ex.what())};
+    }
+}
+
auto BazelMsgFactory::CreateDirectoryDigestFromLocalTree(
std::filesystem::path const& root,
FileStoreFunc const& store_file,
diff --git a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp
index 51f1c5f0..540aaa39 100644
--- a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp
+++ b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp
@@ -21,6 +21,7 @@
#include <memory>
#include <optional>
#include <string>
+#include <utility>
#include <vector>
#include "gsl/gsl"
@@ -32,8 +33,10 @@
#include "src/buildtool/execution_api/bazel_msg/directory_tree.hpp"
#include "src/buildtool/execution_api/common/artifact_blob_container.hpp"
#include "src/buildtool/execution_engine/dag/dag.hpp"
+#include "src/buildtool/file_system/object_type.hpp"
#include "src/buildtool/logging/log_level.hpp"
#include "src/buildtool/logging/logger.hpp"
+#include "src/utils/cpp/expected.hpp"
/// \brief Factory for creating Bazel API protobuf messages.
/// Responsible for creating protobuf messages necessary for Bazel API server
@@ -45,12 +48,25 @@ class BazelMsgFactory {
using LinkDigestResolveFunc =
std::function<void(std::vector<ArtifactDigest> const&,
gsl::not_null<std::vector<std::string>*> const&)>;
+ /// \brief Reads a Git entry given its digest and object type. The result
+ /// holds either a filesystem path or the content as a string; callers treat
+ /// an empty optional as a read failure.
+ using GitReadFunc = std::function<std::optional<
+ std::variant<std::filesystem::path, std::string>>(ArtifactDigest const&,
+ ObjectType)>;
+ /// \brief Stores a blob given either as a filesystem path or as content.
+ /// The bool argument is passed IsExecutableObject(type) of the entry by the
+ /// caller. Callers treat an empty optional as a store failure.
+ using BlobStoreFunc = std::function<std::optional<ArtifactDigest>(
+ std::variant<std::filesystem::path, std::string> const&,
+ bool)>;
using FileStoreFunc = std::function<
std::optional<ArtifactDigest>(std::filesystem::path const&, bool)>;
using SymlinkStoreFunc =
std::function<std::optional<ArtifactDigest>(std::string const&)>;
using TreeStoreFunc =
std::function<std::optional<ArtifactDigest>(std::string const&)>;
+ /// \brief Looks up a cached digest mapping for the given digest. Yields an
+ /// error string on failure; callers treat an empty optional as "no mapping
+ /// known" and compute the digest themselves.
+ using RehashedDigestReadFunc =
+ std::function<expected<std::optional<Artifact::ObjectInfo>,
+ std::string>(ArtifactDigest const&)>;
+ /// \brief Records a digest mapping; called with the source digest, the
+ /// newly stored digest, and the object type. A returned string is treated
+ /// by callers as an error message; an empty optional means success.
+ using RehashedDigestStoreFunc =
+ std::function<std::optional<std::string>(ArtifactDigest const&,
+ ArtifactDigest const&,
+ ObjectType)>;
/// \brief Create Directory digest from artifact tree structure. Uses
/// compatible HashFunction for hashing. Recursively traverse entire tree
@@ -65,6 +81,31 @@ class BazelMsgFactory {
BlobProcessFunc const& process_blob) noexcept
-> std::optional<ArtifactDigest>;
+ /// \brief Create Directory digest from an owned Git tree.
+ /// Recursively traverse entire tree and store files and directories.
+ /// Used to convert from native to compatible representation of trees.
+ /// \param digest Digest of a Git tree.
+ /// \param read_git Function for reading Git tree entries. Reading from
+ /// CAS returns the CAS path, while reading from Git CAS
+ /// returns content directly. This differentiation is
+ /// made to avoid unnecessarily storing blobs in memory.
+ /// \param store_file Function for storing file via path or content.
+ /// \param store_dir Function for storing Directory blobs.
+ /// \param store_symlink Function for storing symlink via content.
+ /// \param read_rehashed Function to read mapping between digests.
+ /// \param store_rehashed Function to store mapping between digests.
+ /// \returns Digest representing the entire tree directory, or error string
+ /// on failure.
+ [[nodiscard]] static auto CreateDirectoryDigestFromGitTree(
+ ArtifactDigest const& digest,
+ GitReadFunc const& read_git,
+ BlobStoreFunc const& store_file,
+ TreeStoreFunc const& store_dir,
+ SymlinkStoreFunc const& store_symlink,
+ RehashedDigestReadFunc const& read_rehashed,
+ RehashedDigestStoreFunc const& store_rehashed) noexcept
+ -> expected<ArtifactDigest, std::string>;
+
/// \brief Create Directory digest from local file root.
/// Recursively traverse entire root and store files and directories.
/// \param root Path to local file root.