author    | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2024-09-25 16:45:19 +0200
committer | Klaus Aehlig <klaus.aehlig@huawei.com>               | 2024-12-04 18:03:44 +0100
commit    | 321c814684e2a0717d05c24119b5dc08d4971e44 (patch)
tree      | 007a537cb6f5bb34f204c227960d5d83e0ff967f /src/buildtool/execution_api
parent    | d3ce7e1633ec8be6d76e43f7e28b4155c9f06b2a (diff)
download  | justbuild-321c814684e2a0717d05c24119b5dc08d4971e44.tar.gz
BazelMsgFactory: Add method to create Git tree from bazel Directory
Diffstat (limited to 'src/buildtool/execution_api')
-rw-r--r-- | src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp | 217
-rw-r--r-- | src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp |  26
2 files changed, 243 insertions, 0 deletions
diff --git a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp
index be0e3309..18e22292 100644
--- a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp
+++ b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp
@@ -522,6 +522,223 @@ auto BazelMsgFactory::CreateDirectoryDigestFromGitTree(
     }
 }
 
+// NOLINTNEXTLINE(misc-no-recursion)
+auto BazelMsgFactory::CreateGitTreeDigestFromDirectory(
+    ArtifactDigest const& digest,
+    PathReadFunc const& read_path,
+    FileStoreFunc const& store_file,
+    TreeStoreFunc const& store_tree,
+    SymlinkStoreFunc const& store_symlink,
+    RehashedDigestReadFunc const& read_rehashed,
+    RehashedDigestStoreFunc const& store_rehashed) noexcept
+    -> expected<ArtifactDigest, std::string> {
+    GitRepo::tree_entries_t entries{};
+
+    try {
+        // read directory object
+        auto const tree_path = read_path(digest, ObjectType::Tree);
+        if (not tree_path) {
+            return unexpected{
+                fmt::format("failed reading CAS entry {}", digest.hash())};
+        }
+        auto const tree_content = FileSystemManager::ReadFile(*tree_path);
+        if (not tree_content) {
+            return unexpected{fmt::format("failed reading content of tree {}",
+                                          digest.hash())};
+        }
+        auto dir = BazelMsgFactory::MessageFromString<bazel_re::Directory>(
+            *tree_content);
+
+        // process subdirectories
+        for (auto const& subdir : dir->directories()) {
+            // get digest
+            auto const subdir_digest = ArtifactDigestFactory::FromBazel(
+                HashFunction::Type::PlainSHA256, subdir.digest());
+            if (not subdir_digest) {
+                return unexpected{subdir_digest.error()};
+            }
+            // get any cached digest mapping, to avoid unnecessary work
+            auto const cached_obj = read_rehashed(*subdir_digest);
+            if (not cached_obj) {
+                return unexpected{cached_obj.error()};
+            }
+            if (cached_obj.value()) {
+                // no work to be done, just add to map
+                if (auto raw_id =
+                        FromHexString(cached_obj.value()->digest.hash())) {
+                    entries[std::move(*raw_id)].emplace_back(subdir.name(),
+                                                             ObjectType::Tree);
+                }
+                else {
+                    return unexpected{fmt::format(
+                        "failed to get raw id for cached dir digest {}",
+                        cached_obj.value()->digest.hash())};
+                }
+            }
+            else {
+                // recursively get the subdirectory digest
+                auto const tree_digest =
+                    CreateGitTreeDigestFromDirectory(*subdir_digest,
+                                                     read_path,
+                                                     store_file,
+                                                     store_tree,
+                                                     store_symlink,
+                                                     read_rehashed,
+                                                     store_rehashed);
+                if (not tree_digest) {
+                    return unexpected{tree_digest.error()};
+                }
+                if (auto raw_id = FromHexString(tree_digest->hash())) {
+                    entries[std::move(*raw_id)].emplace_back(subdir.name(),
+                                                             ObjectType::Tree);
+                }
+                else {
+                    return unexpected{
+                        fmt::format("failed to get raw id for tree digest {}",
+                                    tree_digest->hash())};
+                }
+                // no need to cache the digest mapping, as this was done in the
+                // recursive call
+            }
+        }
+
+        // process symlinks
+        for (auto const& sym : dir->symlinks()) {
+            // get digest
+            auto const sym_digest =
+                ArtifactDigestFactory::HashDataAs<ObjectType::File>(
+                    HashFunction{HashFunction::Type::PlainSHA256},
+                    sym.target());
+            // get any cached digest mapping, to avoid unnecessary work
+            auto const cached_obj = read_rehashed(sym_digest);
+            if (not cached_obj) {
+                return unexpected{cached_obj.error()};
+            }
+            if (cached_obj.value()) {
+                // no work to be done, just add to map
+                if (auto raw_id =
+                        FromHexString(cached_obj.value()->digest.hash())) {
+                    entries[std::move(*raw_id)].emplace_back(
+                        sym.name(), ObjectType::Symlink);
+                }
+                else {
+                    return unexpected{fmt::format(
+                        "failed to get raw id for cached symlink digest {}",
+                        cached_obj.value()->digest.hash())};
+                }
+            }
+            else {
+                // check validity of symlink
+                if (not PathIsNonUpwards(sym.target())) {
+                    return unexpected{fmt::format(
+                        "found non-upwards symlink {}", sym_digest.hash())};
+                }
+                // rehash symlink
+                auto const blob_digest = store_symlink(sym.target());
+                if (not blob_digest) {
+                    return unexpected{
+                        fmt::format("failed rehashing as blob symlink {}",
+                                    sym_digest.hash())};
+                }
+                if (auto raw_id = FromHexString(blob_digest->hash())) {
+                    entries[std::move(*raw_id)].emplace_back(
+                        sym.name(), ObjectType::Symlink);
+                }
+                else {
+                    return unexpected{fmt::format(
+                        "failed to get raw id for symlink blob digest {}",
+                        blob_digest->hash())};
+                }
+                // while useless for future symlinks, cache digest mapping for
+                // file-type blobs with same content
+                if (auto error_msg = store_rehashed(
+                        sym_digest, *blob_digest, ObjectType::Symlink)) {
+                    return unexpected{*std::move(error_msg)};
+                }
+            }
+        }
+
+        // process files
+        for (auto const& file : dir->files()) {
+            // get digest
+            auto const file_digest = ArtifactDigestFactory::FromBazel(
+                HashFunction::Type::PlainSHA256, file.digest());
+            if (not file_digest) {
+                return unexpected{file_digest.error()};
+            }
+            auto const file_type = file.is_executable() ? ObjectType::Executable
+                                                        : ObjectType::File;
+            // get any cached digest mapping, to avoid unnecessary work
+            auto const cached_obj = read_rehashed(*file_digest);
+            if (not cached_obj) {
+                return unexpected{cached_obj.error()};
+            }
+            if (cached_obj.value()) {
+                // no work to be done, just add to map
+                if (auto raw_id =
+                        FromHexString(cached_obj.value()->digest.hash())) {
+                    entries[std::move(*raw_id)].emplace_back(file.name(),
+                                                             file_type);
+                }
+                else {
+                    return unexpected{fmt::format(
+                        "failed to get raw id for cached file digest {}",
+                        cached_obj.value()->digest.hash())};
+                }
+            }
+            else {
+                // rehash file
+                auto const file_path = read_path(*file_digest, file_type);
+                if (not file_path) {
+                    return unexpected{fmt::format("failed reading CAS entry {}",
+                                                  file_digest->hash())};
+                }
+                auto const blob_digest =
+                    store_file(*file_path, file.is_executable());
+                if (not blob_digest) {
+                    return unexpected{
+                        fmt::format("failed rehashing as blob file {}",
+                                    file_digest->hash())};
+                }
+                if (auto raw_id = FromHexString(blob_digest->hash())) {
+                    entries[std::move(*raw_id)].emplace_back(file.name(),
+                                                             file_type);
+                }
+                else {
+                    return unexpected{fmt::format(
+                        "failed to get raw id for file blob digest {}",
+                        blob_digest->hash())};
+                }
+                // cache digest mapping
+                if (auto error_msg =
+                        store_rehashed(*file_digest, *blob_digest, file_type)) {
+                    return unexpected{*std::move(error_msg)};
+                }
+            }
+        }
+
+        // create and store Git tree
+        auto const git_tree = GitRepo::CreateShallowTree(entries);
+        if (not git_tree) {
+            return unexpected{
+                fmt::format("failed creating Git tree for bazel Directory {}",
+                            digest.hash())};
+        }
+        auto const tree_digest = store_tree(git_tree->second);
+        // cache digest mapping
+        if (auto error_msg =
+                store_rehashed(digest, *tree_digest, ObjectType::Tree)) {
+            return unexpected{*std::move(error_msg)};
+        }
+        // return digest
+        return *tree_digest;
+    } catch (std::exception const& ex) {
+        return unexpected{fmt::format(
+            "creating Git tree digest unexpectedly failed with:\n{}",
+            ex.what())};
+    }
+}
+
 auto BazelMsgFactory::CreateDirectoryDigestFromLocalTree(
     std::filesystem::path const& root,
     FileStoreFunc const& store_file,
diff --git a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp
index 030dc0b1..6e71ace8 100644
--- a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp
+++ b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp
@@ -48,6 +48,9 @@ class BazelMsgFactory {
     using LinkDigestResolveFunc =
         std::function<void(std::vector<ArtifactDigest> const&,
                            gsl::not_null<std::vector<std::string>*> const&)>;
+    using PathReadFunc = std::function<std::optional<std::filesystem::path>(
+        ArtifactDigest const&,
+        ObjectType)>;
     using GitReadFunc = std::function<std::optional<
         std::variant<std::filesystem::path, std::string>>(ArtifactDigest const&,
                                                            ObjectType)>;
@@ -106,6 +109,29 @@ class BazelMsgFactory {
         RehashedDigestStoreFunc const& store_rehashed) noexcept
         -> expected<ArtifactDigest, std::string>;
 
+    /// \brief Create Git tree digest from an owned Directory.
+    /// Recursively traverse entire directory and store blobs and trees.
+    /// Used to convert from compatible to native representation of trees.
+    /// \param digest Digest of a bazel directory.
+    /// \param read_path Function for reading CAS path of compatible
+    /// blobs.
+    /// \param store_file Function for storing local file via path.
+    /// \param store_tree Function for storing Git trees.
+    /// \param store_symlink Function for storing symlink via content.
+    /// \param read_rehashed Function for retrieving cached digests.
+    /// \param store_rehashed Function to register digests for caching.
+    /// \returns Digest of a Git tree representing the entire bazel Directory,
+    /// or error string on failure.
+    [[nodiscard]] static auto CreateGitTreeDigestFromDirectory(
+        ArtifactDigest const& digest,
+        PathReadFunc const& read_path,
+        FileStoreFunc const& store_file,
+        TreeStoreFunc const& store_tree,
+        SymlinkStoreFunc const& store_symlink,
+        RehashedDigestReadFunc const& read_rehashed,
+        RehashedDigestStoreFunc const& store_rehashed) noexcept
+        -> expected<ArtifactDigest, std::string>;
+
     /// \brief Create Directory digest from local file root.
     /// Recursively traverse entire root and store files and directories.
     /// \param root Path to local file root.
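
For orientation, here is a minimal, hypothetical sketch of a call site for the new method. It is not part of this commit: the helpers LookUpCasPath, StoreBlob, StoreSymlink, StoreTree, ReadRehashCache, and WriteRehashCache are placeholders invented for illustration, and the callback return types are assumptions inferred only from how the callbacks are used in the function body above (the store callbacks are assumed to yield std::optional<ArtifactDigest>, the rehash-cache callbacks to follow the read_rehashed/store_rehashed shapes seen in the patch).

    // Hypothetical usage sketch only, not taken from the justbuild sources.
    // It wires the callbacks of CreateGitTreeDigestFromDirectory against some
    // compatible CAS and a rehash cache; all helpers referenced here are
    // assumed placeholders.
    auto ConvertToGitTree(ArtifactDigest const& dir_digest)
        -> expected<ArtifactDigest, std::string> {
        return BazelMsgFactory::CreateGitTreeDigestFromDirectory(
            dir_digest,
            // read_path: locate a compatible (SHA256-addressed) CAS entry on disk
            [](ArtifactDigest const& digest, ObjectType type)
                -> std::optional<std::filesystem::path> {
                return LookUpCasPath(digest, type);  // assumed helper
            },
            // store_file: rehash a file blob in native (Git) form
            [](std::filesystem::path const& path, bool is_exec)
                -> std::optional<ArtifactDigest> {
                return StoreBlob(path, is_exec);  // assumed helper
            },
            // store_tree: store a raw Git tree object
            [](std::string const& content) -> std::optional<ArtifactDigest> {
                return StoreTree(content);  // assumed helper
            },
            // store_symlink: store the symlink target as a blob
            [](std::string const& target) -> std::optional<ArtifactDigest> {
                return StoreSymlink(target);  // assumed helper
            },
            // read_rehashed: consult the compatible-to-native digest cache
            [](ArtifactDigest const& digest) { return ReadRehashCache(digest); },
            // store_rehashed: record a new compatible-to-native mapping
            [](ArtifactDigest const& from,
               ArtifactDigest const& to,
               ObjectType type) { return WriteRehashCache(from, to, type); });
    }

The read_rehashed/store_rehashed pair is what keeps the recursive traversal from re-converting blobs and subtrees that have already been rehashed, as the cached branches in the new function body show.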