diff options
author | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2022-08-29 11:35:15 +0200 |
---|---|---|
committer | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2022-12-21 14:41:57 +0100 |
commit | 1f344bd93258fac03b6ea82c15dd5dad41ee20fa (patch) | |
tree | a6696ca58ec9da63d4e1829bb2163c4e60811438 /src | |
parent | af20b222322d943595cd580404eda7be7a0b5ba4 (diff) | |
download | justbuild-1f344bd93258fac03b6ea82c15dd5dad41ee20fa.tar.gz |
Git CAS: Move Git tree ops to fake repo wrapper class
Diffstat (limited to 'src')
-rw-r--r-- | src/buildtool/execution_api/bazel_msg/TARGETS | 4 | ||||
-rw-r--r-- | src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp | 10 | ||||
-rw-r--r-- | src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp | 8 | ||||
-rw-r--r-- | src/buildtool/execution_api/bazel_msg/blob_tree.cpp | 6 | ||||
-rw-r--r-- | src/buildtool/execution_api/local/local_storage.cpp | 4 | ||||
-rw-r--r-- | src/buildtool/execution_api/remote/bazel/bazel_network.cpp | 4 | ||||
-rw-r--r-- | src/buildtool/file_system/TARGETS | 3 | ||||
-rw-r--r-- | src/buildtool/file_system/git_cas.cpp | 367 | ||||
-rw-r--r-- | src/buildtool/file_system/git_cas.hpp | 59 | ||||
-rw-r--r-- | src/buildtool/file_system/git_repo.cpp | 413 | ||||
-rw-r--r-- | src/buildtool/file_system/git_repo.hpp | 59 | ||||
-rw-r--r-- | src/buildtool/file_system/git_tree.cpp | 16 | ||||
-rw-r--r-- | src/buildtool/file_system/git_tree.hpp | 4 |
13 files changed, 507 insertions, 450 deletions
diff --git a/src/buildtool/execution_api/bazel_msg/TARGETS b/src/buildtool/execution_api/bazel_msg/TARGETS index 26d5ef22..c83fac48 100644 --- a/src/buildtool/execution_api/bazel_msg/TARGETS +++ b/src/buildtool/execution_api/bazel_msg/TARGETS @@ -23,7 +23,7 @@ [ "bazel_msg" , "directory_tree" , ["src/buildtool/common", "common"] - , ["src/buildtool/file_system", "git_cas"] + , ["src/buildtool/file_system", "git_repo"] , ["src/buildtool/execution_engine/dag", "dag"] , ["src/buildtool/common", "bazel_types"] ] @@ -65,7 +65,7 @@ , "private-deps": [ ["src/buildtool/common", "common"] , ["src/buildtool/common", "bazel_types"] - , ["src/buildtool/file_system", "git_cas"] + , ["src/buildtool/file_system", "git_repo"] , ["src/buildtool/file_system", "object_type"] , ["src/utils/cpp", "hex_string"] ] diff --git a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp index 7dcc359a..6a8ebb87 100644 --- a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp +++ b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.cpp @@ -28,7 +28,7 @@ #include "src/buildtool/common/bazel_types.hpp" #include "src/buildtool/compatibility/native_support.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" -#include "src/buildtool/file_system/git_cas.hpp" +#include "src/buildtool/file_system/git_repo.hpp" #include "src/utils/cpp/hex_string.hpp" namespace { @@ -439,7 +439,7 @@ auto BazelMsgFactory::ReadObjectInfosFromDirectory( } auto BazelMsgFactory::ReadObjectInfosFromGitTree( - GitCAS::tree_entries_t const& entries, + GitRepo::tree_entries_t const& entries, InfoStoreFunc const& store_info) noexcept -> bool { try { for (auto const& [raw_id, es] : entries) { @@ -548,7 +548,7 @@ auto BazelMsgFactory::CreateGitTreeDigestFromLocalTree( FileStoreFunc const& store_file, TreeStoreFunc const& store_tree) noexcept -> std::optional<bazel_re::Digest> { - GitCAS::tree_entries_t entries{}; + GitRepo::tree_entries_t 
entries{}; auto dir_reader = [&entries, &root, &store_file, &store_tree](auto name, auto type) { if (IsTreeObject(type)) { @@ -588,7 +588,7 @@ auto BazelMsgFactory::CreateGitTreeDigestFromLocalTree( }; if (FileSystemManager::ReadDirectory(root, dir_reader)) { - if (auto tree = GitCAS::CreateShallowTree(entries)) { + if (auto tree = GitRepo::CreateShallowTree(entries)) { try { if (auto digest = store_tree(tree->second, entries)) { return *digest; @@ -654,7 +654,7 @@ auto BazelMsgFactory::DirectoryToString(bazel_re::Directory const& dir) noexcept } auto BazelMsgFactory::GitTreeToString( - GitCAS::tree_entries_t const& entries) noexcept + GitRepo::tree_entries_t const& entries) noexcept -> std::optional<std::string> { auto json = nlohmann::json::object(); try { diff --git a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp index ec4ded35..faeab647 100644 --- a/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp +++ b/src/buildtool/execution_api/bazel_msg/bazel_msg_factory.hpp @@ -30,7 +30,7 @@ #include "src/buildtool/execution_api/bazel_msg/bazel_common.hpp" #include "src/buildtool/execution_api/bazel_msg/directory_tree.hpp" #include "src/buildtool/execution_engine/dag/dag.hpp" -#include "src/buildtool/file_system/git_cas.hpp" +#include "src/buildtool/file_system/git_repo.hpp" /// \brief Factory for creating Bazel API protobuf messages. /// Responsible for creating protobuf messages necessary for Bazel API server @@ -47,7 +47,7 @@ class BazelMsgFactory { bazel_re::Directory const&)>; using TreeStoreFunc = std::function<std::optional<bazel_re::Digest>( std::string const&, - GitCAS::tree_entries_t const&)>; + GitRepo::tree_entries_t const&)>; /// \brief Read object infos from directory. /// \returns true on success. @@ -58,7 +58,7 @@ class BazelMsgFactory { /// \brief Read object infos from git tree. /// \returns true on success. 
[[nodiscard]] static auto ReadObjectInfosFromGitTree( - GitCAS::tree_entries_t const& entries, + GitRepo::tree_entries_t const& entries, InfoStoreFunc const& store_info) noexcept -> bool; /// \brief Create Directory digest from artifact tree structure. @@ -130,7 +130,7 @@ class BazelMsgFactory { /// \brief Create descriptive string from Git tree entries. [[nodiscard]] static auto GitTreeToString( - GitCAS::tree_entries_t const& entries) noexcept + GitRepo::tree_entries_t const& entries) noexcept -> std::optional<std::string>; /// \brief Create message vector from std::map. diff --git a/src/buildtool/execution_api/bazel_msg/blob_tree.cpp b/src/buildtool/execution_api/bazel_msg/blob_tree.cpp index 4b06fd7a..3e5e2fa6 100644 --- a/src/buildtool/execution_api/bazel_msg/blob_tree.cpp +++ b/src/buildtool/execution_api/bazel_msg/blob_tree.cpp @@ -20,7 +20,7 @@ #include "src/buildtool/common/artifact.hpp" #include "src/buildtool/common/bazel_types.hpp" #include "src/buildtool/compatibility/native_support.hpp" -#include "src/buildtool/file_system/git_cas.hpp" +#include "src/buildtool/file_system/git_repo.hpp" #include "src/buildtool/file_system/object_type.hpp" #include "src/utils/cpp/hex_string.hpp" @@ -28,7 +28,7 @@ auto BlobTree::FromDirectoryTree(DirectoryTreePtr const& tree, std::filesystem::path const& parent) noexcept -> std::optional<BlobTreePtr> { - GitCAS::tree_entries_t entries; + GitRepo::tree_entries_t entries; std::vector<BlobTreePtr> nodes; try { entries.reserve(tree->size()); @@ -63,7 +63,7 @@ auto BlobTree::FromDirectoryTree(DirectoryTreePtr const& tree, object_info->type); } } - if (auto git_tree = GitCAS::CreateShallowTree(entries)) { + if (auto git_tree = GitRepo::CreateShallowTree(entries)) { bazel_re::Digest digest{}; digest.set_hash(NativeSupport::Prefix(ToHexString(git_tree->first), /*is_tree=*/true)); diff --git a/src/buildtool/execution_api/local/local_storage.cpp b/src/buildtool/execution_api/local/local_storage.cpp index a52e347d..eaeaa617 100644 
--- a/src/buildtool/execution_api/local/local_storage.cpp +++ b/src/buildtool/execution_api/local/local_storage.cpp @@ -35,10 +35,10 @@ namespace { [[nodiscard]] auto ReadGitTree( gsl::not_null<LocalStorage const*> const& storage, bazel_re::Digest const& digest) noexcept - -> std::optional<GitCAS::tree_entries_t> { + -> std::optional<GitRepo::tree_entries_t> { if (auto const path = storage->TreePath(digest)) { if (auto const content = FileSystemManager::ReadFile(*path)) { - return GitCAS::ReadTreeData( + return GitRepo::ReadTreeData( *content, HashFunction::ComputeTreeHash(*content).Bytes(), /*is_hex_id=*/false); diff --git a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp index eb9078f1..c92164ba 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp @@ -38,11 +38,11 @@ namespace { [[nodiscard]] auto ReadGitTree( gsl::not_null<BazelNetwork const*> const& network, bazel_re::Digest const& digest) noexcept - -> std::optional<GitCAS::tree_entries_t> { + -> std::optional<GitRepo::tree_entries_t> { auto blobs = network->ReadBlobs({digest}).Next(); if (blobs.size() == 1) { auto const& content = blobs.at(0).data; - return GitCAS::ReadTreeData( + return GitRepo::ReadTreeData( content, HashFunction::ComputeTreeHash(content).Bytes(), /*is_hex_id=*/false); diff --git a/src/buildtool/file_system/TARGETS b/src/buildtool/file_system/TARGETS index 46e5195b..107be171 100644 --- a/src/buildtool/file_system/TARGETS +++ b/src/buildtool/file_system/TARGETS @@ -43,7 +43,7 @@ , "hdrs": ["git_tree.hpp"] , "srcs": ["git_tree.cpp"] , "deps": - [ "git_cas" + [ "git_repo" , "object_type" , ["src/buildtool/multithreading", "atomic_value"] , ["src/utils/cpp", "hex_string"] @@ -75,6 +75,7 @@ , ["src/buildtool/logging", "logging"] , ["", "libgit2"] , ["src/utils/cpp", "path"] + , ["src/utils/cpp", "hex_string"] ] , "stage": ["src", "buildtool", 
"file_system"] } diff --git a/src/buildtool/file_system/git_cas.cpp b/src/buildtool/file_system/git_cas.cpp index e03cece8..d0bda197 100644 --- a/src/buildtool/file_system/git_cas.cpp +++ b/src/buildtool/file_system/git_cas.cpp @@ -26,7 +26,6 @@ extern "C" { #include <git2.h> -#include <git2/sys/odb_backend.h> } namespace { @@ -69,56 +68,6 @@ constexpr std::size_t kOIDHexSize{GIT_OID_HEXSZ}; return std::nullopt; } -[[nodiscard]] auto ToHexString(git_oid const& oid) noexcept - -> std::optional<std::string> { - std::string hex_id(GIT_OID_HEXSZ, '\0'); - if (git_oid_fmt(hex_id.data(), &oid) != 0) { - return std::nullopt; - } - return hex_id; -} - -[[nodiscard]] auto ToRawString(git_oid const& oid) noexcept - -> std::optional<std::string> { - if (auto hex_id = ToHexString(oid)) { - return FromHexString(*hex_id); - } - return std::nullopt; -} - -[[nodiscard]] auto GitFileModeToObjectType(git_filemode_t const& mode) noexcept - -> std::optional<ObjectType> { - switch (mode) { - case GIT_FILEMODE_BLOB: - return ObjectType::File; - case GIT_FILEMODE_BLOB_EXECUTABLE: - return ObjectType::Executable; - case GIT_FILEMODE_TREE: - return ObjectType::Tree; - default: { - std::ostringstream str; - str << std::oct << static_cast<int>(mode); - Logger::Log( - LogLevel::Error, "unsupported git filemode {}", str.str()); - return std::nullopt; - } - } -} - -[[nodiscard]] constexpr auto ObjectTypeToGitFileMode(ObjectType type) noexcept - -> git_filemode_t { - switch (type) { - case ObjectType::File: - return GIT_FILEMODE_BLOB; - case ObjectType::Executable: - return GIT_FILEMODE_BLOB_EXECUTABLE; - case ObjectType::Tree: - return GIT_FILEMODE_TREE; - } - - return GIT_FILEMODE_UNREADABLE; // make gcc happy -} - [[nodiscard]] auto GitTypeToObjectType(git_object_t const& type) noexcept -> std::optional<ObjectType> { switch (type) { @@ -134,169 +83,6 @@ constexpr std::size_t kOIDHexSize{GIT_OID_HEXSZ}; } } -[[maybe_unused]] [[nodiscard]] auto ValidateEntries( - GitCAS::tree_entries_t const& 
entries) -> bool { - return std::all_of(entries.begin(), entries.end(), [](auto entry) { - auto const& [id, nodes] = entry; - // for a given raw id, either all entries are trees or none of them - return std::all_of( - nodes.begin(), - nodes.end(), - [](auto entry) { return IsTreeObject(entry.type); }) or - std::none_of(nodes.begin(), nodes.end(), [](auto entry) { - return IsTreeObject(entry.type); - }); - }); -} - -auto const repo_closer = [](gsl::owner<git_repository*> repo) { - if (repo != nullptr) { - git_repository_free(repo); - } -}; - -auto const tree_closer = [](gsl::owner<git_tree*> tree) { - if (tree != nullptr) { - git_tree_free(tree); - } -}; - -auto const treebuilder_closer = [](gsl::owner<git_treebuilder*> builder) { - if (builder != nullptr) { - git_treebuilder_free(builder); - } -}; - -[[nodiscard]] auto flat_tree_walker(const char* /*root*/, - const git_tree_entry* entry, - void* payload) noexcept -> int { - auto* entries = - reinterpret_cast<GitCAS::tree_entries_t*>(payload); // NOLINT - - std::string name = git_tree_entry_name(entry); - auto const* oid = git_tree_entry_id(entry); - if (auto raw_id = ToRawString(*oid)) { - if (auto type = - GitFileModeToObjectType(git_tree_entry_filemode(entry))) { - (*entries)[*raw_id].emplace_back(std::move(name), *type); - return 1; // return >=0 on success, 1 == skip subtrees (flat) - } - } - return -1; // fail -} - -struct InMemoryODBBackend { - git_odb_backend parent; - GitCAS::tree_entries_t const* entries{nullptr}; // object headers - std::unordered_map<std::string, std::string> trees{}; // solid tree objects -}; - -[[nodiscard]] auto backend_read_header(size_t* len_p, - git_object_t* type_p, - git_odb_backend* _backend, - const git_oid* oid) -> int { - if (len_p != nullptr and type_p != nullptr and _backend != nullptr and - oid != nullptr) { - auto* b = reinterpret_cast<InMemoryODBBackend*>(_backend); // NOLINT - if (auto id = ToRawString(*oid)) { - if (auto it = b->trees.find(*id); it != b->trees.end()) { 
- *type_p = GIT_OBJECT_TREE; - *len_p = it->second.size(); - return GIT_OK; - } - if (b->entries != nullptr) { - if (auto it = b->entries->find(*id); it != b->entries->end()) { - if (not it->second.empty()) { - // pretend object is in database, size is ignored. - *type_p = IsTreeObject(it->second.front().type) - ? GIT_OBJECT_TREE - : GIT_OBJECT_BLOB; - *len_p = 0; - return GIT_OK; - } - } - } - return GIT_ENOTFOUND; - } - } - return GIT_ERROR; -} - -[[nodiscard]] auto backend_read(void** data_p, - size_t* len_p, - git_object_t* type_p, - git_odb_backend* _backend, - const git_oid* oid) -> int { - if (data_p != nullptr and len_p != nullptr and type_p != nullptr and - _backend != nullptr and oid != nullptr) { - auto* b = reinterpret_cast<InMemoryODBBackend*>(_backend); // NOLINT - if (auto id = ToRawString(*oid)) { - if (auto it = b->trees.find(*id); it != b->trees.end()) { - *type_p = GIT_OBJECT_TREE; - *len_p = it->second.size(); - *data_p = git_odb_backend_data_alloc(_backend, *len_p); - if (*data_p == nullptr) { - return GIT_ERROR; - } - std::memcpy(*data_p, it->second.data(), *len_p); - return GIT_OK; - } - return GIT_ENOTFOUND; - } - } - return GIT_ERROR; -} - -[[nodiscard]] auto backend_exists(git_odb_backend* _backend, const git_oid* oid) - -> int { - if (_backend != nullptr and oid != nullptr) { - auto* b = reinterpret_cast<InMemoryODBBackend*>(_backend); // NOLINT - if (auto id = ToRawString(*oid)) { - return (b->entries != nullptr and b->entries->contains(*id)) or - b->trees.contains(*id) - ? 
1 - : 0; - } - } - return GIT_ERROR; -} - -[[nodiscard]] auto backend_write(git_odb_backend* _backend, - const git_oid* oid, - const void* data, - size_t len, - git_object_t type) -> int { - if (data != nullptr and _backend != nullptr and oid != nullptr) { - auto* b = reinterpret_cast<InMemoryODBBackend*>(_backend); // NOLINT - if (auto id = ToRawString(*oid)) { - if (auto t = GitTypeToObjectType(type)) { - std::string s(static_cast<char const*>(data), len); - if (type == GIT_OBJECT_TREE) { - b->trees.emplace(std::move(*id), std::move(s)); - return GIT_OK; - } - } - } - } - return GIT_ERROR; -} - -void backend_free(git_odb_backend* /*_backend*/) {} - -[[nodiscard]] auto CreateInMemoryODBParent() -> git_odb_backend { - git_odb_backend b{}; - b.version = GIT_ODB_BACKEND_VERSION; - b.read_header = &backend_read_header; - b.read = &backend_read; - b.exists = &backend_exists; - b.write = &backend_write; - b.free = &backend_free; - return b; -} - -// A backend that can be used to read and create tree objects in-memory. 
-auto const kInMemoryODBParent = CreateInMemoryODBParent(); - } // namespace #endif // BOOTSTRAP_BUILD_TOOL @@ -358,55 +144,6 @@ auto GitCAS::ReadObject(std::string const& id, bool is_hex_id) const noexcept #endif } -auto GitCAS::ReadTree(std::string const& id, bool is_hex_id) const noexcept - -> std::optional<tree_entries_t> { -#ifdef BOOTSTRAP_BUILD_TOOL - return std::nullopt; -#else - // create object id - auto oid = GitObjectID(id, is_hex_id); - if (not oid) { - return std::nullopt; - } - - // create fake repository from ODB - git_repository* repo_ptr{nullptr}; - if (git_repository_wrap_odb(&repo_ptr, odb_) != 0) { - Logger::Log(LogLevel::Debug, - "failed to create fake Git repository from object db"); - return std::nullopt; - } - auto fake_repo = std::unique_ptr<git_repository, decltype(repo_closer)>{ - repo_ptr, repo_closer}; - - // lookup tree - git_tree* tree_ptr{nullptr}; - if (git_tree_lookup(&tree_ptr, fake_repo.get(), &(*oid)) != 0) { - Logger::Log(LogLevel::Debug, - "failed to lookup Git tree {}", - is_hex_id ? std::string{id} : ToHexString(id)); - return std::nullopt; - } - auto tree = - std::unique_ptr<git_tree, decltype(tree_closer)>{tree_ptr, tree_closer}; - - // walk tree (flat) and create entries - tree_entries_t entries{}; - entries.reserve(git_tree_entrycount(tree.get())); - if (git_tree_walk( - tree.get(), GIT_TREEWALK_PRE, flat_tree_walker, &entries) != 0) { - Logger::Log(LogLevel::Debug, - "failed to walk Git tree {}", - is_hex_id ? 
std::string{id} : ToHexString(id)); - return std::nullopt; - } - - gsl_EnsuresAudit(ValidateEntries(entries)); - - return entries; -#endif -} - auto GitCAS::ReadHeader(std::string const& id, bool is_hex_id) const noexcept -> std::optional<std::pair<std::size_t, ObjectType>> { #ifndef BOOTSTRAP_BUILD_TOOL @@ -437,62 +174,6 @@ auto GitCAS::ReadHeader(std::string const& id, bool is_hex_id) const noexcept return std::nullopt; } -auto GitCAS::CreateTree(GitCAS::tree_entries_t const& entries) const noexcept - -> std::optional<std::string> { -#ifdef BOOTSTRAP_BUILD_TOOL - return std::nullopt; -#else - gsl_ExpectsAudit(ValidateEntries(entries)); - - // create fake repository from ODB - git_repository* repo_ptr{nullptr}; - if (git_repository_wrap_odb(&repo_ptr, odb_) != 0) { - Logger::Log(LogLevel::Debug, - "failed to create fake Git repository from object db"); - return std::nullopt; - } - auto fake_repo = std::unique_ptr<git_repository, decltype(repo_closer)>{ - repo_ptr, repo_closer}; - - git_treebuilder* builder_ptr{nullptr}; - if (git_treebuilder_new(&builder_ptr, fake_repo.get(), nullptr) != 0) { - Logger::Log(LogLevel::Debug, "failed to create Git tree builder"); - return std::nullopt; - } - auto builder = - std::unique_ptr<git_treebuilder, decltype(treebuilder_closer)>{ - builder_ptr, treebuilder_closer}; - - for (auto const& [raw_id, es] : entries) { - auto id = GitObjectID(raw_id, /*is_hex_id=*/false); - for (auto const& entry : es) { - git_tree_entry const* tree_entry{nullptr}; - if (not id or git_treebuilder_insert( - &tree_entry, - builder.get(), - entry.name.c_str(), - &(*id), - ObjectTypeToGitFileMode(entry.type)) != 0) { - Logger::Log(LogLevel::Debug, - "failed adding object {} to Git tree", - ToHexString(raw_id)); - return std::nullopt; - } - } - } - - git_oid oid; - if (git_treebuilder_write(&oid, builder.get()) != 0) { - return std::nullopt; - } - auto raw_id = ToRawString(oid); - if (not raw_id) { - return std::nullopt; - } - return std::move(*raw_id); 
-#endif -} - auto GitCAS::OpenODB(std::filesystem::path const& repo_path) noexcept -> bool { static std::mutex repo_mutex{}; #ifdef BOOTSTRAP_BUILD_TOOL @@ -525,51 +206,3 @@ auto GitCAS::OpenODB(std::filesystem::path const& repo_path) noexcept -> bool { return true; #endif } - -auto GitCAS::ReadTreeData(std::string const& data, - std::string const& id, - bool is_hex_id) noexcept - -> std::optional<tree_entries_t> { -#ifndef BOOTSTRAP_BUILD_TOOL - InMemoryODBBackend b{kInMemoryODBParent}; - GitCAS cas{}; - if (auto raw_id = is_hex_id ? FromHexString(id) : std::make_optional(id)) { - try { - b.trees.emplace(*raw_id, data); - } catch (...) { - return std::nullopt; - } - // create a GitCAS from a special-purpose in-memory object database. - if (git_odb_new(&cas.odb_) == 0 and - git_odb_add_backend( - cas.odb_, - reinterpret_cast<git_odb_backend*>(&b), // NOLINT - 0) == 0) { - return cas.ReadTree(*raw_id, /*is_hex_id=*/false); - } - } -#endif - return std::nullopt; -} - -auto GitCAS::CreateShallowTree(GitCAS::tree_entries_t const& entries) noexcept - -> std::optional<std::pair<std::string, std::string>> { -#ifndef BOOTSTRAP_BUILD_TOOL - InMemoryODBBackend b{kInMemoryODBParent, &entries}; - GitCAS cas{}; - // create a GitCAS from a special-purpose in-memory object database. 
- if (git_odb_new(&cas.odb_) == 0 and - git_odb_add_backend(cas.odb_, - reinterpret_cast<git_odb_backend*>(&b), // NOLINT - 0) == 0) { - if (auto raw_id = cas.CreateTree(entries)) { - // read result from in-memory trees - if (auto it = b.trees.find(*raw_id); it != b.trees.end()) { - return std::make_pair(std::move(*raw_id), - std::move(it->second)); - } - } - } -#endif - return std::nullopt; -} diff --git a/src/buildtool/file_system/git_cas.hpp b/src/buildtool/file_system/git_cas.hpp index 47592927..0d637c5e 100644 --- a/src/buildtool/file_system/git_cas.hpp +++ b/src/buildtool/file_system/git_cas.hpp @@ -34,25 +34,6 @@ using GitCASPtr = std::shared_ptr<GitCAS const>; /// \brief Git CAS that maintains its Git context. class GitCAS { public: - // Stores the data for defining a single Git tree entry, which consists of - // a name (flat basename) and an object type (file/executable/tree). - struct tree_entry_t { - tree_entry_t(std::string n, ObjectType t) - : name{std::move(n)}, type{t} {} - std::string name; - ObjectType type; - [[nodiscard]] auto operator==(tree_entry_t const& other) const noexcept - -> bool { - return name == other.name and type == other.type; - } - }; - - // Tree entries by raw id. The same id might refer to multiple entries. - // Note that sharding by id is used as this format enables a more efficient - // internal implementation for creating trees. - using tree_entries_t = - std::unordered_map<std::string, std::vector<tree_entry_t>>; - static auto Open(std::filesystem::path const& repo_path) noexcept -> GitCASPtr; @@ -83,46 +64,6 @@ class GitCAS { bool is_hex_id = false) const noexcept -> std::optional<std::pair<std::size_t, ObjectType>>; - /// \brief Read entries from tree in CAS. - /// Reading a tree must be backed by an object database. Therefore, a valid - /// instance of this class is required. - /// \param id The object id. - /// \param is_hex_id Specify whether `id` is hex string or raw. 
- [[nodiscard]] auto ReadTree(std::string const& id, - bool is_hex_id = false) const noexcept - -> std::optional<tree_entries_t>; - - /// \brief Create a flat tree from entries and store tree in CAS. - /// Creating a tree must be backed by an object database. Therefore, a valid - /// instance of this class is required. Furthermore, all entries must be - /// available in the underlying object database and object types must - /// correctly reflect the type of the object found in the database. - /// \param entries The entries to create the tree from. - /// \returns The raw object id as string, if successful. - [[nodiscard]] auto CreateTree(GitCAS::tree_entries_t const& entries) - const noexcept -> std::optional<std::string>; - - /// \brief Read entries from tree data (without object db). - /// \param data The tree object as plain data. - /// \param id The object id. - /// \param is_hex_id Specify whether `id` is hex string or raw. - /// \returns The tree entries. - [[nodiscard]] static auto ReadTreeData(std::string const& data, - std::string const& id, - bool is_hex_id = false) noexcept - -> std::optional<tree_entries_t>; - - /// \brief Create a flat shallow (without objects in db) tree and return it. - /// Creates a tree object from the entries without access to the actual - /// blobs. Objects are not required to be available in the underlying object - /// database. It is sufficient to provide the raw object id and and object - /// type for every entry. - /// \param entries The entries to create the tree from. - /// \returns A pair of raw object id and the tree object content. - [[nodiscard]] static auto CreateShallowTree( - GitCAS::tree_entries_t const& entries) noexcept - -> std::optional<std::pair<std::string, std::string>>; - private: // IMPORTANT: the GitContext needs to be initialized before any git object! 
GitContext git_context_{}; // maintains a Git context while CAS is alive diff --git a/src/buildtool/file_system/git_repo.cpp b/src/buildtool/file_system/git_repo.cpp index 4b46bd9d..3f323681 100644 --- a/src/buildtool/file_system/git_repo.cpp +++ b/src/buildtool/file_system/git_repo.cpp @@ -15,16 +15,21 @@ #include <src/buildtool/file_system/git_repo.hpp> #include "src/buildtool/logging/logger.hpp" +#include "src/utils/cpp/hex_string.hpp" #include "src/utils/cpp/path.hpp" extern "C" { #include <git2.h> +#include <git2/sys/odb_backend.h> } namespace { constexpr std::size_t kWaitTime{2}; // time in ms between tries for git locks +constexpr std::size_t kOIDRawSize{GIT_OID_RAWSZ}; +constexpr std::size_t kOIDHexSize{GIT_OID_HEXSZ}; + [[nodiscard]] auto GitLastError() noexcept -> std::string { git_error const* err{nullptr}; if ((err = git_error_last()) != nullptr and err->message != nullptr) { @@ -33,6 +38,260 @@ constexpr std::size_t kWaitTime{2}; // time in ms between tries for git locks return "<unknown error>"; } +[[nodiscard]] auto GitObjectID(std::string const& id, + bool is_hex_id = false) noexcept + -> std::optional<git_oid> { + if (id.size() < (is_hex_id ? kOIDHexSize : kOIDRawSize)) { + Logger::Log(LogLevel::Error, + "invalid git object id {}", + is_hex_id ? id : ToHexString(id)); + return std::nullopt; + } + git_oid oid{}; + if (is_hex_id and git_oid_fromstr(&oid, id.c_str()) == 0) { + return oid; + } + if (not is_hex_id and + git_oid_fromraw( + &oid, + reinterpret_cast<unsigned char const*>(id.data()) // NOLINT + ) == 0) { + return oid; + } + Logger::Log(LogLevel::Error, + "parsing git object id {} failed with:\n{}", + is_hex_id ? 
id : ToHexString(id), + GitLastError()); + return std::nullopt; +} + +[[nodiscard]] auto ToHexString(git_oid const& oid) noexcept + -> std::optional<std::string> { + std::string hex_id(GIT_OID_HEXSZ, '\0'); + if (git_oid_fmt(hex_id.data(), &oid) != 0) { + return std::nullopt; + } + return hex_id; +} + +[[nodiscard]] auto ToRawString(git_oid const& oid) noexcept + -> std::optional<std::string> { + if (auto hex_id = ToHexString(oid)) { + return FromHexString(*hex_id); + } + return std::nullopt; +} + +[[nodiscard]] auto GitFileModeToObjectType(git_filemode_t const& mode) noexcept + -> std::optional<ObjectType> { + switch (mode) { + case GIT_FILEMODE_BLOB: + return ObjectType::File; + case GIT_FILEMODE_BLOB_EXECUTABLE: + return ObjectType::Executable; + case GIT_FILEMODE_TREE: + return ObjectType::Tree; + default: { + std::ostringstream str; + str << std::oct << static_cast<int>(mode); + Logger::Log( + LogLevel::Error, "unsupported git filemode {}", str.str()); + return std::nullopt; + } + } +} + +[[nodiscard]] constexpr auto ObjectTypeToGitFileMode(ObjectType type) noexcept + -> git_filemode_t { + switch (type) { + case ObjectType::File: + return GIT_FILEMODE_BLOB; + case ObjectType::Executable: + return GIT_FILEMODE_BLOB_EXECUTABLE; + case ObjectType::Tree: + return GIT_FILEMODE_TREE; + } + return GIT_FILEMODE_UNREADABLE; // make gcc happy +} + +[[nodiscard]] auto GitTypeToObjectType(git_object_t const& type) noexcept + -> std::optional<ObjectType> { + switch (type) { + case GIT_OBJECT_BLOB: + return ObjectType::File; + case GIT_OBJECT_TREE: + return ObjectType::Tree; + default: + Logger::Log(LogLevel::Error, + "unsupported git object type {}", + git_object_type2string(type)); + return std::nullopt; + } +} + +#ifndef NDEBUG +[[nodiscard]] auto ValidateEntries(GitRepo::tree_entries_t const& entries) + -> bool { + return std::all_of(entries.begin(), entries.end(), [](auto entry) { + auto const& [id, nodes] = entry; + // for a given raw id, either all entries are trees 
or none of them + return std::all_of( + nodes.begin(), + nodes.end(), + [](auto entry) { return IsTreeObject(entry.type); }) or + std::none_of(nodes.begin(), nodes.end(), [](auto entry) { + return IsTreeObject(entry.type); + }); + }); +} +#endif + +auto const tree_closer = [](gsl::owner<git_tree*> tree) { + if (tree != nullptr) { + git_tree_free(tree); + } +}; + +auto const treebuilder_closer = [](gsl::owner<git_treebuilder*> builder) { + if (builder != nullptr) { + git_treebuilder_free(builder); + } +}; + +[[nodiscard]] auto flat_tree_walker(const char* /*root*/, + const git_tree_entry* entry, + void* payload) noexcept -> int { + auto* entries = + reinterpret_cast<GitRepo::tree_entries_t*>(payload); // NOLINT + + std::string name = git_tree_entry_name(entry); + auto const* oid = git_tree_entry_id(entry); + if (auto raw_id = ToRawString(*oid)) { + if (auto type = + GitFileModeToObjectType(git_tree_entry_filemode(entry))) { + (*entries)[*raw_id].emplace_back(std::move(name), *type); + return 1; // return >=0 on success, 1 == skip subtrees (flat) + } + } + return -1; // fail +} + +struct InMemoryODBBackend { + git_odb_backend parent; + GitRepo::tree_entries_t const* entries{nullptr}; // object headers + std::unordered_map<std::string, std::string> trees{}; // solid tree objects +}; + +[[nodiscard]] auto backend_read_header(size_t* len_p, + git_object_t* type_p, + git_odb_backend* _backend, + const git_oid* oid) -> int { + if (len_p != nullptr and type_p != nullptr and _backend != nullptr and + oid != nullptr) { + auto* b = reinterpret_cast<InMemoryODBBackend*>(_backend); // NOLINT + if (auto id = ToRawString(*oid)) { + if (auto it = b->trees.find(*id); it != b->trees.end()) { + *type_p = GIT_OBJECT_TREE; + *len_p = it->second.size(); + return GIT_OK; + } + if (b->entries != nullptr) { + if (auto it = b->entries->find(*id); it != b->entries->end()) { + if (not it->second.empty()) { + // pretend object is in database, size is ignored. 
+ *type_p = IsTreeObject(it->second.front().type) + ? GIT_OBJECT_TREE + : GIT_OBJECT_BLOB; + *len_p = 0; + return GIT_OK; + } + } + } + return GIT_ENOTFOUND; + } + } + return GIT_ERROR; +} + +[[nodiscard]] auto backend_read(void** data_p, + size_t* len_p, + git_object_t* type_p, + git_odb_backend* _backend, + const git_oid* oid) -> int { + if (data_p != nullptr and len_p != nullptr and type_p != nullptr and + _backend != nullptr and oid != nullptr) { + auto* b = reinterpret_cast<InMemoryODBBackend*>(_backend); // NOLINT + if (auto id = ToRawString(*oid)) { + if (auto it = b->trees.find(*id); it != b->trees.end()) { + *type_p = GIT_OBJECT_TREE; + *len_p = it->second.size(); + *data_p = git_odb_backend_data_alloc(_backend, *len_p); + if (*data_p == nullptr) { + return GIT_ERROR; + } + std::memcpy(*data_p, it->second.data(), *len_p); + return GIT_OK; + } + return GIT_ENOTFOUND; + } + } + return GIT_ERROR; +} + +[[nodiscard]] auto backend_exists(git_odb_backend* _backend, const git_oid* oid) + -> int { + if (_backend != nullptr and oid != nullptr) { + auto* b = reinterpret_cast<InMemoryODBBackend*>(_backend); // NOLINT + if (auto id = ToRawString(*oid)) { + return (b->entries != nullptr and b->entries->contains(*id)) or + b->trees.contains(*id) + ? 
1 + : 0; + } + } + return GIT_ERROR; +} + +[[nodiscard]] auto backend_write(git_odb_backend* _backend, + const git_oid* oid, + const void* data, + size_t len, + git_object_t type) -> int { + if (data != nullptr and _backend != nullptr and oid != nullptr) { + auto* b = reinterpret_cast<InMemoryODBBackend*>(_backend); // NOLINT + if (auto id = ToRawString(*oid)) { + if (auto t = GitTypeToObjectType(type)) { + std::string s(static_cast<char const*>(data), len); + if (type == GIT_OBJECT_TREE) { + b->trees.emplace(std::move(*id), std::move(s)); + return GIT_OK; + } + } + } + } + return GIT_ERROR; +} + +void backend_free(git_odb_backend* /*_backend*/) {} + +[[nodiscard]] auto CreateInMemoryODBParent() -> git_odb_backend { + git_odb_backend b{}; + b.version = GIT_ODB_BACKEND_VERSION; + b.read_header = &backend_read_header; + b.read = &backend_read; + b.exists = &backend_exists; + b.write = &backend_write; + b.free = &backend_free; + return b; +} + +#ifndef BOOTSTRAP_BUILD_TOOL + +// A backend that can be used to read and create tree objects in-memory. +auto const kInMemoryODBParent = CreateInMemoryODBParent(); + +#endif // BOOTSTRAP_BUILD_TOOL + } // namespace auto GitRepo::Open(GitCASPtr git_cas) noexcept -> std::optional<GitRepo> { @@ -195,3 +454,157 @@ GitRepo::~GitRepo() noexcept { auto GitRepo::IsRepoFake() const noexcept -> bool { return is_repo_fake_; } + +auto GitRepo::ReadTree(std::string const& id, bool is_hex_id) const noexcept + -> std::optional<tree_entries_t> { +#ifdef BOOTSTRAP_BUILD_TOOL + return std::nullopt; +#else + // create object id + auto oid = GitObjectID(id, is_hex_id); + if (not oid) { + return std::nullopt; + } + + // lookup tree + git_tree* tree_ptr{nullptr}; + if (git_tree_lookup(&tree_ptr, repo_, &(*oid)) != 0) { + Logger::Log(LogLevel::Debug, + "failed to lookup Git tree {}", + is_hex_id ? 
std::string{id} : ToHexString(id)); + return std::nullopt; + } + auto tree = + std::unique_ptr<git_tree, decltype(tree_closer)>{tree_ptr, tree_closer}; + + // walk tree (flat) and create entries + tree_entries_t entries{}; + entries.reserve(git_tree_entrycount(tree.get())); + if (git_tree_walk( + tree.get(), GIT_TREEWALK_PRE, flat_tree_walker, &entries) != 0) { + Logger::Log(LogLevel::Debug, + "failed to walk Git tree {}", + is_hex_id ? std::string{id} : ToHexString(id)); + return std::nullopt; + } + +#ifndef NDEBUG + gsl_EnsuresAudit(ValidateEntries(entries)); +#endif + + return entries; +#endif +} + +auto GitRepo::CreateTree(tree_entries_t const& entries) const noexcept + -> std::optional<std::string> { +#ifdef BOOTSTRAP_BUILD_TOOL + return std::nullopt; +#else +#ifndef NDEBUG + gsl_ExpectsAudit(ValidateEntries(entries)); +#endif // NDEBUG + + git_treebuilder* builder_ptr{nullptr}; + if (git_treebuilder_new(&builder_ptr, repo_, nullptr) != 0) { + Logger::Log(LogLevel::Debug, "failed to create Git tree builder"); + return std::nullopt; + } + auto builder = + std::unique_ptr<git_treebuilder, decltype(treebuilder_closer)>{ + builder_ptr, treebuilder_closer}; + + for (auto const& [raw_id, es] : entries) { + auto id = GitObjectID(raw_id, /*is_hex_id=*/false); + for (auto const& entry : es) { + if (not id or git_treebuilder_insert( + nullptr, + builder.get(), + entry.name.c_str(), + &(*id), + ObjectTypeToGitFileMode(entry.type)) != 0) { + Logger::Log(LogLevel::Debug, + "failed adding object {} to Git tree", + ToHexString(raw_id)); + return std::nullopt; + } + } + } + + git_oid oid; + if (git_treebuilder_write(&oid, builder.get()) != 0) { + return std::nullopt; + } + auto raw_id = ToRawString(oid); + if (not raw_id) { + return std::nullopt; + } + return std::move(*raw_id); +#endif +} + +auto GitRepo::ReadTreeData(std::string const& data, + std::string const& id, + bool is_hex_id) noexcept + -> std::optional<tree_entries_t> { +#ifndef BOOTSTRAP_BUILD_TOOL + try { + 
InMemoryODBBackend b{kInMemoryODBParent}; + auto cas = std::make_shared<GitCAS>(); + if (auto raw_id = + is_hex_id ? FromHexString(id) : std::make_optional(id)) { + try { + b.trees.emplace(*raw_id, data); + } catch (...) { + return std::nullopt; + } + // create a GitCAS from a special-purpose in-memory object database. + if (git_odb_new(&cas->odb_) == 0 and + git_odb_add_backend( + cas->odb_, + reinterpret_cast<git_odb_backend*>(&b), // NOLINT + 0) == 0) { + // wrap odb in "fake" repo + auto repo = + GitRepo(std::static_pointer_cast<GitCAS const>(cas)); + return repo.ReadTree(*raw_id, /*is_hex_id=*/false); + } + } + } catch (std::exception const& ex) { + Logger::Log( + LogLevel::Error, "reading tree data failed with:\n{}", ex.what()); + } +#endif + return std::nullopt; +} + +auto GitRepo::CreateShallowTree(tree_entries_t const& entries) noexcept + -> std::optional<std::pair<std::string, std::string>> { +#ifndef BOOTSTRAP_BUILD_TOOL + try { + InMemoryODBBackend b{kInMemoryODBParent, &entries}; + auto cas = std::make_shared<GitCAS>(); + // create a GitCAS from a special-purpose in-memory object database. 
+ if (git_odb_new(&cas->odb_) == 0 and + git_odb_add_backend( + cas->odb_, + reinterpret_cast<git_odb_backend*>(&b), // NOLINT + 0) == 0) { + // wrap odb in "fake" repo + auto repo = GitRepo(std::static_pointer_cast<GitCAS const>(cas)); + if (auto raw_id = repo.CreateTree(entries)) { + // read result from in-memory trees + if (auto it = b.trees.find(*raw_id); it != b.trees.end()) { + return std::make_pair(std::move(*raw_id), + std::move(it->second)); + } + } + } + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Error, + "creating shallow tree failed with:\n{}", + ex.what()); + } +#endif + return std::nullopt; +} diff --git a/src/buildtool/file_system/git_repo.hpp b/src/buildtool/file_system/git_repo.hpp index ae0c183b..6103d879 100644 --- a/src/buildtool/file_system/git_repo.hpp +++ b/src/buildtool/file_system/git_repo.hpp @@ -28,6 +28,25 @@ using git_repository = struct git_repository; /// existing ODB, allowing thread-safe operations. class GitRepo { public: + // Stores the data for defining a single Git tree entry, which consists of + // a name (flat basename) and an object type (file/executable/tree). + struct tree_entry_t { + tree_entry_t(std::string n, ObjectType t) + : name{std::move(n)}, type{t} {} + std::string name; + ObjectType type; + [[nodiscard]] auto operator==(tree_entry_t const& other) const noexcept + -> bool { + return name == other.name and type == other.type; + } + }; + + // Tree entries by raw id. The same id might refer to multiple entries. + // Note that sharding by id is used as this format enables a more efficient + // internal implementation for creating trees. + using tree_entries_t = + std::unordered_map<std::string, std::vector<tree_entry_t>>; + GitRepo() = delete; // no default ctor // allow only move, no copy @@ -55,6 +74,46 @@ class GitRepo { [[nodiscard]] auto IsRepoFake() const noexcept -> bool; + /// \brief Read entries from tree in CAS. + /// Reading a tree must be backed by an object database. 
Therefore, a real + /// repository is required. + /// \param id The object id. + /// \param is_hex_id Specify whether `id` is hex string or raw. + [[nodiscard]] auto ReadTree(std::string const& id, + bool is_hex_id = false) const noexcept + -> std::optional<tree_entries_t>; + + /// \brief Create a flat tree from entries and store tree in CAS. + /// Creating a tree must be backed by an object database. Therefore, a real + /// repository is required. Furthermore, all entries must be available in + /// the underlying object database and object types must correctly reflect + /// the type of the object found in the database. + /// \param entries The entries to create the tree from. + /// \returns The raw object id as string, if successful. + [[nodiscard]] auto CreateTree(GitRepo::tree_entries_t const& entries) + const noexcept -> std::optional<std::string>; + + /// \brief Read entries from tree data (without object db). + /// \param data The tree object as plain data. + /// \param id The object id. + /// \param is_hex_id Specify whether `id` is hex string or raw. + /// \returns The tree entries. + [[nodiscard]] static auto ReadTreeData(std::string const& data, + std::string const& id, + bool is_hex_id = false) noexcept + -> std::optional<tree_entries_t>; + + /// \brief Create a flat shallow (without objects in db) tree and return it. + /// Creates a tree object from the entries without access to the actual + /// blobs. Objects are not required to be available in the underlying object + /// database. It is sufficient to provide the raw object id and object + /// type for every entry. + /// \param entries The entries to create the tree from. + /// \returns A pair of raw object id and the tree object content. 
+ [[nodiscard]] static auto CreateShallowTree( + GitRepo::tree_entries_t const& entries) noexcept + -> std::optional<std::pair<std::string, std::string>>; + ~GitRepo() noexcept; private: diff --git a/src/buildtool/file_system/git_tree.cpp b/src/buildtool/file_system/git_tree.cpp index 6e65ecd6..db391b7e 100644 --- a/src/buildtool/file_system/git_tree.cpp +++ b/src/buildtool/file_system/git_tree.cpp @@ -68,8 +68,14 @@ auto GitTree::Read(gsl::not_null<GitCASPtr> const& cas, std::string const& tree_id) noexcept -> std::optional<GitTree> { if (auto raw_id = FromHexString(tree_id)) { - if (auto entries = cas->ReadTree(*raw_id)) { - return GitTree::FromEntries(cas, std::move(*entries), *raw_id); + auto repo = GitRepo::Open(cas); + if (repo != std::nullopt) { + if (auto entries = repo->ReadTree(*raw_id)) { + return GitTree::FromEntries(cas, std::move(*entries), *raw_id); + } + } + else { + return ::std::nullopt; } } return std::nullopt; @@ -113,7 +119,11 @@ auto GitTreeEntry::Blob() const noexcept -> std::optional<std::string> { auto GitTreeEntry::Tree() const& noexcept -> std::optional<GitTree> const& { return tree_cached_.SetOnceAndGet([this]() -> std::optional<GitTree> { if (IsTree()) { - if (auto entries = cas_->ReadTree(raw_id_)) { + auto repo = GitRepo::Open(cas_); + if (repo == std::nullopt) { + return std::nullopt; + } + if (auto entries = repo->ReadTree(raw_id_)) { return GitTree::FromEntries(cas_, std::move(*entries), raw_id_); } } diff --git a/src/buildtool/file_system/git_tree.hpp b/src/buildtool/file_system/git_tree.hpp index 98e5a8bd..ac409e69 100644 --- a/src/buildtool/file_system/git_tree.hpp +++ b/src/buildtool/file_system/git_tree.hpp @@ -20,7 +20,7 @@ #include <unordered_map> #include "gsl-lite/gsl-lite.hpp" -#include "src/buildtool/file_system/git_cas.hpp" +#include "src/buildtool/file_system/git_repo.hpp" #include "src/buildtool/file_system/object_type.hpp" #include "src/buildtool/multithreading/atomic_value.hpp" #include 
"src/utils/cpp/hex_string.hpp" @@ -77,7 +77,7 @@ class GitTree { raw_id_{std::move(raw_id)} {} [[nodiscard]] static auto FromEntries(gsl::not_null<GitCASPtr> cas, - GitCAS::tree_entries_t&& entries, + GitRepo::tree_entries_t&& entries, std::string raw_id) noexcept -> std::optional<GitTree> { entries_t e{}; |