diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/buildtool/file_system/git_cas.cpp | 136 | ||||
-rw-r--r-- | src/buildtool/file_system/git_cas.hpp | 30 | ||||
-rw-r--r-- | src/buildtool/file_system/git_tree.cpp | 82 | ||||
-rw-r--r-- | src/buildtool/file_system/git_tree.hpp | 24 |
4 files changed, 193 insertions, 79 deletions
diff --git a/src/buildtool/file_system/git_cas.cpp b/src/buildtool/file_system/git_cas.cpp index ccfef032..96674584 100644 --- a/src/buildtool/file_system/git_cas.cpp +++ b/src/buildtool/file_system/git_cas.cpp @@ -28,14 +28,14 @@ constexpr auto kOIDHexSize{GIT_OID_HEXSZ}; bool is_hex_id = false) noexcept -> std::optional<git_oid> { #ifndef BOOTSTRAP_BUILD_TOOL - if ((is_hex_id and id.size() < kOIDHexSize) or id.size() < kOIDRawSize) { + if (id.size() < (is_hex_id ? kOIDHexSize : kOIDRawSize)) { Logger::Log(LogLevel::Error, "invalid git object id {}", is_hex_id ? id : ToHexString(id)); return std::nullopt; } git_oid oid{}; - if (is_hex_id and git_oid_fromstr(&oid, id.data()) == 0) { + if (is_hex_id and git_oid_fromstr(&oid, id.c_str()) == 0) { return oid; } if (not is_hex_id and @@ -53,6 +53,44 @@ constexpr auto kOIDHexSize{GIT_OID_HEXSZ}; return std::nullopt; } +[[nodiscard]] auto ToHexString(git_oid const& oid) noexcept + -> std::optional<std::string> { + std::string hex_id(GIT_OID_HEXSZ, '\0'); +#ifndef BOOTSTRAP_BUILD_TOOL + if (git_oid_fmt(hex_id.data(), &oid) != 0) { + return std::nullopt; + } +#endif + return hex_id; +} + +[[nodiscard]] auto ToRawString(git_oid const& oid) noexcept + -> std::optional<std::string> { + if (auto hex_id = ToHexString(oid)) { + return FromHexString(*hex_id); + } + return std::nullopt; +} + +[[nodiscard]] auto GitFileModeToObjectType(git_filemode_t const& mode) noexcept + -> std::optional<ObjectType> { + switch (mode) { + case GIT_FILEMODE_BLOB: + return ObjectType::File; + case GIT_FILEMODE_BLOB_EXECUTABLE: + return ObjectType::Executable; + case GIT_FILEMODE_TREE: + return ObjectType::Tree; + default: { + std::ostringstream str; + str << std::oct << static_cast<int>(mode); + Logger::Log( + LogLevel::Error, "unsupported git filemode {}", str.str()); + return std::nullopt; + } + } +} + [[nodiscard]] auto GitTypeToObjectType(git_object_t const& type) noexcept -> std::optional<ObjectType> { switch (type) { @@ -68,6 +106,51 @@ constexpr auto kOIDHexSize{GIT_OID_HEXSZ}; } } +[[maybe_unused]] [[nodiscard]] auto ValidateEntries( + GitCAS::tree_entries_t const& entries) -> bool { + return std::all_of(entries.begin(), entries.end(), [](auto entry) { + auto const& [id, nodes] = entry; + // for a given raw id, either all entries are trees or none of them + return std::all_of( + nodes.begin(), + nodes.end(), + [](auto entry) { return IsTreeObject(entry.type); }) or + std::none_of(nodes.begin(), nodes.end(), [](auto entry) { + return IsTreeObject(entry.type); + }); + }); +} + +auto const repo_closer = [](gsl::owner<git_repository*> repo) { + if (repo != nullptr) { + git_repository_free(repo); + } +}; + +auto const tree_closer = [](gsl::owner<git_tree*> tree) { + if (tree != nullptr) { + git_tree_free(tree); + } +}; + +[[nodiscard]] auto flat_tree_walker(const char* /*root*/, + const git_tree_entry* entry, + void* payload) noexcept -> int { + auto* entries = + reinterpret_cast<GitCAS::tree_entries_t*>(payload); // NOLINT + + std::string name = git_tree_entry_name(entry); + auto const* oid = git_tree_entry_id(entry); + if (auto raw_id = ToRawString(*oid)) { + if (auto type = + GitFileModeToObjectType(git_tree_entry_filemode(entry))) { + (*entries)[*raw_id].emplace_back(std::move(name), *type); + return 1; // return >=0 on success, 1 == skip subtrees (flat) + } + } + return -1; // fail +} + } // namespace auto GitCAS::Open(std::filesystem::path const& repo_path) noexcept @@ -137,6 +220,55 @@ auto GitCAS::ReadObject(std::string const& id, bool is_hex_id) const noexcept #endif } +auto GitCAS::ReadTree(std::string const& id, bool is_hex_id) const noexcept + -> std::optional<tree_entries_t> { +#ifdef BOOTSTRAP_BUILD_TOOL + return std::nullopt; +#else + // create object id + auto oid = GitObjectID(id, is_hex_id); + if (not oid) { + return std::nullopt; + } + + // create fake repository from ODB + git_repository* repo_ptr{nullptr}; + if (git_repository_wrap_odb(&repo_ptr, odb_) != 0) { + Logger::Log(LogLevel::Debug, + "failed to create fake Git repository from object db"); + return std::nullopt; + } + auto fake_repo = std::unique_ptr<git_repository, decltype(repo_closer)>{ + repo_ptr, repo_closer}; + + // lookup tree + git_tree* tree_ptr{nullptr}; + if (git_tree_lookup(&tree_ptr, fake_repo.get(), &(*oid)) != 0) { + Logger::Log(LogLevel::Debug, + "failed to lookup Git tree {}", + is_hex_id ? std::string{id} : ToHexString(id)); + return std::nullopt; + } + auto tree = + std::unique_ptr<git_tree, decltype(tree_closer)>{tree_ptr, tree_closer}; + + // walk tree (flat) and create entries + tree_entries_t entries{}; + entries.reserve(git_tree_entrycount(tree.get())); + if (git_tree_walk( + tree.get(), GIT_TREEWALK_PRE, flat_tree_walker, &entries) != 0) { + Logger::Log(LogLevel::Debug, + "failed to walk Git tree {}", + is_hex_id ? std::string{id} : ToHexString(id)); + return std::nullopt; + } + + gsl_EnsuresAudit(ValidateEntries(entries)); + + return entries; +#endif +} + auto GitCAS::ReadHeader(std::string const& id, bool is_hex_id) const noexcept -> std::optional<std::pair<std::size_t, ObjectType>> { #ifndef BOOTSTRAP_BUILD_TOOL diff --git a/src/buildtool/file_system/git_cas.hpp b/src/buildtool/file_system/git_cas.hpp index c964a739..051e40f1 100644 --- a/src/buildtool/file_system/git_cas.hpp +++ b/src/buildtool/file_system/git_cas.hpp @@ -4,6 +4,8 @@ #include <filesystem> #include <memory> #include <optional> +#include <unordered_map> +#include <vector> #include "src/buildtool/file_system/object_type.hpp" @@ -17,6 +19,25 @@ using GitCASPtr = std::shared_ptr<GitCAS const>; /// \brief Git CAS that maintains its own libgit2 global state. class GitCAS { public: + // Stores the data for defining a single Git tree entry, which consists of + // a name (flat basename) and an object type (file/executable/tree). + struct tree_entry_t { + tree_entry_t(std::string n, ObjectType t) + : name{std::move(n)}, type{t} {} + std::string name; + ObjectType type; + [[nodiscard]] auto operator==(tree_entry_t const& other) const noexcept + -> bool { + return name == other.name and type == other.type; + } + }; + + // Tree entries by raw id. The same id might refer to multiple entries. + // Note that sharding by id is used as this format enables a more efficient + // internal implementation for creating trees. + using tree_entries_t = + std::unordered_map<std::string, std::vector<tree_entry_t>>; + static auto Open(std::filesystem::path const& repo_path) noexcept -> GitCASPtr; @@ -47,6 +68,15 @@ class GitCAS { bool is_hex_id = false) const noexcept -> std::optional<std::pair<std::size_t, ObjectType>>; + /// \brief Read entries from tree in CAS. + /// Reading a tree must be backed by an object database. Therefore, a valid + /// instance of this class is required. + /// \param id The object id. + /// \param is_hex_id Specify whether `id` is hex string or raw. + [[nodiscard]] auto ReadTree(std::string const& id, + bool is_hex_id = false) const noexcept + -> std::optional<tree_entries_t>; + private: git_odb* odb_{nullptr}; bool initialized_{false}; diff --git a/src/buildtool/file_system/git_tree.cpp b/src/buildtool/file_system/git_tree.cpp index f4912a7b..8d220e30 100644 --- a/src/buildtool/file_system/git_tree.cpp +++ b/src/buildtool/file_system/git_tree.cpp @@ -11,64 +11,6 @@ extern "C" { namespace { -constexpr auto kOIDRawSize{GIT_OID_RAWSZ}; - -[[nodiscard]] auto PermToType(std::string const& perm_str) noexcept - -> std::optional<ObjectType> { - constexpr auto kPermBase = 8; - constexpr auto kTreePerm = 040000; - constexpr auto kFilePerm = 0100644; - constexpr auto kExecPerm = 0100755; - constexpr auto kLinkPerm = 0120000; - - int perm = std::stoi(perm_str, nullptr, kPermBase); - - switch (perm) { - case kTreePerm: - return ObjectType::Tree; - case kFilePerm: - return ObjectType::File; - case kExecPerm: - return ObjectType::Executable; - case kLinkPerm: - Logger::Log(LogLevel::Error, "symlinks are not yet supported"); - return std::nullopt; - default: - Logger::Log(LogLevel::Error, "unsupported permission {}", perm_str); - return std::nullopt; - } -} - -auto ParseRawTreeObject(GitCASPtr const& cas, - std::string const& raw_tree) noexcept - -> std::optional<GitTree::entries_t> { - std::string perm{}; - std::string path{}; - std::string hash(kOIDRawSize, '\0'); - std::istringstream iss{raw_tree}; - GitTree::entries_t entries{}; - // raw tree format is: "<perm> <path>\0<hash>[next entries...]" - while (std::getline(iss, perm, ' ') and // <perm> - std::getline(iss, path, '\0') and // <path> - iss.read(hash.data(), // <hash> - static_cast<std::streamsize>(hash.size()))) { - auto type = PermToType(perm); - if (not type) { - return std::nullopt; - } - try { - entries.emplace(path, - std::make_shared<GitTreeEntry>(cas, hash, *type)); - } catch (std::exception const& ex) { - Logger::Log(LogLevel::Error, - "parsing git raw tree object failed with:\n{}", - ex.what()); - return std::nullopt; - } - } - return entries; -} - // resolve '.' and '..' in path. [[nodiscard]] auto ResolveRelativePath( std::filesystem::path const& path) noexcept -> std::filesystem::path { @@ -111,19 +53,12 @@ auto GitTree::Read(std::filesystem::path const& repo_path, auto GitTree::Read(gsl::not_null<GitCASPtr> const& cas, std::string const& tree_id) noexcept -> std::optional<GitTree> { - auto raw_id = FromHexString(tree_id); - if (not raw_id) { - return std::nullopt; - } - auto obj = cas->ReadObject(*raw_id); - if (not obj) { - return std::nullopt; - } - auto entries = ParseRawTreeObject(cas, *obj); - if (not entries) { - return std::nullopt; + if (auto raw_id = FromHexString(tree_id)) { + if (auto entries = cas->ReadTree(*raw_id)) { + return GitTree::FromEntries(cas, std::move(*entries), *raw_id); + } } - return GitTree{cas, std::move(*entries), std::move(*raw_id)}; + return std::nullopt; } auto GitTree::LookupEntryByName(std::string const& name) const noexcept @@ -152,12 +87,9 @@ auto GitTreeEntry::Blob() const noexcept -> std::optional<std::string> { auto GitTreeEntry::Tree() const& noexcept -> std::optional<GitTree> const& { return tree_cached_.SetOnceAndGet([this]() -> std::optional<GitTree> { - std::optional<std::string> obj{}; if (IsTree()) { - if (auto obj = cas_->ReadObject(raw_id_)) { - if (auto entries = ParseRawTreeObject(cas_, *obj)) { - return GitTree{cas_, std::move(*entries), raw_id_}; - } + if (auto entries = cas_->ReadTree(raw_id_)) { + return GitTree::FromEntries(cas_, std::move(*entries), raw_id_); } } return std::nullopt; diff --git a/src/buildtool/file_system/git_tree.hpp b/src/buildtool/file_system/git_tree.hpp index ff7dc828..eb1c2147 100644 --- a/src/buildtool/file_system/git_tree.hpp +++ b/src/buildtool/file_system/git_tree.hpp @@ -59,6 +59,26 @@ class GitTree { : cas_{std::move(cas)}, entries_{std::move(entries)}, raw_id_{std::move(raw_id)} {} + + [[nodiscard]] static auto FromEntries(gsl::not_null<GitCASPtr> cas, + GitCAS::tree_entries_t&& entries, + std::string raw_id) noexcept + -> std::optional<GitTree> { + entries_t e{}; + e.reserve(entries.size()); + for (auto& [id, es] : entries) { + for (auto& entry : es) { + try { + e.emplace( + std::move(entry.name), + std::make_shared<GitTreeEntry>(cas, id, entry.type)); + } catch (...) { + return std::nullopt; + } + } + } + return GitTree(std::move(cas), std::move(e), std::move(raw_id)); + } }; class GitTreeEntry { @@ -77,8 +97,8 @@ class GitTreeEntry { [[nodiscard]] auto Hash() const noexcept { return ToHexString(raw_id_); } [[nodiscard]] auto Type() const noexcept { return type_; } - // Use with care. Implementation might read entire object to obtain size. - // Consider using Blob()->size() instead. + // Use with care. Implementation might read entire object to obtain + // size. Consider using Blob()->size() instead. [[nodiscard]] auto Size() const noexcept -> std::optional<std::size_t>; private: |