From d7f6ca59d2e2713efe40c96e2c6522f6d86180d2 Mon Sep 17 00:00:00 2001 From: Paul Cristian Sarbu Date: Mon, 12 Aug 2024 15:46:39 +0200 Subject: GitRepo: Create trees by directly writing to object database The libgit2 treebuilder has unnecessary validity checks for tree entries, including for Git-specific magic names (such as '.git'), which cannot be disabled. However, in our tool any filesystem entry should be allowed to be part of a tree. Therefore, the treebuilder-based implementation for CreateTree is replaced by a direct writing of trees, by content, into the underlying repository object database. Additionally, as direct insertion into the object database does not check the validity of the tree entries, as was done implicitly by the treebuilder before, add a check for existence of the tree entries into the debug-level generic validity check, with the option to not perform it for ReadTree (as it is unnecessary there). --- src/buildtool/file_system/git_repo.cpp | 117 +++++++++++++++++++++++---------- 1 file changed, 81 insertions(+), 36 deletions(-) (limited to 'src/buildtool/file_system/git_repo.cpp') diff --git a/src/buildtool/file_system/git_repo.cpp b/src/buildtool/file_system/git_repo.cpp index 5cdb552b..d378a2ba 100644 --- a/src/buildtool/file_system/git_repo.cpp +++ b/src/buildtool/file_system/git_repo.cpp @@ -16,6 +16,9 @@ #include #include +#include +#include +#include #include #include @@ -115,11 +118,32 @@ std::unordered_set const kNonSpecialGitFileModes{ } } +[[nodiscard]] auto ObjectTypeToPerm(ObjectType type) noexcept -> std::string { + switch (type) { + case ObjectType::File: + return "100644"; + case ObjectType::Executable: + return "100755"; + case ObjectType::Tree: + return "40000"; + case ObjectType::Symlink: + return "120000"; + } + return ""; // make gcc happy +} + #ifndef NDEBUG -[[nodiscard]] auto ValidateEntries(GitRepo::tree_entries_t const& entries) - -> bool { - return std::all_of(entries.begin(), entries.end(), [](auto entry) { +/// 
\brief Debug-level check that given tree entries are consistent. If needed, +/// also check that the entries are in the underlying object database of the +/// provided CAS instance. +[[nodiscard]] auto ValidateEntries(GitRepo::tree_entries_t const& entries, + GitCASPtr const& cas = nullptr) -> bool { + return std::all_of(entries.begin(), entries.end(), [cas](auto entry) { auto const& [id, nodes] = entry; + // if CAS given, check that the entry is in the object database + if (cas != nullptr and not cas->ReadHeader(id)) { + return false; + } // for a given raw id, either all entries are trees or none of them return std::all_of( nodes.begin(), @@ -1861,6 +1885,7 @@ auto GitRepo::ReadTree(std::string const& id, } #ifndef NDEBUG + // Check consistency of entries. No need to check if entries exist. EnsuresAudit(ValidateEntries(entries)); #endif @@ -1880,48 +1905,68 @@ auto GitRepo::CreateTree(tree_entries_t const& entries) const noexcept return std::nullopt; #else #ifndef NDEBUG - ExpectsAudit(ValidateEntries(entries)); + // Check consistency of entries. Also check that entries exist. + ExpectsAudit(ValidateEntries(entries, GetGitCAS())); #endif // NDEBUG // share the odb lock std::shared_lock lock{GetGitCAS()->mutex_}; - git_treebuilder* builder_ptr{nullptr}; - if (git_treebuilder_new(&builder_ptr, repo_->Ptr(), nullptr) != 0) { - Logger::Log(LogLevel::Debug, "failed to create Git tree builder"); - return std::nullopt; - } - auto builder = - std::unique_ptr{ - builder_ptr, treebuilder_closer}; - - for (auto const& [raw_id, es] : entries) { - auto id = GitObjectID(raw_id, /*is_hex_id=*/false); - for (auto const& entry : es) { - if (not id or git_treebuilder_insert( - nullptr, - builder.get(), - entry.name.c_str(), - &(*id), - ObjectTypeToGitFileMode(entry.type)) != 0) { - Logger::Log( - LogLevel::Debug, - "failed adding object {} to Git tree{}", - ToHexString(raw_id), - id ? 
fmt::format(" with:\n{}", GitLastError()) : ""); - return std::nullopt; + try { + // As the libgit2 treebuilder checks for magic names and does not allow + // us to add any and all entries to a Git tree, we resort to + // constructing the tree content ourselves and add it manually to the + // repository ODB. + + // We need to sort the filenames according to Git rules: tree entries + // need to be considered "as if" their filename has a trailing + // separator ('/'). + std::map> sorted; + for (auto const& [raw_id, es] : entries) { + for (auto const& entry : es) { + sorted.emplace( + entry.name + (IsTreeObject(entry.type) ? "/" : ""), + std::make_pair(raw_id, entry.type)); } } - } - git_oid oid; - if (git_treebuilder_write(&oid, builder.get()) != 0) { - return std::nullopt; - } - auto raw_id = ToRawString(oid); - if (not raw_id) { + // Compute the tree content. For tree entries the trailing slash needs + // to be removed from filename before appending it. + std::stringstream tree_content{}; + for (auto const& [name, entry] : sorted) { + std::string_view const filename{ + name.data(), + name.size() - + static_cast(IsTreeObject(entry.second))}; + // tree format: " \0[next entries...]" + tree_content << fmt::format("{} {}", + ObjectTypeToPerm(entry.second), + filename) + << '\0' << entry.first; + } + + // Write tree to ODB and return raw id string + git_oid oid; + auto const tree_content_str = tree_content.str(); + if (git_odb_write(&oid, + GetGitOdb().get(), + tree_content_str.c_str(), + tree_content_str.size(), + GIT_OBJECT_TREE) != 0) { + Logger::Log(LogLevel::Debug, + "failed writing tree to ODB with:\n{}", + GitLastError()); + return std::nullopt; + } + auto raw_id = ToRawString(oid); + if (not raw_id) { + return std::nullopt; + } + return std::move(*raw_id); + } catch (std::exception const& ex) { + Logger::Log( + LogLevel::Error, "creating tree failed with:\n{}", ex.what()); return std::nullopt; } - return std::move(*raw_id); #endif } -- cgit v1.2.3