From af8f9a663d69e7bce01be200d556909b90ec1873 Mon Sep 17 00:00:00 2001 From: Paul Cristian Sarbu Date: Mon, 26 May 2025 13:19:48 +0200 Subject: FileRoot: Ensure all read blobs and trees contain valid entries In particular, ensure that Git roots check for, e.g., upwards symlinks, before returning blobs and trees. To ensure that only the bear minimum extra work is performed for this purpose, Git roots now keep also the root's GitTreeEntry as a field, allowing the validity check of root source trees to take place only once and only if required. --- .../execution_engine/executor/executor.hpp | 10 +- src/buildtool/file_system/TARGETS | 2 + src/buildtool/file_system/file_root.hpp | 124 ++++++++++++++++----- 3 files changed, 106 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/buildtool/execution_engine/executor/executor.hpp b/src/buildtool/execution_engine/executor/executor.hpp index e38a656f..018a1536 100644 --- a/src/buildtool/execution_engine/executor/executor.hpp +++ b/src/buildtool/execution_engine/executor/executor.hpp @@ -457,7 +457,8 @@ class ExecutorImpl { } else { // if known blob is not available, read and upload it - content = ReadGitBlob(repo, repo_config, hash); + content = ReadGitBlob( + repo, repo_config, hash, IsSymlinkObject(info.type)); } if (not content) { logger.Emit(LogLevel::Error, "failed to get content"); @@ -478,15 +479,16 @@ class ExecutorImpl { [[nodiscard]] static auto ReadGitBlob( std::string const& repo, gsl::not_null const& repo_config, - std::string const& hash) noexcept -> std::optional { + std::string const& hash, + bool is_symlink) noexcept -> std::optional { std::optional blob{}; if (auto const* ws_root = repo_config->WorkspaceRoot(repo)) { // try to obtain blob from local workspace's Git CAS, if any - blob = ws_root->ReadBlob(hash); + blob = ws_root->ReadBlob(hash, is_symlink); } if (not blob) { // try to obtain blob from global Git CAS, if any - blob = repo_config->ReadBlobFromGitCAS(hash, /*is_symlink=*/false); + blob = repo_config->ReadBlobFromGitCAS(hash, is_symlink); } return blob; } diff --git a/src/buildtool/file_system/TARGETS b/src/buildtool/file_system/TARGETS index 2978397b..cdd5bce7 100644 --- a/src/buildtool/file_system/TARGETS +++ b/src/buildtool/file_system/TARGETS @@ -178,6 +178,7 @@ [ "file_system_manager" , "git_cas" , "git_tree" + , "git_tree_utils" , "object_type" , "precomputed_root" , ["@", "fmt", "", "fmt"] @@ -193,6 +194,7 @@ , ["src/buildtool/storage", "config"] , ["src/utils/cpp", "concepts"] , ["src/utils/cpp", "expected"] + , ["src/utils/cpp", "hex_string"] , ["src/utils/cpp", "json"] ] , "stage": ["src", "buildtool", "file_system"] diff --git a/src/buildtool/file_system/file_root.hpp b/src/buildtool/file_system/file_root.hpp index 708e29cb..62ce9049 100644 --- a/src/buildtool/file_system/file_root.hpp +++ b/src/buildtool/file_system/file_root.hpp @@ -40,6 +40,7 @@ #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/file_system/git_cas.hpp" #include "src/buildtool/file_system/git_tree.hpp" +#include "src/buildtool/file_system/git_tree_utils.hpp" #include "src/buildtool/file_system/object_type.hpp" #include "src/buildtool/file_system/precomputed_root.hpp" #include "src/buildtool/logging/log_level.hpp" @@ -47,6 +48,7 @@ #include "src/buildtool/storage/config.hpp" #include "src/utils/cpp/concepts.hpp" #include "src/utils/cpp/expected.hpp" +#include "src/utils/cpp/hex_string.hpp" // Keep it to ensure fmt::format works on JSON objects #include "src/utils/cpp/json.hpp" // IWYU pragma: keep @@ -111,6 +113,7 @@ class FileRoot { struct RootGit { gsl::not_null cas; gsl::not_null tree; + gsl::not_null root_entry; }; // absent roots are defined by a tree hash with no witnessing repository @@ -338,9 +341,10 @@ class FileRoot { : root_{fs_root_t{std::move(root)}}, ignore_special_{ignore_special} {} FileRoot(gsl::not_null const& cas, gsl::not_null const& tree, + gsl::not_null const& root_entry, gsl::not_null const& storage_config, bool ignore_special = false) noexcept - : root_{RootGit{cas, tree}}, + : root_{RootGit{cas, tree, root_entry}}, storage_config_{storage_config}, ignore_special_{ignore_special} {} explicit FileRoot(PrecomputedRoot precomputed) @@ -351,19 +355,30 @@ class FileRoot { std::filesystem::path const& repo_path, std::string const& git_tree_id, bool ignore_special = false) noexcept -> std::optional { + // Check validity of hash early + auto raw_tree_id = FromHexString(git_tree_id); + if (not raw_tree_id) { + return std::nullopt; + } auto cas = GitCAS::Open(repo_path); if (not cas) { return std::nullopt; } - auto tree = GitTree::Read(cas, git_tree_id, ignore_special); + // Read source tree root permissively from repository. Validity of + // entries will be checked only when actually needed. + auto tree = GitTree::Read( + cas, git_tree_id, ignore_special, /*skip_checks=*/true); if (not tree) { return std::nullopt; } try { - return FileRoot{cas, - std::make_shared(std::move(*tree)), - storage_config, - ignore_special}; + return FileRoot{ + cas, + std::make_shared(std::move(*tree)), + std::make_shared( + std::move(cas), std::move(*raw_tree_id), ObjectType::Tree), + storage_config, + ignore_special}; } catch (...) { return std::nullopt; } @@ -431,10 +446,15 @@ class FileRoot { if (path == ".") { return true; } - return static_cast( - std::get(root_).tree->LookupEntryByPath(path)); + if (auto entry = + std::get(root_).tree->LookupEntryByPath(path)) { + return IsNonSpecialObject(entry->Type()) or + (not ignore_special_ and + IsSymlinkObject(entry->Type()) and + entry->Blob().has_value()); + } } - if (std::holds_alternative(root_)) { + else if (std::holds_alternative(root_)) { auto root_path = std::get(root_) / path; auto exists = FileSystemManager::Exists(root_path); auto type = @@ -463,10 +483,15 @@ class FileRoot { [[nodiscard]] auto IsSymlink( std::filesystem::path const& file_path) const noexcept -> bool { + if (ignore_special_) { + // skip unnecessary check + return false; + } if (std::holds_alternative(root_)) { if (auto entry = std::get(root_).tree->LookupEntryByPath( file_path)) { - return IsSymlinkObject(entry->Type()); + return IsSymlinkObject(entry->Type()) and + entry->Blob().has_value(); } } else if (std::holds_alternative(root_)) { @@ -506,6 +531,7 @@ class FileRoot { if (auto entry = std::get(root_).tree->LookupEntryByPath( file_path)) { if (IsBlobObject(entry->Type())) { + // return blob content, if valid return entry->Blob(); } } @@ -513,7 +539,8 @@ class FileRoot { else if (std::holds_alternative(root_)) { auto full_path = std::get(root_) / file_path; if (auto type = FileSystemManager::Type(full_path, - /*allow_upwards=*/true)) { + /*allow_upwards=*/false)) { + // return content of file or non-upward symlink return IsSymlinkObject(*type) ? FileSystemManager::ReadSymlink(full_path) : FileSystemManager::ReadFile(full_path); @@ -526,14 +553,37 @@ class FileRoot { const noexcept -> DirectoryEntries { try { if (std::holds_alternative(root_)) { - auto const& tree = std::get(root_).tree; + auto const& git_root = std::get(root_); + auto const& root_tree = git_root.tree; if (dir_path == ".") { - return DirectoryEntries{&(*tree)}; + if (ignore_special_ or + GitTreeUtils::IsGitTreeValid(*storage_config_, + git_root.root_entry)) { + // stored root tree contains only valid entries, so use + // it directly + return DirectoryEntries{&(*root_tree)}; + } + Logger::Log(LogLevel::Warning, + "Asked to read invalid Git source tree {}", + root_tree->FileRootHash()); + return DirectoryEntries{DirectoryEntries::pairs_t{}}; } - if (auto entry = tree->LookupEntryByPath(dir_path)) { - if (auto const& found_tree = entry->Tree(ignore_special_)) { - return DirectoryEntries{&(*found_tree)}; + if (auto entry = root_tree->LookupEntryByPath(dir_path)) { + if (ignore_special_ or + GitTreeUtils::IsGitTreeValid(*storage_config_, entry)) { + // simply get the GitTree reference; this is now valid + // and it is guaranteed to not reread unnecessarily + if (auto const& entry_tree = + entry->Tree(ignore_special_)) { + return DirectoryEntries{&(*entry_tree)}; + } } + Logger::Log( + LogLevel::Warning, + "Asked to read invalid subdir {} of Git source tree {}", + dir_path.string(), + root_tree->FileRootHash()); + return DirectoryEntries{DirectoryEntries::pairs_t{}}; } } else if (std::holds_alternative(root_)) { @@ -581,11 +631,14 @@ class FileRoot { } /// \brief Read a blob from the root based on its ID. - [[nodiscard]] auto ReadBlob(std::string const& blob_id) const noexcept + [[nodiscard]] auto ReadBlob(std::string const& blob_id, + bool is_symlink = false) const noexcept -> std::optional { if (std::holds_alternative(root_)) { - return std::get(root_).cas->ReadObject(blob_id, - /*is_hex_id=*/true); + return std::get(root_).cas->ReadObject( + blob_id, + /*is_hex_id=*/true, + /*as_valid_symlink=*/is_symlink); } return std::nullopt; } @@ -594,13 +647,17 @@ class FileRoot { /// This should include all valid entry types. [[nodiscard]] auto ReadTree(std::string const& tree_id) const noexcept -> std::optional { - if (std::holds_alternative(root_)) { - try { + try { + if (std::holds_alternative(root_)) { auto const& cas = std::get(root_).cas; - return GitTree::Read(cas, tree_id); - } catch (...) { - return std::nullopt; + return GitTreeUtils::ReadValidGitCASTree( + *storage_config_, tree_id, cas); } + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Error, + "Unexpected failure while reading source tree {}:\n{}", + tree_id, + ex.what()); } return std::nullopt; } @@ -615,11 +672,26 @@ class FileRoot { if (std::holds_alternative(root_)) { if (auto entry = std::get(root_).tree->LookupEntryByPath( file_path)) { - if (entry->IsBlob()) { + // if symlink, read content ahead of time to check validity + std::optional blob{}; + if (IsSymlinkObject(entry->Type())) { + blob = entry->Blob(); + if (not blob) { + return std::nullopt; + } + } + if (IsBlobObject(entry->Type())) { if (not ProtocolTraits::IsNative(hash_type)) { + // read blob content, if not read already + if (not blob) { + blob = entry->Blob(); + if (not blob) { + return std::nullopt; + } + } auto compatible_hash = GitHashesConverter::Instance().RegisterGitEntry( - entry->Hash(), *entry->Blob(), repository); + entry->Hash(), *std::move(blob), repository); auto digest = ArtifactDigestFactory::Create(hash_type, compatible_hash, -- cgit v1.2.3