diff options
-rw-r--r-- | src/buildtool/execution_engine/executor/executor.hpp | 10 | ||||
-rw-r--r-- | src/buildtool/file_system/TARGETS | 2 | ||||
-rw-r--r-- | src/buildtool/file_system/file_root.hpp | 124 |
3 files changed, 106 insertions, 30 deletions
diff --git a/src/buildtool/execution_engine/executor/executor.hpp b/src/buildtool/execution_engine/executor/executor.hpp index e38a656f..018a1536 100644 --- a/src/buildtool/execution_engine/executor/executor.hpp +++ b/src/buildtool/execution_engine/executor/executor.hpp @@ -457,7 +457,8 @@ class ExecutorImpl { } else { // if known blob is not available, read and upload it - content = ReadGitBlob(repo, repo_config, hash); + content = ReadGitBlob( + repo, repo_config, hash, IsSymlinkObject(info.type)); } if (not content) { logger.Emit(LogLevel::Error, "failed to get content"); @@ -478,15 +479,16 @@ class ExecutorImpl { [[nodiscard]] static auto ReadGitBlob( std::string const& repo, gsl::not_null<const RepositoryConfig*> const& repo_config, - std::string const& hash) noexcept -> std::optional<std::string> { + std::string const& hash, + bool is_symlink) noexcept -> std::optional<std::string> { std::optional<std::string> blob{}; if (auto const* ws_root = repo_config->WorkspaceRoot(repo)) { // try to obtain blob from local workspace's Git CAS, if any - blob = ws_root->ReadBlob(hash); + blob = ws_root->ReadBlob(hash, is_symlink); } if (not blob) { // try to obtain blob from global Git CAS, if any - blob = repo_config->ReadBlobFromGitCAS(hash, /*is_symlink=*/false); + blob = repo_config->ReadBlobFromGitCAS(hash, is_symlink); } return blob; } diff --git a/src/buildtool/file_system/TARGETS b/src/buildtool/file_system/TARGETS index 2978397b..cdd5bce7 100644 --- a/src/buildtool/file_system/TARGETS +++ b/src/buildtool/file_system/TARGETS @@ -178,6 +178,7 @@ [ "file_system_manager" , "git_cas" , "git_tree" + , "git_tree_utils" , "object_type" , "precomputed_root" , ["@", "fmt", "", "fmt"] @@ -193,6 +194,7 @@ , ["src/buildtool/storage", "config"] , ["src/utils/cpp", "concepts"] , ["src/utils/cpp", "expected"] + , ["src/utils/cpp", "hex_string"] , ["src/utils/cpp", "json"] ] , "stage": ["src", "buildtool", "file_system"] diff --git a/src/buildtool/file_system/file_root.hpp b/src/buildtool/file_system/file_root.hpp index 708e29cb..62ce9049 100644 --- a/src/buildtool/file_system/file_root.hpp +++ b/src/buildtool/file_system/file_root.hpp @@ -40,6 +40,7 @@ #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/file_system/git_cas.hpp" #include "src/buildtool/file_system/git_tree.hpp" +#include "src/buildtool/file_system/git_tree_utils.hpp" #include "src/buildtool/file_system/object_type.hpp" #include "src/buildtool/file_system/precomputed_root.hpp" #include "src/buildtool/logging/log_level.hpp" @@ -47,6 +48,7 @@ #include "src/buildtool/storage/config.hpp" #include "src/utils/cpp/concepts.hpp" #include "src/utils/cpp/expected.hpp" +#include "src/utils/cpp/hex_string.hpp" // Keep it to ensure fmt::format works on JSON objects #include "src/utils/cpp/json.hpp" // IWYU pragma: keep @@ -111,6 +113,7 @@ class FileRoot { struct RootGit { gsl::not_null<GitCASPtr> cas; gsl::not_null<GitTreePtr> tree; + gsl::not_null<GitTreeEntryPtr> root_entry; }; // absent roots are defined by a tree hash with no witnessing repository @@ -338,9 +341,10 @@ class FileRoot { : root_{fs_root_t{std::move(root)}}, ignore_special_{ignore_special} {} FileRoot(gsl::not_null<GitCASPtr> const& cas, gsl::not_null<GitTreePtr> const& tree, + gsl::not_null<GitTreeEntryPtr> const& root_entry, gsl::not_null<StorageConfig const*> const& storage_config, bool ignore_special = false) noexcept - : root_{RootGit{cas, tree}}, + : root_{RootGit{cas, tree, root_entry}}, storage_config_{storage_config}, ignore_special_{ignore_special} {} explicit FileRoot(PrecomputedRoot precomputed) @@ -351,19 +355,30 @@ class FileRoot { std::filesystem::path const& repo_path, std::string const& git_tree_id, bool ignore_special = false) noexcept -> std::optional<FileRoot> { + // Check validity of hash early + auto raw_tree_id = FromHexString(git_tree_id); + if (not raw_tree_id) { + return std::nullopt; + } auto cas = GitCAS::Open(repo_path); if (not cas) { return std::nullopt; } - auto tree = GitTree::Read(cas, git_tree_id, ignore_special); + // Read source tree root permissively from repository. Validity of + // entries will be checked only when actually needed. + auto tree = GitTree::Read( + cas, git_tree_id, ignore_special, /*skip_checks=*/true); if (not tree) { return std::nullopt; } try { - return FileRoot{cas, - std::make_shared<GitTree const>(std::move(*tree)), - storage_config, - ignore_special}; + return FileRoot{ + cas, + std::make_shared<GitTree const>(std::move(*tree)), + std::make_shared<GitTreeEntry const>( + std::move(cas), std::move(*raw_tree_id), ObjectType::Tree), + storage_config, + ignore_special}; } catch (...) { return std::nullopt; } @@ -431,10 +446,15 @@ class FileRoot { if (path == ".") { return true; } - return static_cast<bool>( - std::get<RootGit>(root_).tree->LookupEntryByPath(path)); + if (auto entry = + std::get<RootGit>(root_).tree->LookupEntryByPath(path)) { + return IsNonSpecialObject(entry->Type()) or + (not ignore_special_ and + IsSymlinkObject(entry->Type()) and + entry->Blob().has_value()); + } } - if (std::holds_alternative<fs_root_t>(root_)) { + else if (std::holds_alternative<fs_root_t>(root_)) { auto root_path = std::get<fs_root_t>(root_) / path; auto exists = FileSystemManager::Exists(root_path); auto type = @@ -463,10 +483,15 @@ class FileRoot { [[nodiscard]] auto IsSymlink( std::filesystem::path const& file_path) const noexcept -> bool { + if (ignore_special_) { + // skip unnecessary check + return false; + } if (std::holds_alternative<RootGit>(root_)) { if (auto entry = std::get<RootGit>(root_).tree->LookupEntryByPath( file_path)) { - return IsSymlinkObject(entry->Type()); + return IsSymlinkObject(entry->Type()) and + entry->Blob().has_value(); } } else if (std::holds_alternative<fs_root_t>(root_)) { @@ -506,6 +531,7 @@ class FileRoot { if (auto entry = std::get<RootGit>(root_).tree->LookupEntryByPath( file_path)) { if (IsBlobObject(entry->Type())) { + // return blob content, if valid return entry->Blob(); } } @@ -513,7 +539,8 @@ class FileRoot { else if (std::holds_alternative<fs_root_t>(root_)) { auto full_path = std::get<fs_root_t>(root_) / file_path; if (auto type = FileSystemManager::Type(full_path, - /*allow_upwards=*/true)) { + /*allow_upwards=*/false)) { + // return content of file or non-upward symlink return IsSymlinkObject(*type) ? FileSystemManager::ReadSymlink(full_path) : FileSystemManager::ReadFile(full_path); @@ -526,14 +553,37 @@ class FileRoot { const noexcept -> DirectoryEntries { try { if (std::holds_alternative<RootGit>(root_)) { - auto const& tree = std::get<RootGit>(root_).tree; + auto const& git_root = std::get<RootGit>(root_); + auto const& root_tree = git_root.tree; if (dir_path == ".") { - return DirectoryEntries{&(*tree)}; + if (ignore_special_ or + GitTreeUtils::IsGitTreeValid(*storage_config_, + git_root.root_entry)) { + // stored root tree contains only valid entries, so use + // it directly + return DirectoryEntries{&(*root_tree)}; + } + Logger::Log(LogLevel::Warning, + "Asked to read invalid Git source tree {}", + root_tree->FileRootHash()); + return DirectoryEntries{DirectoryEntries::pairs_t{}}; } - if (auto entry = tree->LookupEntryByPath(dir_path)) { - if (auto const& found_tree = entry->Tree(ignore_special_)) { - return DirectoryEntries{&(*found_tree)}; + if (auto entry = root_tree->LookupEntryByPath(dir_path)) { + if (ignore_special_ or + GitTreeUtils::IsGitTreeValid(*storage_config_, entry)) { + // simply get the GitTree reference; this is now valid + // and it is guaranteed to not reread unnecessarily + if (auto const& entry_tree = + entry->Tree(ignore_special_)) { + return DirectoryEntries{&(*entry_tree)}; + } } + Logger::Log( + LogLevel::Warning, + "Asked to read invalid subdir {} of Git source tree {}", + dir_path.string(), + root_tree->FileRootHash()); + return DirectoryEntries{DirectoryEntries::pairs_t{}}; } } else if (std::holds_alternative<fs_root_t>(root_)) { @@ -581,11 +631,14 @@ class FileRoot { } /// \brief Read a blob from the root based on its ID. - [[nodiscard]] auto ReadBlob(std::string const& blob_id) const noexcept + [[nodiscard]] auto ReadBlob(std::string const& blob_id, + bool is_symlink = false) const noexcept -> std::optional<std::string> { if (std::holds_alternative<RootGit>(root_)) { - return std::get<RootGit>(root_).cas->ReadObject(blob_id, - /*is_hex_id=*/true); + return std::get<RootGit>(root_).cas->ReadObject( + blob_id, + /*is_hex_id=*/true, + /*as_valid_symlink=*/is_symlink); } return std::nullopt; } @@ -594,13 +647,17 @@ class FileRoot { /// This should include all valid entry types. [[nodiscard]] auto ReadTree(std::string const& tree_id) const noexcept -> std::optional<GitTree> { - if (std::holds_alternative<RootGit>(root_)) { - try { + try { + if (std::holds_alternative<RootGit>(root_)) { auto const& cas = std::get<RootGit>(root_).cas; - return GitTree::Read(cas, tree_id); - } catch (...) { - return std::nullopt; + return GitTreeUtils::ReadValidGitCASTree( + *storage_config_, tree_id, cas); } + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Error, + "Unexpected failure while reading source tree {}:\n{}", + tree_id, + ex.what()); } return std::nullopt; } @@ -615,11 +672,26 @@ class FileRoot { if (std::holds_alternative<RootGit>(root_)) { if (auto entry = std::get<RootGit>(root_).tree->LookupEntryByPath( file_path)) { - if (entry->IsBlob()) { + // if symlink, read content ahead of time to check validity + std::optional<std::string> blob{}; + if (IsSymlinkObject(entry->Type())) { + blob = entry->Blob(); + if (not blob) { + return std::nullopt; + } + } + if (IsBlobObject(entry->Type())) { if (not ProtocolTraits::IsNative(hash_type)) { + // read blob content, if not read already + if (not blob) { + blob = entry->Blob(); + if (not blob) { + return std::nullopt; + } + } auto compatible_hash = GitHashesConverter::Instance().RegisterGitEntry( - entry->Hash(), *entry->Blob(), repository); + entry->Hash(), *std::move(blob), repository); auto digest = ArtifactDigestFactory::Create(hash_type, compatible_hash, |