diff options
author | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2025-05-26 13:19:48 +0200 |
---|---|---|
committer | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2025-06-04 14:34:44 +0200 |
commit | af8f9a663d69e7bce01be200d556909b90ec1873 (patch) | |
tree | 9b51eb2a757e3c44bb3715013433df580ea2d2f5 /src/buildtool | |
parent | d1f9bd1744b281adb353226c2f1285508e50ef53 (diff) | |
download | justbuild-af8f9a663d69e7bce01be200d556909b90ec1873.tar.gz |
FileRoot: Ensure all read blobs and trees contain valid entries
In particular, ensure that Git roots check for, e.g., upwards
symlinks, before returning blobs and trees. To ensure that only the
bear minimum extra work is performed for this purpose, Git roots
now keep also the root's GitTreeEntry as a field, allowing the
validity check of root source trees to take place only once and
only if required.
Diffstat (limited to 'src/buildtool')
-rw-r--r-- | src/buildtool/execution_engine/executor/executor.hpp | 10 | ||||
-rw-r--r-- | src/buildtool/file_system/TARGETS | 2 | ||||
-rw-r--r-- | src/buildtool/file_system/file_root.hpp | 124 |
3 files changed, 106 insertions, 30 deletions
diff --git a/src/buildtool/execution_engine/executor/executor.hpp b/src/buildtool/execution_engine/executor/executor.hpp index e38a656f..018a1536 100644 --- a/src/buildtool/execution_engine/executor/executor.hpp +++ b/src/buildtool/execution_engine/executor/executor.hpp @@ -457,7 +457,8 @@ class ExecutorImpl { } else { // if known blob is not available, read and upload it - content = ReadGitBlob(repo, repo_config, hash); + content = ReadGitBlob( + repo, repo_config, hash, IsSymlinkObject(info.type)); } if (not content) { logger.Emit(LogLevel::Error, "failed to get content"); @@ -478,15 +479,16 @@ class ExecutorImpl { [[nodiscard]] static auto ReadGitBlob( std::string const& repo, gsl::not_null<const RepositoryConfig*> const& repo_config, - std::string const& hash) noexcept -> std::optional<std::string> { + std::string const& hash, + bool is_symlink) noexcept -> std::optional<std::string> { std::optional<std::string> blob{}; if (auto const* ws_root = repo_config->WorkspaceRoot(repo)) { // try to obtain blob from local workspace's Git CAS, if any - blob = ws_root->ReadBlob(hash); + blob = ws_root->ReadBlob(hash, is_symlink); } if (not blob) { // try to obtain blob from global Git CAS, if any - blob = repo_config->ReadBlobFromGitCAS(hash, /*is_symlink=*/false); + blob = repo_config->ReadBlobFromGitCAS(hash, is_symlink); } return blob; } diff --git a/src/buildtool/file_system/TARGETS b/src/buildtool/file_system/TARGETS index 2978397b..cdd5bce7 100644 --- a/src/buildtool/file_system/TARGETS +++ b/src/buildtool/file_system/TARGETS @@ -178,6 +178,7 @@ [ "file_system_manager" , "git_cas" , "git_tree" + , "git_tree_utils" , "object_type" , "precomputed_root" , ["@", "fmt", "", "fmt"] @@ -193,6 +194,7 @@ , ["src/buildtool/storage", "config"] , ["src/utils/cpp", "concepts"] , ["src/utils/cpp", "expected"] + , ["src/utils/cpp", "hex_string"] , ["src/utils/cpp", "json"] ] , "stage": ["src", "buildtool", "file_system"] diff --git a/src/buildtool/file_system/file_root.hpp b/src/buildtool/file_system/file_root.hpp index 708e29cb..62ce9049 100644 --- a/src/buildtool/file_system/file_root.hpp +++ b/src/buildtool/file_system/file_root.hpp @@ -40,6 +40,7 @@ #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/file_system/git_cas.hpp" #include "src/buildtool/file_system/git_tree.hpp" +#include "src/buildtool/file_system/git_tree_utils.hpp" #include "src/buildtool/file_system/object_type.hpp" #include "src/buildtool/file_system/precomputed_root.hpp" #include "src/buildtool/logging/log_level.hpp" @@ -47,6 +48,7 @@ #include "src/buildtool/storage/config.hpp" #include "src/utils/cpp/concepts.hpp" #include "src/utils/cpp/expected.hpp" +#include "src/utils/cpp/hex_string.hpp" // Keep it to ensure fmt::format works on JSON objects #include "src/utils/cpp/json.hpp" // IWYU pragma: keep @@ -111,6 +113,7 @@ class FileRoot { struct RootGit { gsl::not_null<GitCASPtr> cas; gsl::not_null<GitTreePtr> tree; + gsl::not_null<GitTreeEntryPtr> root_entry; }; // absent roots are defined by a tree hash with no witnessing repository @@ -338,9 +341,10 @@ class FileRoot { : root_{fs_root_t{std::move(root)}}, ignore_special_{ignore_special} {} FileRoot(gsl::not_null<GitCASPtr> const& cas, gsl::not_null<GitTreePtr> const& tree, + gsl::not_null<GitTreeEntryPtr> const& root_entry, gsl::not_null<StorageConfig const*> const& storage_config, bool ignore_special = false) noexcept - : root_{RootGit{cas, tree}}, + : root_{RootGit{cas, tree, root_entry}}, storage_config_{storage_config}, ignore_special_{ignore_special} {} explicit FileRoot(PrecomputedRoot precomputed) @@ -351,19 +355,30 @@ class FileRoot { std::filesystem::path const& repo_path, std::string const& git_tree_id, bool ignore_special = false) noexcept -> std::optional<FileRoot> { + // Check validity of hash early + auto raw_tree_id = FromHexString(git_tree_id); + if (not raw_tree_id) { + return std::nullopt; + } auto cas = GitCAS::Open(repo_path); if (not cas) { return std::nullopt; } - auto tree = GitTree::Read(cas, git_tree_id, ignore_special); + // Read source tree root permissively from repository. Validity of + // entries will be checked only when actually needed. + auto tree = GitTree::Read( + cas, git_tree_id, ignore_special, /*skip_checks=*/true); if (not tree) { return std::nullopt; } try { - return FileRoot{cas, - std::make_shared<GitTree const>(std::move(*tree)), - storage_config, - ignore_special}; + return FileRoot{ + cas, + std::make_shared<GitTree const>(std::move(*tree)), + std::make_shared<GitTreeEntry const>( + std::move(cas), std::move(*raw_tree_id), ObjectType::Tree), + storage_config, + ignore_special}; } catch (...) { return std::nullopt; } @@ -431,10 +446,15 @@ class FileRoot { if (path == ".") { return true; } - return static_cast<bool>( - std::get<RootGit>(root_).tree->LookupEntryByPath(path)); + if (auto entry = + std::get<RootGit>(root_).tree->LookupEntryByPath(path)) { + return IsNonSpecialObject(entry->Type()) or + (not ignore_special_ and + IsSymlinkObject(entry->Type()) and + entry->Blob().has_value()); + } } - if (std::holds_alternative<fs_root_t>(root_)) { + else if (std::holds_alternative<fs_root_t>(root_)) { auto root_path = std::get<fs_root_t>(root_) / path; auto exists = FileSystemManager::Exists(root_path); auto type = @@ -463,10 +483,15 @@ class FileRoot { [[nodiscard]] auto IsSymlink( std::filesystem::path const& file_path) const noexcept -> bool { + if (ignore_special_) { + // skip unnecessary check + return false; + } if (std::holds_alternative<RootGit>(root_)) { if (auto entry = std::get<RootGit>(root_).tree->LookupEntryByPath( file_path)) { - return IsSymlinkObject(entry->Type()); + return IsSymlinkObject(entry->Type()) and + entry->Blob().has_value(); } } else if (std::holds_alternative<fs_root_t>(root_)) { @@ -506,6 +531,7 @@ class FileRoot { if (auto entry = std::get<RootGit>(root_).tree->LookupEntryByPath( file_path)) { if (IsBlobObject(entry->Type())) { + // return blob content, if valid return entry->Blob(); } } @@ -513,7 +539,8 @@ class FileRoot { else if (std::holds_alternative<fs_root_t>(root_)) { auto full_path = std::get<fs_root_t>(root_) / file_path; if (auto type = FileSystemManager::Type(full_path, - /*allow_upwards=*/true)) { + /*allow_upwards=*/false)) { + // return content of file or non-upward symlink return IsSymlinkObject(*type) ? FileSystemManager::ReadSymlink(full_path) : FileSystemManager::ReadFile(full_path); @@ -526,14 +553,37 @@ class FileRoot { const noexcept -> DirectoryEntries { try { if (std::holds_alternative<RootGit>(root_)) { - auto const& tree = std::get<RootGit>(root_).tree; + auto const& git_root = std::get<RootGit>(root_); + auto const& root_tree = git_root.tree; if (dir_path == ".") { - return DirectoryEntries{&(*tree)}; + if (ignore_special_ or + GitTreeUtils::IsGitTreeValid(*storage_config_, + git_root.root_entry)) { + // stored root tree contains only valid entries, so use + // it directly + return DirectoryEntries{&(*root_tree)}; + } + Logger::Log(LogLevel::Warning, + "Asked to read invalid Git source tree {}", + root_tree->FileRootHash()); + return DirectoryEntries{DirectoryEntries::pairs_t{}}; } - if (auto entry = tree->LookupEntryByPath(dir_path)) { - if (auto const& found_tree = entry->Tree(ignore_special_)) { - return DirectoryEntries{&(*found_tree)}; + if (auto entry = root_tree->LookupEntryByPath(dir_path)) { + if (ignore_special_ or + GitTreeUtils::IsGitTreeValid(*storage_config_, entry)) { + // simply get the GitTree reference; this is now valid + // and it is guaranteed to not reread unnecessarily + if (auto const& entry_tree = + entry->Tree(ignore_special_)) { + return DirectoryEntries{&(*entry_tree)}; + } } + Logger::Log( + LogLevel::Warning, + "Asked to read invalid subdir {} of Git source tree {}", + dir_path.string(), + root_tree->FileRootHash()); + return DirectoryEntries{DirectoryEntries::pairs_t{}}; } } else if (std::holds_alternative<fs_root_t>(root_)) { @@ -581,11 +631,14 @@ class FileRoot { } /// \brief Read a blob from the root based on its ID. - [[nodiscard]] auto ReadBlob(std::string const& blob_id) const noexcept + [[nodiscard]] auto ReadBlob(std::string const& blob_id, + bool is_symlink = false) const noexcept -> std::optional<std::string> { if (std::holds_alternative<RootGit>(root_)) { - return std::get<RootGit>(root_).cas->ReadObject(blob_id, - /*is_hex_id=*/true); + return std::get<RootGit>(root_).cas->ReadObject( + blob_id, + /*is_hex_id=*/true, + /*as_valid_symlink=*/is_symlink); } return std::nullopt; } @@ -594,13 +647,17 @@ class FileRoot { /// This should include all valid entry types. [[nodiscard]] auto ReadTree(std::string const& tree_id) const noexcept -> std::optional<GitTree> { - if (std::holds_alternative<RootGit>(root_)) { - try { + try { + if (std::holds_alternative<RootGit>(root_)) { auto const& cas = std::get<RootGit>(root_).cas; - return GitTree::Read(cas, tree_id); - } catch (...) { - return std::nullopt; + return GitTreeUtils::ReadValidGitCASTree( + *storage_config_, tree_id, cas); } + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Error, + "Unexpected failure while reading source tree {}:\n{}", + tree_id, + ex.what()); } return std::nullopt; } @@ -615,11 +672,26 @@ class FileRoot { if (std::holds_alternative<RootGit>(root_)) { if (auto entry = std::get<RootGit>(root_).tree->LookupEntryByPath( file_path)) { - if (entry->IsBlob()) { + // if symlink, read content ahead of time to check validity + std::optional<std::string> blob{}; + if (IsSymlinkObject(entry->Type())) { + blob = entry->Blob(); + if (not blob) { + return std::nullopt; + } + } + if (IsBlobObject(entry->Type())) { if (not ProtocolTraits::IsNative(hash_type)) { + // read blob content, if not read already + if (not blob) { + blob = entry->Blob(); + if (not blob) { + return std::nullopt; + } + } auto compatible_hash = GitHashesConverter::Instance().RegisterGitEntry( - entry->Hash(), *entry->Blob(), repository); + entry->Hash(), *std::move(blob), repository); auto digest = ArtifactDigestFactory::Create(hash_type, compatible_hash, |