diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/buildtool/execution_api/remote/bazel/bazel_network.cpp | 23 | ||||
-rw-r--r-- | src/buildtool/file_system/TARGETS | 5 | ||||
-rw-r--r-- | src/buildtool/file_system/file_system_manager.hpp | 20 | ||||
-rw-r--r-- | src/buildtool/file_system/git_repo.cpp | 110 | ||||
-rw-r--r-- | src/buildtool/file_system/git_repo.hpp | 18 | ||||
-rw-r--r-- | src/buildtool/file_system/git_tree.cpp | 38 | ||||
-rw-r--r-- | src/buildtool/storage/local_cas.tpp | 33 |
7 files changed, 202 insertions, 45 deletions
diff --git a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp index 159a848e..60ed22e1 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp @@ -42,9 +42,32 @@ namespace { auto blobs = network->ReadBlobs({digest}).Next(); if (blobs.size() == 1) { auto const& content = blobs.at(0).data; + auto check_symlinks = + [&network](std::vector<bazel_re::Digest> const& ids) { + auto size = ids.size(); + auto reader = network->ReadBlobs(ids); + auto blobs = reader.Next(); + std::size_t count{}; + while (not blobs.empty()) { + if (count + blobs.size() > size) { + Logger::Log(LogLevel::Debug, + "received more blobs than requested."); + return false; + } + for (auto const& blob : blobs) { + if (not PathIsNonUpwards(blob.data)) { + return false; + } + } + count += blobs.size(); + blobs = reader.Next(); + } + return true; + }; return GitRepo::ReadTreeData( content, HashFunction::ComputeTreeHash(content).Bytes(), + check_symlinks, /*is_hex_id=*/false); } Logger::Log(LogLevel::Error, diff --git a/src/buildtool/file_system/TARGETS b/src/buildtool/file_system/TARGETS index 81455fe6..b368e5b4 100644 --- a/src/buildtool/file_system/TARGETS +++ b/src/buildtool/file_system/TARGETS @@ -79,6 +79,8 @@ [ ["", "libgit2"] , "file_system_manager" , ["src/buildtool/logging", "logging"] + , ["src/utils/cpp", "path"] + , ["src/buildtool/common", "common"] ] } , "git_context": @@ -98,7 +100,7 @@ , "name": ["git_repo"] , "hdrs": ["git_repo.hpp"] , "srcs": ["git_repo.cpp"] - , "deps": ["git_cas"] + , "deps": ["git_cas", ["src/buildtool/common", "bazel_types"]] , "stage": ["src", "buildtool", "file_system"] , "private-deps": [ ["src/buildtool/logging", "logging"] @@ -107,6 +109,7 @@ , ["src/utils/cpp", "hex_string"] , ["src/utils/cpp", "gsl"] , ["src/buildtool/file_system", "file_system_manager"] + , ["src/buildtool/common", "common"] ] , "cflags": ["-pthread"] } diff --git a/src/buildtool/file_system/file_system_manager.hpp b/src/buildtool/file_system/file_system_manager.hpp index 8555e8dc..30a469af 100644 --- a/src/buildtool/file_system/file_system_manager.hpp +++ b/src/buildtool/file_system/file_system_manager.hpp @@ -675,6 +675,26 @@ class FileSystemManager { return true; } + /// \brief Read the content of a symlink. + [[nodiscard]] static auto ReadSymlink(std::filesystem::path const& link) + -> std::optional<std::string> { + try { + if (std::filesystem::is_symlink(link)) { + return std::filesystem::read_symlink(link).string(); + } + Logger::Log(LogLevel::Debug, + "{} can not be read because it is not a symlink.", + link.string()); + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Error, + "reading symlink {} failed:\n{}", + link.string(), + ex.what()); + } + + return std::nullopt; + } + /// \brief Write file /// If argument fd_less is given, the write will be performed in a child /// process to prevent polluting the parent with open writable file diff --git a/src/buildtool/file_system/git_repo.cpp b/src/buildtool/file_system/git_repo.cpp index e10da57e..25ab6c4f 100644 --- a/src/buildtool/file_system/git_repo.cpp +++ b/src/buildtool/file_system/git_repo.cpp @@ -17,6 +17,7 @@ #include <thread> #include <unordered_set> +#include "src/buildtool/common/artifact_digest.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/logging/logger.hpp" #include "src/utils/cpp/gsl.hpp" @@ -1154,53 +1155,90 @@ auto GitRepo::IsRepoFake() const noexcept -> bool { } auto GitRepo::ReadTree(std::string const& id, + SymlinksCheckFunc const& check_symlinks, bool is_hex_id, bool ignore_special) const noexcept -> std::optional<tree_entries_t> { -#ifdef BOOTSTRAP_BUILD_TOOL - return std::nullopt; -#else - // create object id - auto oid = GitObjectID(id, is_hex_id); - if (not oid) { - return std::nullopt; - } +#ifndef BOOTSTRAP_BUILD_TOOL + try { + // create object id + auto oid = GitObjectID(id, is_hex_id); + if (not oid) { + return std::nullopt; + } - // lookup tree - git_tree* tree_ptr{nullptr}; - { - // share the odb lock - std::shared_lock lock{GetGitCAS()->mutex_}; - if (git_tree_lookup(&tree_ptr, repo_->Ptr(), &(*oid)) != 0) { + // lookup tree + git_tree* tree_ptr{nullptr}; + { + // share the odb lock + std::shared_lock lock{GetGitCAS()->mutex_}; + if (git_tree_lookup(&tree_ptr, repo_->Ptr(), &(*oid)) != 0) { + Logger::Log(LogLevel::Debug, + "failed to lookup Git tree {}", + is_hex_id ? std::string{id} : ToHexString(id)); + return std::nullopt; + } + } + auto tree = std::unique_ptr<git_tree, decltype(&tree_closer)>{ + tree_ptr, tree_closer}; + + // walk tree (flat) and create entries + tree_entries_t entries{}; + entries.reserve(git_tree_entrycount(tree.get())); + if (git_tree_walk(tree.get(), + GIT_TREEWALK_PRE, + ignore_special ? flat_tree_walker_ignore_special + : flat_tree_walker, + &entries) != 0) { Logger::Log(LogLevel::Debug, - "failed to lookup Git tree {}", + "failed to walk Git tree {}", is_hex_id ? std::string{id} : ToHexString(id)); return std::nullopt; } - } - auto tree = std::unique_ptr<git_tree, decltype(&tree_closer)>{tree_ptr, - tree_closer}; - - // walk tree (flat) and create entries - tree_entries_t entries{}; - entries.reserve(git_tree_entrycount(tree.get())); - if (git_tree_walk( - tree.get(), - GIT_TREEWALK_PRE, - ignore_special ? flat_tree_walker_ignore_special : flat_tree_walker, - &entries) != 0) { - Logger::Log(LogLevel::Debug, - "failed to walk Git tree {}", - is_hex_id ? std::string{id} : ToHexString(id)); - return std::nullopt; - } + + // checking non-upwardness of symlinks can not be easily or safely done + // during the tree walk, so it is done here. This is only needed for + // ignore_special==false. + if (not ignore_special) { + // we first gather all symlink candidates + std::vector<bazel_re::Digest> symlinks{}; + symlinks.reserve(entries.size()); // at most one symlink per entry + for (auto const& entry : entries) { + for (auto const& item : entry.second) { + if (IsSymlinkObject(item.type)) { + symlinks.emplace_back(bazel_re::Digest( + ArtifactDigest(ToHexString(entry.first), + /*size=*/0, + /*is_tree=*/false))); + break; // no need to check other items with same hash + } + } + } + // we check symlinks in bulk, optimized for network-backed repos + if (not check_symlinks) { + Logger::Log(LogLevel::Debug, "check_symlink callable is empty"); + return std::nullopt; + } + if (not check_symlinks(symlinks)) { + Logger::Log(LogLevel::Error, + "found upwards symlinks in Git tree {}", + is_hex_id ? std::string{id} : ToHexString(id)); + return std::nullopt; + } + } #ifndef NDEBUG - EnsuresAudit(ValidateEntries(entries)); + EnsuresAudit(ValidateEntries(entries)); #endif - return entries; + return entries; + } catch (std::exception const& ex) { + Logger::Log( + LogLevel::Error, "reading tree failed with:\n{}", ex.what()); + } #endif + + return std::nullopt; } auto GitRepo::CreateTree(tree_entries_t const& entries) const noexcept @@ -1254,6 +1292,7 @@ auto GitRepo::CreateTree(tree_entries_t const& entries) const noexcept auto GitRepo::ReadTreeData(std::string const& data, std::string const& id, + SymlinksCheckFunc const& check_symlinks, bool is_hex_id) noexcept -> std::optional<tree_entries_t> { #ifndef BOOTSTRAP_BUILD_TOOL @@ -1278,7 +1317,8 @@ auto GitRepo::ReadTreeData(std::string const& data, // wrap odb in "fake" repo auto repo = GitRepo(std::static_pointer_cast<GitCAS const>(cas)); - return repo.ReadTree(*raw_id, /*is_hex_id=*/false); + return repo.ReadTree( + *raw_id, check_symlinks, /*is_hex_id=*/false); } } } catch (std::exception const& ex) { diff --git a/src/buildtool/file_system/git_repo.hpp b/src/buildtool/file_system/git_repo.hpp index 43c5fa22..e96f401b 100644 --- a/src/buildtool/file_system/git_repo.hpp +++ b/src/buildtool/file_system/git_repo.hpp @@ -17,6 +17,7 @@ #include <functional> +#include "src/buildtool/common/bazel_types.hpp" #include "src/buildtool/file_system/git_cas.hpp" extern "C" { @@ -48,6 +49,11 @@ class GitRepo { using tree_entries_t = std::unordered_map<std::string, std::vector<tree_entry_t>>; + // Checks whether a list of symlinks given by their hashes are non-upwards, + // based on content read from an actual backend. + using SymlinksCheckFunc = + std::function<bool(std::vector<bazel_re::Digest> const&)>; + GitRepo() = delete; // no default ctor ~GitRepo() noexcept = default; @@ -80,9 +86,11 @@ class GitRepo { /// Reading a tree must be backed by an object database. Therefore, a real /// repository is required. /// \param id The object id. + /// \param check_symlinks Function to check non-upwardness condition. /// \param is_hex_id Specify whether `id` is hex string or raw. /// \param ignore_special If set, treat symlinks as absent. [[nodiscard]] auto ReadTree(std::string const& id, + SymlinksCheckFunc const& check_symlinks, bool is_hex_id = false, bool ignore_special = false) const noexcept -> std::optional<tree_entries_t>; @@ -100,12 +108,14 @@ class GitRepo { /// \brief Read entries from tree data (without object db). /// \param data The tree object as plain data. /// \param id The object id. + /// \param check_symlinks Function to check non-upwardness condition. /// \param is_hex_id Specify whether `id` is hex string or raw. /// \returns The tree entries. - [[nodiscard]] static auto ReadTreeData(std::string const& data, - std::string const& id, - bool is_hex_id = false) noexcept - -> std::optional<tree_entries_t>; + [[nodiscard]] static auto ReadTreeData( + std::string const& data, + std::string const& id, + SymlinksCheckFunc const& check_symlinks, + bool is_hex_id = false) noexcept -> std::optional<tree_entries_t>; /// \brief Create a flat shallow (without objects in db) tree and return it. /// Creates a tree object from the entries without access to the actual diff --git a/src/buildtool/file_system/git_tree.cpp b/src/buildtool/file_system/git_tree.cpp index d594e638..a3c441d9 100644 --- a/src/buildtool/file_system/git_tree.cpp +++ b/src/buildtool/file_system/git_tree.cpp @@ -16,8 +16,9 @@ #include <sstream> +#include "src/buildtool/common/artifact_digest.hpp" #include "src/buildtool/logging/logger.hpp" -#include "src/utils/cpp/hex_string.hpp" +#include "src/utils/cpp/path.hpp" extern "C" { #include <git2.h> @@ -68,11 +69,24 @@ auto GitTree::Read(std::filesystem::path const& repo_path, auto GitTree::Read(gsl::not_null<GitCASPtr> const& cas, std::string const& tree_id, bool ignore_special) noexcept -> std::optional<GitTree> { + // create symlinks checker + auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = + cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; if (auto raw_id = FromHexString(tree_id)) { auto repo = GitRepo::Open(cas); if (repo != std::nullopt) { - if (auto entries = repo->ReadTree( - *raw_id, /*is_hex_id=*/false, ignore_special)) { + if (auto entries = repo->ReadTree(*raw_id, + check_symlinks, + /*is_hex_id=*/false, + ignore_special)) { // the raw_id value is NOT recomputed when ignore_special==true, // so we set it to empty to signal that it should not be used! return GitTree::FromEntries(cas, @@ -133,8 +147,22 @@ auto GitTreeEntry::Tree(bool ignore_special) const& noexcept if (repo == std::nullopt) { return std::nullopt; } - if (auto entries = repo->ReadTree( - raw_id_, /*is_hex_id=*/false, ignore_special)) { + // create symlinks checker + auto check_symlinks = + [cas = cas_](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = cas->ReadObject( + ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; + if (auto entries = repo->ReadTree(raw_id_, + check_symlinks, + /*is_hex_id=*/false, + ignore_special)) { // the raw_id value is not used when ignore_special==true return GitTree::FromEntries( cas_, std::move(*entries), raw_id_, ignore_special); diff --git a/src/buildtool/storage/local_cas.tpp b/src/buildtool/storage/local_cas.tpp index 4b91c0e0..ba5471e7 100644 --- a/src/buildtool/storage/local_cas.tpp +++ b/src/buildtool/storage/local_cas.tpp @@ -42,9 +42,26 @@ template <class T_CAS> -> std::optional<GitRepo::tree_entries_t> { if (auto const path = cas.TreePath(digest)) { if (auto const content = FileSystemManager::ReadFile(*path)) { + auto check_symlinks = + [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto link_path = + cas.BlobPath(id, /*is_executable=*/false); + if (not link_path) { + return false; + } + auto content = + FileSystemManager::ReadSymlink(*link_path); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; return GitRepo::ReadTreeData( *content, HashFunction::ComputeTreeHash(*content).Bytes(), + check_symlinks, /*is_hex_id=*/false); } } @@ -274,8 +291,24 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::LocalUplinkGitTree( // Determine tree entries. auto content = FileSystemManager::ReadFile(*tree_path); auto id = NativeSupport::Unprefix(digest.hash()); + auto check_symlinks = + [cas = &cas_file_](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto link_path = cas->BlobPath(id); + if (not link_path) { + return false; + } + // in the local CAS we store as files + auto content = FileSystemManager::ReadFile(*link_path); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; auto tree_entries = GitRepo::ReadTreeData(*content, id, + check_symlinks, /*is_hex_id=*/true); if (not tree_entries) { return false; |