diff options
-rw-r--r-- | src/buildtool/execution_api/remote/bazel/bazel_network.cpp | 23 | ||||
-rw-r--r-- | src/buildtool/file_system/TARGETS | 5 | ||||
-rw-r--r-- | src/buildtool/file_system/file_system_manager.hpp | 20 | ||||
-rw-r--r-- | src/buildtool/file_system/git_repo.cpp | 110 | ||||
-rw-r--r-- | src/buildtool/file_system/git_repo.hpp | 18 | ||||
-rw-r--r-- | src/buildtool/file_system/git_tree.cpp | 38 | ||||
-rw-r--r-- | src/buildtool/storage/local_cas.tpp | 33 | ||||
-rw-r--r-- | test/buildtool/file_system/TARGETS | 1 | ||||
-rw-r--r-- | test/buildtool/file_system/git_tree.test.cpp | 494 |
9 files changed, 641 insertions, 101 deletions
diff --git a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp index 159a848e..60ed22e1 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp @@ -42,9 +42,32 @@ namespace { auto blobs = network->ReadBlobs({digest}).Next(); if (blobs.size() == 1) { auto const& content = blobs.at(0).data; + auto check_symlinks = + [&network](std::vector<bazel_re::Digest> const& ids) { + auto size = ids.size(); + auto reader = network->ReadBlobs(ids); + auto blobs = reader.Next(); + std::size_t count{}; + while (not blobs.empty()) { + if (count + blobs.size() > size) { + Logger::Log(LogLevel::Debug, + "received more blobs than requested."); + return false; + } + for (auto const& blob : blobs) { + if (not PathIsNonUpwards(blob.data)) { + return false; + } + } + count += blobs.size(); + blobs = reader.Next(); + } + return true; + }; return GitRepo::ReadTreeData( content, HashFunction::ComputeTreeHash(content).Bytes(), + check_symlinks, /*is_hex_id=*/false); } Logger::Log(LogLevel::Error, diff --git a/src/buildtool/file_system/TARGETS b/src/buildtool/file_system/TARGETS index 81455fe6..b368e5b4 100644 --- a/src/buildtool/file_system/TARGETS +++ b/src/buildtool/file_system/TARGETS @@ -79,6 +79,8 @@ [ ["", "libgit2"] , "file_system_manager" , ["src/buildtool/logging", "logging"] + , ["src/utils/cpp", "path"] + , ["src/buildtool/common", "common"] ] } , "git_context": @@ -98,7 +100,7 @@ , "name": ["git_repo"] , "hdrs": ["git_repo.hpp"] , "srcs": ["git_repo.cpp"] - , "deps": ["git_cas"] + , "deps": ["git_cas", ["src/buildtool/common", "bazel_types"]] , "stage": ["src", "buildtool", "file_system"] , "private-deps": [ ["src/buildtool/logging", "logging"] @@ -107,6 +109,7 @@ , ["src/utils/cpp", "hex_string"] , ["src/utils/cpp", "gsl"] , ["src/buildtool/file_system", "file_system_manager"] + , ["src/buildtool/common", "common"] ] , "cflags": ["-pthread"] } diff --git a/src/buildtool/file_system/file_system_manager.hpp b/src/buildtool/file_system/file_system_manager.hpp index 8555e8dc..30a469af 100644 --- a/src/buildtool/file_system/file_system_manager.hpp +++ b/src/buildtool/file_system/file_system_manager.hpp @@ -675,6 +675,26 @@ class FileSystemManager { return true; } + /// \brief Read the content of a symlink. + [[nodiscard]] static auto ReadSymlink(std::filesystem::path const& link) + -> std::optional<std::string> { + try { + if (std::filesystem::is_symlink(link)) { + return std::filesystem::read_symlink(link).string(); + } + Logger::Log(LogLevel::Debug, + "{} can not be read because it is not a symlink.", + link.string()); + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Error, + "reading symlink {} failed:\n{}", + link.string(), + ex.what()); + } + + return std::nullopt; + } + /// \brief Write file /// If argument fd_less is given, the write will be performed in a child /// process to prevent polluting the parent with open writable file diff --git a/src/buildtool/file_system/git_repo.cpp b/src/buildtool/file_system/git_repo.cpp index e10da57e..25ab6c4f 100644 --- a/src/buildtool/file_system/git_repo.cpp +++ b/src/buildtool/file_system/git_repo.cpp @@ -17,6 +17,7 @@ #include <thread> #include <unordered_set> +#include "src/buildtool/common/artifact_digest.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/logging/logger.hpp" #include "src/utils/cpp/gsl.hpp" @@ -1154,53 +1155,90 @@ auto GitRepo::IsRepoFake() const noexcept -> bool { } auto GitRepo::ReadTree(std::string const& id, + SymlinksCheckFunc const& check_symlinks, bool is_hex_id, bool ignore_special) const noexcept -> std::optional<tree_entries_t> { -#ifdef BOOTSTRAP_BUILD_TOOL - return std::nullopt; -#else - // create object id - auto oid = GitObjectID(id, is_hex_id); - if (not oid) { - return std::nullopt; - } +#ifndef BOOTSTRAP_BUILD_TOOL + try { + // create object id + auto oid = GitObjectID(id, is_hex_id); + if (not oid) { + return std::nullopt; + } - // lookup tree - git_tree* tree_ptr{nullptr}; - { - // share the odb lock - std::shared_lock lock{GetGitCAS()->mutex_}; - if (git_tree_lookup(&tree_ptr, repo_->Ptr(), &(*oid)) != 0) { + // lookup tree + git_tree* tree_ptr{nullptr}; + { + // share the odb lock + std::shared_lock lock{GetGitCAS()->mutex_}; + if (git_tree_lookup(&tree_ptr, repo_->Ptr(), &(*oid)) != 0) { + Logger::Log(LogLevel::Debug, + "failed to lookup Git tree {}", + is_hex_id ? std::string{id} : ToHexString(id)); + return std::nullopt; + } + } + auto tree = std::unique_ptr<git_tree, decltype(&tree_closer)>{ + tree_ptr, tree_closer}; + + // walk tree (flat) and create entries + tree_entries_t entries{}; + entries.reserve(git_tree_entrycount(tree.get())); + if (git_tree_walk(tree.get(), + GIT_TREEWALK_PRE, + ignore_special ? flat_tree_walker_ignore_special + : flat_tree_walker, + &entries) != 0) { Logger::Log(LogLevel::Debug, - "failed to lookup Git tree {}", + "failed to walk Git tree {}", is_hex_id ? std::string{id} : ToHexString(id)); return std::nullopt; } - } - auto tree = std::unique_ptr<git_tree, decltype(&tree_closer)>{tree_ptr, - tree_closer}; - - // walk tree (flat) and create entries - tree_entries_t entries{}; - entries.reserve(git_tree_entrycount(tree.get())); - if (git_tree_walk( - tree.get(), - GIT_TREEWALK_PRE, - ignore_special ? flat_tree_walker_ignore_special : flat_tree_walker, - &entries) != 0) { - Logger::Log(LogLevel::Debug, - "failed to walk Git tree {}", - is_hex_id ? std::string{id} : ToHexString(id)); - return std::nullopt; - } + + // checking non-upwardness of symlinks can not be easily or safely done + // during the tree walk, so it is done here. This is only needed for + // ignore_special==false. + if (not ignore_special) { + // we first gather all symlink candidates + std::vector<bazel_re::Digest> symlinks{}; + symlinks.reserve(entries.size()); // at most one symlink per entry + for (auto const& entry : entries) { + for (auto const& item : entry.second) { + if (IsSymlinkObject(item.type)) { + symlinks.emplace_back(bazel_re::Digest( + ArtifactDigest(ToHexString(entry.first), + /*size=*/0, + /*is_tree=*/false))); + break; // no need to check other items with same hash + } + } + } + // we check symlinks in bulk, optimized for network-backed repos + if (not check_symlinks) { + Logger::Log(LogLevel::Debug, "check_symlink callable is empty"); + return std::nullopt; + } + if (not check_symlinks(symlinks)) { + Logger::Log(LogLevel::Error, + "found upwards symlinks in Git tree {}", + is_hex_id ? std::string{id} : ToHexString(id)); + return std::nullopt; + } + } #ifndef NDEBUG - EnsuresAudit(ValidateEntries(entries)); + EnsuresAudit(ValidateEntries(entries)); #endif - return entries; + return entries; + } catch (std::exception const& ex) { + Logger::Log( + LogLevel::Error, "reading tree failed with:\n{}", ex.what()); + } #endif + + return std::nullopt; } auto GitRepo::CreateTree(tree_entries_t const& entries) const noexcept @@ -1254,6 +1292,7 @@ auto GitRepo::CreateTree(tree_entries_t const& entries) const noexcept auto GitRepo::ReadTreeData(std::string const& data, std::string const& id, + SymlinksCheckFunc const& check_symlinks, bool is_hex_id) noexcept -> std::optional<tree_entries_t> { #ifndef BOOTSTRAP_BUILD_TOOL @@ -1278,7 +1317,8 @@ auto GitRepo::ReadTreeData(std::string const& data, // wrap odb in "fake" repo auto repo = GitRepo(std::static_pointer_cast<GitCAS const>(cas)); - return repo.ReadTree(*raw_id, /*is_hex_id=*/false); + return repo.ReadTree( + *raw_id, check_symlinks, /*is_hex_id=*/false); } } } catch (std::exception const& ex) { diff --git a/src/buildtool/file_system/git_repo.hpp b/src/buildtool/file_system/git_repo.hpp index 43c5fa22..e96f401b 100644 --- a/src/buildtool/file_system/git_repo.hpp +++ b/src/buildtool/file_system/git_repo.hpp @@ -17,6 +17,7 @@ #include <functional> +#include "src/buildtool/common/bazel_types.hpp" #include "src/buildtool/file_system/git_cas.hpp" extern "C" { @@ -48,6 +49,11 @@ class GitRepo { using tree_entries_t = std::unordered_map<std::string, std::vector<tree_entry_t>>; + // Checks whether a list of symlinks given by their hashes are non-upwards, + // based on content read from an actual backend. + using SymlinksCheckFunc = + std::function<bool(std::vector<bazel_re::Digest> const&)>; + GitRepo() = delete; // no default ctor ~GitRepo() noexcept = default; @@ -80,9 +86,11 @@ class GitRepo { /// Reading a tree must be backed by an object database. Therefore, a real /// repository is required. /// \param id The object id. + /// \param check_symlinks Function to check non-upwardness condition. /// \param is_hex_id Specify whether `id` is hex string or raw. /// \param ignore_special If set, treat symlinks as absent. [[nodiscard]] auto ReadTree(std::string const& id, + SymlinksCheckFunc const& check_symlinks, bool is_hex_id = false, bool ignore_special = false) const noexcept -> std::optional<tree_entries_t>; @@ -100,12 +108,14 @@ class GitRepo { /// \brief Read entries from tree data (without object db). /// \param data The tree object as plain data. /// \param id The object id. + /// \param check_symlinks Function to check non-upwardness condition. /// \param is_hex_id Specify whether `id` is hex string or raw. /// \returns The tree entries. - [[nodiscard]] static auto ReadTreeData(std::string const& data, - std::string const& id, - bool is_hex_id = false) noexcept - -> std::optional<tree_entries_t>; + [[nodiscard]] static auto ReadTreeData( + std::string const& data, + std::string const& id, + SymlinksCheckFunc const& check_symlinks, + bool is_hex_id = false) noexcept -> std::optional<tree_entries_t>; /// \brief Create a flat shallow (without objects in db) tree and return it. /// Creates a tree object from the entries without access to the actual diff --git a/src/buildtool/file_system/git_tree.cpp b/src/buildtool/file_system/git_tree.cpp index d594e638..a3c441d9 100644 --- a/src/buildtool/file_system/git_tree.cpp +++ b/src/buildtool/file_system/git_tree.cpp @@ -16,8 +16,9 @@ #include <sstream> +#include "src/buildtool/common/artifact_digest.hpp" #include "src/buildtool/logging/logger.hpp" -#include "src/utils/cpp/hex_string.hpp" +#include "src/utils/cpp/path.hpp" extern "C" { #include <git2.h> @@ -68,11 +69,24 @@ auto GitTree::Read(std::filesystem::path const& repo_path, auto GitTree::Read(gsl::not_null<GitCASPtr> const& cas, std::string const& tree_id, bool ignore_special) noexcept -> std::optional<GitTree> { + // create symlinks checker + auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = + cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; if (auto raw_id = FromHexString(tree_id)) { auto repo = GitRepo::Open(cas); if (repo != std::nullopt) { - if (auto entries = repo->ReadTree( - *raw_id, /*is_hex_id=*/false, ignore_special)) { + if (auto entries = repo->ReadTree(*raw_id, + check_symlinks, + /*is_hex_id=*/false, + ignore_special)) { // the raw_id value is NOT recomputed when ignore_special==true, // so we set it to empty to signal that it should not be used! return GitTree::FromEntries(cas, @@ -133,8 +147,22 @@ auto GitTreeEntry::Tree(bool ignore_special) const& noexcept if (repo == std::nullopt) { return std::nullopt; } - if (auto entries = repo->ReadTree( - raw_id_, /*is_hex_id=*/false, ignore_special)) { + // create symlinks checker + auto check_symlinks = + [cas = cas_](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = cas->ReadObject( + ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; + if (auto entries = repo->ReadTree(raw_id_, + check_symlinks, + /*is_hex_id=*/false, + ignore_special)) { // the raw_id value is not used when ignore_special==true return GitTree::FromEntries( cas_, std::move(*entries), raw_id_, ignore_special); diff --git a/src/buildtool/storage/local_cas.tpp b/src/buildtool/storage/local_cas.tpp index 4b91c0e0..ba5471e7 100644 --- a/src/buildtool/storage/local_cas.tpp +++ b/src/buildtool/storage/local_cas.tpp @@ -42,9 +42,26 @@ template <class T_CAS> -> std::optional<GitRepo::tree_entries_t> { if (auto const path = cas.TreePath(digest)) { if (auto const content = FileSystemManager::ReadFile(*path)) { + auto check_symlinks = + [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto link_path = + cas.BlobPath(id, /*is_executable=*/false); + if (not link_path) { + return false; + } + auto content = + FileSystemManager::ReadSymlink(*link_path); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; return GitRepo::ReadTreeData( *content, HashFunction::ComputeTreeHash(*content).Bytes(), + check_symlinks, /*is_hex_id=*/false); } } @@ -274,8 +291,24 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::LocalUplinkGitTree( // Determine tree entries. auto content = FileSystemManager::ReadFile(*tree_path); auto id = NativeSupport::Unprefix(digest.hash()); + auto check_symlinks = + [cas = &cas_file_](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto link_path = cas->BlobPath(id); + if (not link_path) { + return false; + } + // in the local CAS we store as files + auto content = FileSystemManager::ReadFile(*link_path); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; auto tree_entries = GitRepo::ReadTreeData(*content, id, + check_symlinks, /*is_hex_id=*/true); if (not tree_entries) { return false; diff --git a/test/buildtool/file_system/TARGETS b/test/buildtool/file_system/TARGETS index 8d1d351d..4c8a0d76 100644 --- a/test/buildtool/file_system/TARGETS +++ b/test/buildtool/file_system/TARGETS @@ -34,6 +34,7 @@ , ["utils", "container_matchers"] , ["@", "src", "src/buildtool/file_system", "git_tree"] , ["@", "src", "src/buildtool/file_system", "file_system_manager"] + , ["@", "src", "src/buildtool/common", "common"] , ["utils", "shell_quoting"] ] , "stage": ["test", "buildtool", "file_system"] diff --git a/test/buildtool/file_system/git_tree.test.cpp b/test/buildtool/file_system/git_tree.test.cpp index 79eb11f4..ec2d5003 100644 --- a/test/buildtool/file_system/git_tree.test.cpp +++ b/test/buildtool/file_system/git_tree.test.cpp @@ -15,6 +15,7 @@ #include <thread> #include "catch2/catch_test_macros.hpp" +#include "src/buildtool/common/artifact_digest.hpp" #include "src/buildtool/file_system/file_system_manager.hpp" #include "src/buildtool/file_system/git_tree.hpp" #include "test/utils/container_matchers.hpp" @@ -193,28 +194,49 @@ TEST_CASE("Read Git Trees", "[git_cas]") { auto repo = GitRepo::Open(cas); REQUIRE(repo); + // create symlinks checker + auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = + cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; + SECTION("invalid trees") { - CHECK_FALSE(repo->ReadTree("", /*is_hex_id=*/true)); - CHECK_FALSE(repo->ReadTree("", /*is_hex_id=*/false)); + CHECK_FALSE(repo->ReadTree("", check_symlinks, /*is_hex_id=*/true)); + CHECK_FALSE(repo->ReadTree("", check_symlinks, /*is_hex_id=*/false)); - CHECK_FALSE(repo->ReadTree(kFailId, /*is_hex_id=*/true)); - CHECK_FALSE(repo->ReadTree(HexToRaw(kFailId), /*is_hex_id=*/false)); + CHECK_FALSE( + repo->ReadTree(kFailId, check_symlinks, /*is_hex_id=*/true)); + CHECK_FALSE(repo->ReadTree( + HexToRaw(kFailId), check_symlinks, /*is_hex_id=*/false)); - CHECK_FALSE(repo->ReadTree(RawToHex("to_short"), /*is_hex_id=*/true)); - CHECK_FALSE(repo->ReadTree("to_short", /*is_hex_id=*/false)); + CHECK_FALSE(repo->ReadTree( + RawToHex("to_short"), check_symlinks, /*is_hex_id=*/true)); + CHECK_FALSE( + repo->ReadTree("to_short", check_symlinks, /*is_hex_id=*/false)); - CHECK_FALSE(repo->ReadTree("invalid_chars", /*is_hex_id=*/true)); + CHECK_FALSE(repo->ReadTree( + "invalid_chars", check_symlinks, /*is_hex_id=*/true)); - CHECK_FALSE(repo->ReadTree(kFooId, /*is_hex_id=*/true)); - CHECK_FALSE(repo->ReadTree(HexToRaw(kFooId), /*is_hex_id=*/false)); + CHECK_FALSE(repo->ReadTree(kFooId, check_symlinks, /*is_hex_id=*/true)); + CHECK_FALSE(repo->ReadTree( + HexToRaw(kFooId), check_symlinks, /*is_hex_id=*/false)); - CHECK_FALSE(repo->ReadTree(kBarId, /*is_hex_id=*/true)); - CHECK_FALSE(repo->ReadTree(HexToRaw(kBarId), /*is_hex_id=*/false)); + CHECK_FALSE(repo->ReadTree(kBarId, check_symlinks, /*is_hex_id=*/true)); + CHECK_FALSE(repo->ReadTree( + HexToRaw(kBarId), check_symlinks, /*is_hex_id=*/false)); } SECTION("valid trees") { - auto entries0 = repo->ReadTree(kTreeId, /*is_hex_id=*/true); - auto entries1 = repo->ReadTree(HexToRaw(kTreeId), /*is_hex_id=*/false); + auto entries0 = + repo->ReadTree(kTreeId, check_symlinks, /*is_hex_id=*/true); + auto entries1 = repo->ReadTree( + HexToRaw(kTreeId), check_symlinks, /*is_hex_id=*/false); REQUIRE(entries0); REQUIRE(entries1); @@ -222,49 +244,141 @@ TEST_CASE("Read Git Trees", "[git_cas]") { } } -TEST_CASE("Read Git Trees with symlinks", "[git_cas]") { - auto repo_path = CreateTestRepoSymlinks(true); +TEST_CASE("Read Git Trees with symlinks -- ignore special", "[git_cas]") { + auto repo_path = CreateTestRepoSymlinks(false); // checkout needed REQUIRE(repo_path); auto cas = GitCAS::Open(*repo_path); REQUIRE(cas); auto repo = GitRepo::Open(cas); REQUIRE(repo); - SECTION("invalid trees") { - CHECK_FALSE( - repo->ReadTree("", /*is_hex_id=*/true, /*ignore_special=*/true)); - CHECK_FALSE( - repo->ReadTree("", /*is_hex_id=*/false, /*ignore_special=*/true)); + // create symlinks checker + auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = + cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; + SECTION("invalid trees") { CHECK_FALSE(repo->ReadTree( - kFailId, /*is_hex_id=*/true, /*ignore_special=*/true)); + "", check_symlinks, /*is_hex_id=*/true, /*ignore_special=*/true)); CHECK_FALSE(repo->ReadTree( - HexToRaw(kFailId), /*is_hex_id=*/false, /*ignore_special=*/true)); + "", check_symlinks, /*is_hex_id=*/false, /*ignore_special=*/true)); + + CHECK_FALSE(repo->ReadTree(kFailId, + check_symlinks, + /*is_hex_id=*/true, + /*ignore_special=*/true)); + CHECK_FALSE(repo->ReadTree(HexToRaw(kFailId), + check_symlinks, + /*is_hex_id=*/false, + /*ignore_special=*/true)); + + CHECK_FALSE(repo->ReadTree(RawToHex("to_short"), + check_symlinks, + /*is_hex_id=*/true, + /*ignore_special=*/true)); + CHECK_FALSE(repo->ReadTree("to_short", + check_symlinks, + /*is_hex_id=*/false, + /*ignore_special=*/true)); + + CHECK_FALSE(repo->ReadTree("invalid_chars", + check_symlinks, + /*is_hex_id=*/true, + /*ignore_special=*/true)); + + CHECK_FALSE(repo->ReadTree(kFooId, + check_symlinks, + /*is_hex_id=*/true, + /*ignore_special=*/true)); + CHECK_FALSE(repo->ReadTree(HexToRaw(kFooId), + check_symlinks, + /*is_hex_id=*/false, + /*ignore_special=*/true)); + + CHECK_FALSE(repo->ReadTree(kBarId, + check_symlinks, + /*is_hex_id=*/true, + /*ignore_special=*/true)); + CHECK_FALSE(repo->ReadTree(HexToRaw(kBarId), + check_symlinks, + /*is_hex_id=*/false, + /*ignore_special=*/true)); + } - CHECK_FALSE(repo->ReadTree( - RawToHex("to_short"), /*is_hex_id=*/true, /*ignore_special=*/true)); - CHECK_FALSE(repo->ReadTree( - "to_short", /*is_hex_id=*/false, /*ignore_special=*/true)); + SECTION("valid trees") { + auto entries0 = repo->ReadTree(kTreeSymId, + check_symlinks, + /*is_hex_id=*/true, + /*ignore_special=*/true); + auto entries1 = repo->ReadTree(HexToRaw(kTreeSymId), + check_symlinks, + /*is_hex_id=*/false, + /*ignore_special=*/true); + REQUIRE(entries0); + REQUIRE(entries1); + CHECK(*entries0 == *entries1); + } +} + +TEST_CASE("Read Git Trees with symlinks -- allow non-upwards", "[git_cas]") { + auto repo_path = CreateTestRepoSymlinks(false); // checkout needed + REQUIRE(repo_path); + auto cas = GitCAS::Open(*repo_path); + REQUIRE(cas); + auto repo = GitRepo::Open(cas); + REQUIRE(repo); + + // create symlinks checker + auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = + cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; + + SECTION("invalid trees") { + CHECK_FALSE(repo->ReadTree("", check_symlinks, /*is_hex_id=*/true)); + CHECK_FALSE(repo->ReadTree("", check_symlinks, /*is_hex_id=*/false)); + + CHECK_FALSE( + repo->ReadTree(kFailId, check_symlinks, /*is_hex_id=*/true)); CHECK_FALSE(repo->ReadTree( - "invalid_chars", /*is_hex_id=*/true, /*ignore_special=*/true)); + HexToRaw(kFailId), check_symlinks, /*is_hex_id=*/false)); CHECK_FALSE(repo->ReadTree( - kFooId, /*is_hex_id=*/true, /*ignore_special=*/true)); + RawToHex("to_short"), check_symlinks, /*is_hex_id=*/true)); + CHECK_FALSE( + repo->ReadTree("to_short", check_symlinks, /*is_hex_id=*/false)); + CHECK_FALSE(repo->ReadTree( - HexToRaw(kFooId), /*is_hex_id=*/false, /*ignore_special=*/true)); + "invalid_chars", check_symlinks, /*is_hex_id=*/true)); + CHECK_FALSE(repo->ReadTree(kFooId, check_symlinks, /*is_hex_id=*/true)); CHECK_FALSE(repo->ReadTree( - kBarId, /*is_hex_id=*/true, /*ignore_special=*/true)); + HexToRaw(kFooId), check_symlinks, /*is_hex_id=*/false)); + + CHECK_FALSE(repo->ReadTree(kBarId, check_symlinks, /*is_hex_id=*/true)); CHECK_FALSE(repo->ReadTree( - HexToRaw(kBarId), /*is_hex_id=*/false, /*ignore_special=*/true)); + HexToRaw(kBarId), check_symlinks, /*is_hex_id=*/false)); } SECTION("valid trees") { - auto entries0 = repo->ReadTree( - kTreeSymId, /*is_hex_id=*/true, /*ignore_special=*/true); + auto entries0 = + repo->ReadTree(kTreeSymId, check_symlinks, /*is_hex_id=*/true); auto entries1 = repo->ReadTree( - HexToRaw(kTreeSymId), /*is_hex_id=*/false, /*ignore_special=*/true); + HexToRaw(kTreeSymId), check_symlinks, /*is_hex_id=*/false); REQUIRE(entries0); REQUIRE(entries1); @@ -280,6 +394,18 @@ TEST_CASE("Create Git Trees", "[git_cas]") { auto repo = GitRepo::Open(cas); REQUIRE(repo); + // create symlinks checker + auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = + cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; + SECTION("empty tree") { auto tree_id = repo->CreateTree({}); REQUIRE(tree_id); @@ -288,7 +414,8 @@ TEST_CASE("Create Git Trees", "[git_cas]") { } SECTION("existing tree") { - auto entries = repo->ReadTree(kTreeId, /*is_hex_id=*/true); + auto entries = + repo->ReadTree(kTreeId, check_symlinks, /*is_hex_id=*/true); REQUIRE(entries); auto tree_id = repo->CreateTree(*entries); @@ -313,25 +440,53 @@ TEST_CASE("Create Git Trees", "[git_cas]") { CHECK(foo_bar_id == bar_foo_id); } +} - auto repo_path_sym = CreateTestRepoSymlinks(true); - REQUIRE(repo_path_sym); - auto cas_sym = GitCAS::Open(*repo_path_sym); - REQUIRE(cas_sym); - auto repo_sym = GitRepo::Open(cas_sym); - REQUIRE(repo_sym); +TEST_CASE("Create Git Trees with symlinks", "[git_cas]") { + auto repo_path = CreateTestRepoSymlinks(false); // checkout needed + REQUIRE(repo_path); + auto cas = GitCAS::Open(*repo_path); + REQUIRE(cas); + auto repo = GitRepo::Open(cas); + REQUIRE(repo); - SECTION("existing tree with symlinks") { - auto entries = repo_sym->ReadTree( - kTreeSymId, /*is_hex_id=*/true, /*ignore_special=*/true); + // create symlinks checker + auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = + cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; + + SECTION("existing tree with symlinks -- ignore special") { + auto entries = repo->ReadTree(kTreeSymId, + check_symlinks, + /*is_hex_id=*/true, + /*ignore_special=*/true); REQUIRE(entries); - auto tree_id = repo_sym->CreateTree(*entries); + auto tree_id = repo->CreateTree(*entries); REQUIRE(tree_id); // if at least one symlink exists, it gets ignored and the tree id will // not match as it is NOT recomputed! CHECK_FALSE(ToHexString(*tree_id) == kTreeSymId); } + + SECTION("existing tree with symlinks -- allow non-upwards") { + auto entries = + repo->ReadTree(kTreeSymId, check_symlinks, /*is_hex_id=*/true); + REQUIRE(entries); + + auto tree_id = repo->CreateTree(*entries); + REQUIRE(tree_id); + // all the symlinks in the test repo are non-upwards, so the tree should + // be recreated exactly and id should thus match + CHECK(ToHexString(*tree_id) == kTreeSymId); + } } TEST_CASE("Read Git Tree Data", "[git_cas]") { @@ -342,22 +497,83 @@ TEST_CASE("Read Git Tree Data", "[git_cas]") { auto repo = GitRepo::Open(cas); REQUIRE(repo); + // create symlinks checker + auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = + cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; + SECTION("empty tree") { - auto entries = GitRepo::ReadTreeData( - "", "4b825dc642cb6eb9a060e54bf8d69288fbee4904", /*is_hex_id=*/true); + auto entries = + GitRepo::ReadTreeData("", + "4b825dc642cb6eb9a060e54bf8d69288fbee4904", + check_symlinks, + /*is_hex_id=*/true); REQUIRE(entries); CHECK(entries->empty()); } SECTION("existing tree") { - auto entries = repo->ReadTree(kTreeId, /*is_hex_id=*/true); + auto entries = + repo->ReadTree(kTreeId, check_symlinks, /*is_hex_id=*/true); REQUIRE(entries); auto data = cas->ReadObject(kTreeId, /*is_hex_id=*/true); REQUIRE(data); - auto from_data = - GitRepo::ReadTreeData(*data, kTreeId, /*is_hex_id=*/true); + auto from_data = GitRepo::ReadTreeData( + *data, kTreeId, check_symlinks, /*is_hex_id=*/true); + REQUIRE(from_data); + CHECK(*from_data == *entries); + } +} + +TEST_CASE("Read Git Tree Data with non-upwards symlinks", "[git_cas]") { + auto repo_path = CreateTestRepoSymlinks(false); // checkout needed + REQUIRE(repo_path); + auto cas = GitCAS::Open(*repo_path); + REQUIRE(cas); + auto repo = GitRepo::Open(cas); + REQUIRE(repo); + + // create symlinks checker + auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = + cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; + + SECTION("empty tree") { + auto entries = + GitRepo::ReadTreeData("", + "4b825dc642cb6eb9a060e54bf8d69288fbee4904", + check_symlinks, + /*is_hex_id=*/true); + REQUIRE(entries); + CHECK(entries->empty()); + } + + SECTION("existing tree") { + auto entries = + repo->ReadTree(kTreeSymId, check_symlinks, /*is_hex_id=*/true); + REQUIRE(entries); + + auto data = cas->ReadObject(kTreeSymId, /*is_hex_id=*/true); + REQUIRE(data); + + auto from_data = GitRepo::ReadTreeData( + *data, kTreeSymId, check_symlinks, /*is_hex_id=*/true); REQUIRE(from_data); CHECK(*from_data == *entries); } @@ -371,6 +587,18 @@ TEST_CASE("Create Shallow Git Trees", "[git_cas]") { auto repo = GitRepo::Open(cas); REQUIRE(repo); + // create symlinks checker + auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = + cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; + SECTION("empty tree") { auto tree = GitRepo::CreateShallowTree({}); REQUIRE(tree); @@ -380,7 +608,8 @@ TEST_CASE("Create Shallow Git Trees", "[git_cas]") { } SECTION("existing tree from other CAS") { - auto entries = repo->ReadTree(kTreeId, /*is_hex_id=*/true); + auto entries = + repo->ReadTree(kTreeId, check_symlinks, /*is_hex_id=*/true); REQUIRE(entries); auto tree = GitRepo::CreateShallowTree(*entries); @@ -406,6 +635,13 @@ TEST_CASE("Read Git Tree", "[git_tree]") { } } +TEST_CASE("Read Git Tree with non-upwards symlinks", "[git_tree]") { + auto repo_path = CreateTestRepoSymlinks(false); // checkout needed + REQUIRE(repo_path); + CHECK(GitTree::Read(*repo_path, kTreeSymId)); + CHECK_FALSE(GitTree::Read(*repo_path, "wrong_tree_id")); +} + TEST_CASE("Lookup entries by name", "[git_tree]") { auto repo_path = CreateTestRepo(true); REQUIRE(repo_path); @@ -467,6 +703,69 @@ TEST_CASE("Lookup entries by name", "[git_tree]") { } } +TEST_CASE("Lookup symlinks by name", "[git_tree]") { + auto repo_path = CreateTestRepoSymlinks(true); + REQUIRE(repo_path); + auto tree_root = GitTree::Read(*repo_path, kTreeSymId); + REQUIRE(tree_root); + + auto entry_foo_l = tree_root->LookupEntryByName("foo_l"); + REQUIRE(entry_foo_l); + CHECK(entry_foo_l->IsBlob()); + CHECK(entry_foo_l->Type() == ObjectType::Symlink); + + auto blob_foo_l = entry_foo_l->Blob(); + REQUIRE(blob_foo_l); + CHECK(*blob_foo_l == "baz/foo"); + CHECK(blob_foo_l->size() == 7); + CHECK(blob_foo_l->size() == *entry_foo_l->Size()); + + auto entry_baz_l = tree_root->LookupEntryByName("baz_l"); + REQUIRE(entry_baz_l); + CHECK(entry_baz_l->IsBlob()); + CHECK(entry_baz_l->Type() == ObjectType::Symlink); + + auto blob_baz_l = entry_baz_l->Blob(); + REQUIRE(blob_baz_l); + CHECK(*blob_baz_l == "baz"); + CHECK(blob_baz_l->size() == 3); + CHECK(blob_baz_l->size() == *entry_baz_l->Size()); + + SECTION("Lookup missing entries") { + CHECK_FALSE(tree_root->LookupEntryByName("fool")); + CHECK_FALSE(tree_root->LookupEntryByName("barn")); + CHECK_FALSE(tree_root->LookupEntryByName("bazel")); + } + + SECTION("Lookup symlinks in sub-tree") { + auto entry_baz = tree_root->LookupEntryByName("baz"); + REQUIRE(entry_baz); + CHECK(entry_baz->IsTree()); + CHECK(entry_baz->Type() == ObjectType::Tree); + + auto const& tree_baz = entry_baz->Tree(); + REQUIRE(tree_baz); + + auto entry_baz_bar = tree_baz->LookupEntryByName("bar"); + REQUIRE(entry_baz_bar); + CHECK(entry_baz_bar->IsBlob()); + CHECK(entry_baz_bar->Type() == ObjectType::Executable); + + auto entry_baz_bar_l = tree_baz->LookupEntryByName("bar_l"); + REQUIRE(entry_baz_bar_l); + CHECK(entry_baz_bar_l->IsBlob()); + CHECK(entry_baz_bar_l->Type() == ObjectType::Symlink); + // the hash of the symlink content should be the same as the file + CHECK(entry_baz_bar_l->Hash() == entry_baz_bar->Hash()); + + SECTION("Lookup missing entries") { + CHECK_FALSE(tree_baz->LookupEntryByName("fool")); + CHECK_FALSE(tree_baz->LookupEntryByName("barn")); + CHECK_FALSE(tree_baz->LookupEntryByName("bazel")); + } + } +} + TEST_CASE("Lookup entries by path", "[git_tree]") { auto repo_path = CreateTestRepo(true); REQUIRE(repo_path); @@ -525,6 +824,61 @@ TEST_CASE("Lookup entries by path", "[git_tree]") { } } +TEST_CASE("Lookup symlinks by path", "[git_tree]") { + auto repo_path = CreateTestRepoSymlinks(true); + REQUIRE(repo_path); + auto tree_root = GitTree::Read(*repo_path, kTreeSymId); + REQUIRE(tree_root); + + auto entry_foo_l = tree_root->LookupEntryByPath("foo_l"); + REQUIRE(entry_foo_l); + CHECK(entry_foo_l->IsBlob()); + CHECK(entry_foo_l->Type() == ObjectType::Symlink); + + auto blob_foo_l = entry_foo_l->Blob(); + REQUIRE(blob_foo_l); + CHECK(*blob_foo_l == "baz/foo"); + CHECK(blob_foo_l->size() == 7); + CHECK(blob_foo_l->size() == *entry_foo_l->Size()); + + auto entry_baz_l = tree_root->LookupEntryByPath("baz_l"); + REQUIRE(entry_baz_l); + CHECK(entry_baz_l->IsBlob()); + CHECK(entry_baz_l->Type() == ObjectType::Symlink); + + auto blob_baz_l = entry_baz_l->Blob(); + REQUIRE(blob_baz_l); + CHECK(*blob_baz_l == "baz"); + CHECK(blob_baz_l->size() == 3); + CHECK(blob_baz_l->size() == *entry_baz_l->Size()); + + SECTION("Lookup missing entries") { + CHECK_FALSE(tree_root->LookupEntryByPath("fool")); + CHECK_FALSE(tree_root->LookupEntryByPath("barn")); + CHECK_FALSE(tree_root->LookupEntryByPath("bazel")); + } + + SECTION("Lookup symlinks in sub-tree") { + auto entry_baz_bar = tree_root->LookupEntryByPath("baz/bar"); + REQUIRE(entry_baz_bar); + CHECK(entry_baz_bar->IsBlob()); + CHECK(entry_baz_bar->Type() == ObjectType::Executable); + + auto entry_baz_bar_l = tree_root->LookupEntryByPath("baz/bar_l"); + REQUIRE(entry_baz_bar_l); + CHECK(entry_baz_bar_l->IsBlob()); + CHECK(entry_baz_bar_l->Type() == ObjectType::Symlink); + // the hash of the symlink content should be the same as the file + CHECK(entry_baz_bar_l->Hash() == entry_baz_bar->Hash()); + + SECTION("Lookup missing entries") { + CHECK_FALSE(tree_root->LookupEntryByPath("baz/fool")); + CHECK_FALSE(tree_root->LookupEntryByPath("baz/barn")); + CHECK_FALSE(tree_root->LookupEntryByPath("baz/bazel")); + } + } +} + TEST_CASE("Lookup entries by special names", "[git_tree]") { auto repo_path = CreateTestRepo(true); REQUIRE(repo_path); @@ -575,6 +929,21 @@ TEST_CASE("Iterate tree entries", "[git_tree]") { {"foo", "bar", "baz"})); } +TEST_CASE("Iterate tree entries with non-upwards symlinks", "[git_tree]") { + auto repo_path = CreateTestRepoSymlinks(true); + REQUIRE(repo_path); + auto tree_root = GitTree::Read(*repo_path, kTreeSymId); + REQUIRE(tree_root); + + std::vector<std::string> names{}; + for (auto const& [name, entry] : *tree_root) { + names.emplace_back(name); + } + CHECK_THAT(names, + HasSameUniqueElementsAs<std::vector<std::string>>( + {"foo", "bar", "baz", "foo_l", "baz_l"})); +} + TEST_CASE("Thread-safety", "[git_tree]") { constexpr auto kNumThreads = 100; @@ -582,7 +951,7 @@ TEST_CASE("Thread-safety", "[git_tree]") { std::vector<std::thread> threads{}; threads.reserve(kNumThreads); - auto repo_path = CreateTestRepo(true); + auto repo_path = CreateTestRepoSymlinks(false); // checkout needed REQUIRE(repo_path); SECTION("Opening and reading from the same CAS") { @@ -618,14 +987,27 @@ TEST_CASE("Thread-safety", "[git_tree]") { auto cas = GitCAS::Open(*repo_path); REQUIRE(cas); + // create symlinks checker + auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) { + for (auto const& id : ids) { + auto content = cas->ReadObject(ArtifactDigest(id).hash(), + /*is_hex_id=*/true); + if (not content or not PathIsNonUpwards(*content)) { + return false; + } + } + return true; + }; + for (int id{}; id < kNumThreads; ++id) { - threads.emplace_back([&cas, &starting_signal]() { + threads.emplace_back([&cas, &starting_signal, check_symlinks]() { starting_signal.wait(false); auto repo = GitRepo::Open(cas); REQUIRE(repo); - auto entries = repo->ReadTree(kTreeId, true); + auto entries = repo->ReadTree( + kTreeSymId, check_symlinks, /*is_hex_id=*/true); REQUIRE(entries); }); } @@ -645,7 +1027,7 @@ TEST_CASE("Thread-safety", "[git_tree]") { [&repo_path, &starting_signal](int tid) { starting_signal.wait(false); - auto tree_root = GitTree::Read(*repo_path, kTreeId); + auto tree_root = GitTree::Read(*repo_path, kTreeSymId); REQUIRE(tree_root); auto entry_subdir = tree_root->LookupEntryByName("baz"); @@ -676,7 +1058,7 @@ TEST_CASE("Thread-safety", "[git_tree]") { } SECTION("Reading from the same tree") { - auto tree_root = GitTree::Read(*repo_path, kTreeId); + auto tree_root = GitTree::Read(*repo_path, kTreeSymId); REQUIRE(tree_root); for (int id{}; id < kNumThreads; ++id) { |