summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Cristian Sarbu <paul.cristian.sarbu@huawei.com>2023-06-13 17:35:05 +0200
committerPaul Cristian Sarbu <paul.cristian.sarbu@huawei.com>2023-06-26 17:57:29 +0200
commitc11e9142d2a1b04004dcbed282dc1e04d116e03f (patch)
treeb1c72fa8f9491f35b3c631ce98acbf00e627a59d
parentd2e3ad946b35a72a94b2d125550daf5d5e4c9904 (diff)
downloadjustbuild-c11e9142d2a1b04004dcbed282dc1e04d116e03f.tar.gz
ReadTree: Add check for non-upwards symlinks...
...as early as possible. This ensures that callers always receive only the tree entries for the supported object types. For the symlinks non-upwardness check we pass a lambda capturing the real backend of the tree entries, such that the symlinks can be read. Updates git_tree tests accordingly.
-rw-r--r--src/buildtool/execution_api/remote/bazel/bazel_network.cpp23
-rw-r--r--src/buildtool/file_system/TARGETS5
-rw-r--r--src/buildtool/file_system/file_system_manager.hpp20
-rw-r--r--src/buildtool/file_system/git_repo.cpp110
-rw-r--r--src/buildtool/file_system/git_repo.hpp18
-rw-r--r--src/buildtool/file_system/git_tree.cpp38
-rw-r--r--src/buildtool/storage/local_cas.tpp33
-rw-r--r--test/buildtool/file_system/TARGETS1
-rw-r--r--test/buildtool/file_system/git_tree.test.cpp494
9 files changed, 641 insertions, 101 deletions
diff --git a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp
index 159a848e..60ed22e1 100644
--- a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp
+++ b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp
@@ -42,9 +42,32 @@ namespace {
auto blobs = network->ReadBlobs({digest}).Next();
if (blobs.size() == 1) {
auto const& content = blobs.at(0).data;
+ auto check_symlinks =
+ [&network](std::vector<bazel_re::Digest> const& ids) {
+ auto size = ids.size();
+ auto reader = network->ReadBlobs(ids);
+ auto blobs = reader.Next();
+ std::size_t count{};
+ while (not blobs.empty()) {
+ if (count + blobs.size() > size) {
+ Logger::Log(LogLevel::Debug,
+ "received more blobs than requested.");
+ return false;
+ }
+ for (auto const& blob : blobs) {
+ if (not PathIsNonUpwards(blob.data)) {
+ return false;
+ }
+ }
+ count += blobs.size();
+ blobs = reader.Next();
+ }
+ return true;
+ };
return GitRepo::ReadTreeData(
content,
HashFunction::ComputeTreeHash(content).Bytes(),
+ check_symlinks,
/*is_hex_id=*/false);
}
Logger::Log(LogLevel::Error,
diff --git a/src/buildtool/file_system/TARGETS b/src/buildtool/file_system/TARGETS
index 81455fe6..b368e5b4 100644
--- a/src/buildtool/file_system/TARGETS
+++ b/src/buildtool/file_system/TARGETS
@@ -79,6 +79,8 @@
[ ["", "libgit2"]
, "file_system_manager"
, ["src/buildtool/logging", "logging"]
+ , ["src/utils/cpp", "path"]
+ , ["src/buildtool/common", "common"]
]
}
, "git_context":
@@ -98,7 +100,7 @@
, "name": ["git_repo"]
, "hdrs": ["git_repo.hpp"]
, "srcs": ["git_repo.cpp"]
- , "deps": ["git_cas"]
+ , "deps": ["git_cas", ["src/buildtool/common", "bazel_types"]]
, "stage": ["src", "buildtool", "file_system"]
, "private-deps":
[ ["src/buildtool/logging", "logging"]
@@ -107,6 +109,7 @@
, ["src/utils/cpp", "hex_string"]
, ["src/utils/cpp", "gsl"]
, ["src/buildtool/file_system", "file_system_manager"]
+ , ["src/buildtool/common", "common"]
]
, "cflags": ["-pthread"]
}
diff --git a/src/buildtool/file_system/file_system_manager.hpp b/src/buildtool/file_system/file_system_manager.hpp
index 8555e8dc..30a469af 100644
--- a/src/buildtool/file_system/file_system_manager.hpp
+++ b/src/buildtool/file_system/file_system_manager.hpp
@@ -675,6 +675,26 @@ class FileSystemManager {
return true;
}
+ /// \brief Read the content of a symlink.
+ [[nodiscard]] static auto ReadSymlink(std::filesystem::path const& link)
+ -> std::optional<std::string> {
+ try {
+ if (std::filesystem::is_symlink(link)) {
+ return std::filesystem::read_symlink(link).string();
+ }
+ Logger::Log(LogLevel::Debug,
+ "{} can not be read because it is not a symlink.",
+ link.string());
+ } catch (std::exception const& ex) {
+ Logger::Log(LogLevel::Error,
+ "reading symlink {} failed:\n{}",
+ link.string(),
+ ex.what());
+ }
+
+ return std::nullopt;
+ }
+
/// \brief Write file
/// If argument fd_less is given, the write will be performed in a child
/// process to prevent polluting the parent with open writable file
diff --git a/src/buildtool/file_system/git_repo.cpp b/src/buildtool/file_system/git_repo.cpp
index e10da57e..25ab6c4f 100644
--- a/src/buildtool/file_system/git_repo.cpp
+++ b/src/buildtool/file_system/git_repo.cpp
@@ -17,6 +17,7 @@
#include <thread>
#include <unordered_set>
+#include "src/buildtool/common/artifact_digest.hpp"
#include "src/buildtool/file_system/file_system_manager.hpp"
#include "src/buildtool/logging/logger.hpp"
#include "src/utils/cpp/gsl.hpp"
@@ -1154,53 +1155,90 @@ auto GitRepo::IsRepoFake() const noexcept -> bool {
}
auto GitRepo::ReadTree(std::string const& id,
+ SymlinksCheckFunc const& check_symlinks,
bool is_hex_id,
bool ignore_special) const noexcept
-> std::optional<tree_entries_t> {
-#ifdef BOOTSTRAP_BUILD_TOOL
- return std::nullopt;
-#else
- // create object id
- auto oid = GitObjectID(id, is_hex_id);
- if (not oid) {
- return std::nullopt;
- }
+#ifndef BOOTSTRAP_BUILD_TOOL
+ try {
+ // create object id
+ auto oid = GitObjectID(id, is_hex_id);
+ if (not oid) {
+ return std::nullopt;
+ }
- // lookup tree
- git_tree* tree_ptr{nullptr};
- {
- // share the odb lock
- std::shared_lock lock{GetGitCAS()->mutex_};
- if (git_tree_lookup(&tree_ptr, repo_->Ptr(), &(*oid)) != 0) {
+ // lookup tree
+ git_tree* tree_ptr{nullptr};
+ {
+ // share the odb lock
+ std::shared_lock lock{GetGitCAS()->mutex_};
+ if (git_tree_lookup(&tree_ptr, repo_->Ptr(), &(*oid)) != 0) {
+ Logger::Log(LogLevel::Debug,
+ "failed to lookup Git tree {}",
+ is_hex_id ? std::string{id} : ToHexString(id));
+ return std::nullopt;
+ }
+ }
+ auto tree = std::unique_ptr<git_tree, decltype(&tree_closer)>{
+ tree_ptr, tree_closer};
+
+ // walk tree (flat) and create entries
+ tree_entries_t entries{};
+ entries.reserve(git_tree_entrycount(tree.get()));
+ if (git_tree_walk(tree.get(),
+ GIT_TREEWALK_PRE,
+ ignore_special ? flat_tree_walker_ignore_special
+ : flat_tree_walker,
+ &entries) != 0) {
Logger::Log(LogLevel::Debug,
- "failed to lookup Git tree {}",
+ "failed to walk Git tree {}",
is_hex_id ? std::string{id} : ToHexString(id));
return std::nullopt;
}
- }
- auto tree = std::unique_ptr<git_tree, decltype(&tree_closer)>{tree_ptr,
- tree_closer};
-
- // walk tree (flat) and create entries
- tree_entries_t entries{};
- entries.reserve(git_tree_entrycount(tree.get()));
- if (git_tree_walk(
- tree.get(),
- GIT_TREEWALK_PRE,
- ignore_special ? flat_tree_walker_ignore_special : flat_tree_walker,
- &entries) != 0) {
- Logger::Log(LogLevel::Debug,
- "failed to walk Git tree {}",
- is_hex_id ? std::string{id} : ToHexString(id));
- return std::nullopt;
- }
+
+ // checking non-upwardness of symlinks can not be easily or safely done
+ // during the tree walk, so it is done here. This is only needed for
+ // ignore_special==false.
+ if (not ignore_special) {
+ // we first gather all symlink candidates
+ std::vector<bazel_re::Digest> symlinks{};
+ symlinks.reserve(entries.size()); // at most one symlink per entry
+ for (auto const& entry : entries) {
+ for (auto const& item : entry.second) {
+ if (IsSymlinkObject(item.type)) {
+ symlinks.emplace_back(bazel_re::Digest(
+ ArtifactDigest(ToHexString(entry.first),
+ /*size=*/0,
+ /*is_tree=*/false)));
+ break; // no need to check other items with same hash
+ }
+ }
+ }
+ // we check symlinks in bulk, optimized for network-backed repos
+ if (not check_symlinks) {
+ Logger::Log(LogLevel::Debug, "check_symlink callable is empty");
+ return std::nullopt;
+ }
+ if (not check_symlinks(symlinks)) {
+ Logger::Log(LogLevel::Error,
+ "found upwards symlinks in Git tree {}",
+ is_hex_id ? std::string{id} : ToHexString(id));
+ return std::nullopt;
+ }
+ }
#ifndef NDEBUG
- EnsuresAudit(ValidateEntries(entries));
+ EnsuresAudit(ValidateEntries(entries));
#endif
- return entries;
+ return entries;
+ } catch (std::exception const& ex) {
+ Logger::Log(
+ LogLevel::Error, "reading tree failed with:\n{}", ex.what());
+ }
#endif
+
+ return std::nullopt;
}
auto GitRepo::CreateTree(tree_entries_t const& entries) const noexcept
@@ -1254,6 +1292,7 @@ auto GitRepo::CreateTree(tree_entries_t const& entries) const noexcept
auto GitRepo::ReadTreeData(std::string const& data,
std::string const& id,
+ SymlinksCheckFunc const& check_symlinks,
bool is_hex_id) noexcept
-> std::optional<tree_entries_t> {
#ifndef BOOTSTRAP_BUILD_TOOL
@@ -1278,7 +1317,8 @@ auto GitRepo::ReadTreeData(std::string const& data,
// wrap odb in "fake" repo
auto repo =
GitRepo(std::static_pointer_cast<GitCAS const>(cas));
- return repo.ReadTree(*raw_id, /*is_hex_id=*/false);
+ return repo.ReadTree(
+ *raw_id, check_symlinks, /*is_hex_id=*/false);
}
}
} catch (std::exception const& ex) {
diff --git a/src/buildtool/file_system/git_repo.hpp b/src/buildtool/file_system/git_repo.hpp
index 43c5fa22..e96f401b 100644
--- a/src/buildtool/file_system/git_repo.hpp
+++ b/src/buildtool/file_system/git_repo.hpp
@@ -17,6 +17,7 @@
#include <functional>
+#include "src/buildtool/common/bazel_types.hpp"
#include "src/buildtool/file_system/git_cas.hpp"
extern "C" {
@@ -48,6 +49,11 @@ class GitRepo {
using tree_entries_t =
std::unordered_map<std::string, std::vector<tree_entry_t>>;
+ // Checks whether a list of symlinks given by their hashes are non-upwards,
+ // based on content read from an actual backend.
+ using SymlinksCheckFunc =
+ std::function<bool(std::vector<bazel_re::Digest> const&)>;
+
GitRepo() = delete; // no default ctor
~GitRepo() noexcept = default;
@@ -80,9 +86,11 @@ class GitRepo {
/// Reading a tree must be backed by an object database. Therefore, a real
/// repository is required.
/// \param id The object id.
+ /// \param check_symlinks Function to check non-upwardness condition.
/// \param is_hex_id Specify whether `id` is hex string or raw.
/// \param ignore_special If set, treat symlinks as absent.
[[nodiscard]] auto ReadTree(std::string const& id,
+ SymlinksCheckFunc const& check_symlinks,
bool is_hex_id = false,
bool ignore_special = false) const noexcept
-> std::optional<tree_entries_t>;
@@ -100,12 +108,14 @@ class GitRepo {
/// \brief Read entries from tree data (without object db).
/// \param data The tree object as plain data.
/// \param id The object id.
+ /// \param check_symlinks Function to check non-upwardness condition.
/// \param is_hex_id Specify whether `id` is hex string or raw.
/// \returns The tree entries.
- [[nodiscard]] static auto ReadTreeData(std::string const& data,
- std::string const& id,
- bool is_hex_id = false) noexcept
- -> std::optional<tree_entries_t>;
+ [[nodiscard]] static auto ReadTreeData(
+ std::string const& data,
+ std::string const& id,
+ SymlinksCheckFunc const& check_symlinks,
+ bool is_hex_id = false) noexcept -> std::optional<tree_entries_t>;
/// \brief Create a flat shallow (without objects in db) tree and return it.
/// Creates a tree object from the entries without access to the actual
diff --git a/src/buildtool/file_system/git_tree.cpp b/src/buildtool/file_system/git_tree.cpp
index d594e638..a3c441d9 100644
--- a/src/buildtool/file_system/git_tree.cpp
+++ b/src/buildtool/file_system/git_tree.cpp
@@ -16,8 +16,9 @@
#include <sstream>
+#include "src/buildtool/common/artifact_digest.hpp"
#include "src/buildtool/logging/logger.hpp"
-#include "src/utils/cpp/hex_string.hpp"
+#include "src/utils/cpp/path.hpp"
extern "C" {
#include <git2.h>
@@ -68,11 +69,24 @@ auto GitTree::Read(std::filesystem::path const& repo_path,
auto GitTree::Read(gsl::not_null<GitCASPtr> const& cas,
std::string const& tree_id,
bool ignore_special) noexcept -> std::optional<GitTree> {
+ // create symlinks checker
+ auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content =
+ cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
if (auto raw_id = FromHexString(tree_id)) {
auto repo = GitRepo::Open(cas);
if (repo != std::nullopt) {
- if (auto entries = repo->ReadTree(
- *raw_id, /*is_hex_id=*/false, ignore_special)) {
+ if (auto entries = repo->ReadTree(*raw_id,
+ check_symlinks,
+ /*is_hex_id=*/false,
+ ignore_special)) {
// the raw_id value is NOT recomputed when ignore_special==true,
// so we set it to empty to signal that it should not be used!
return GitTree::FromEntries(cas,
@@ -133,8 +147,22 @@ auto GitTreeEntry::Tree(bool ignore_special) const& noexcept
if (repo == std::nullopt) {
return std::nullopt;
}
- if (auto entries = repo->ReadTree(
- raw_id_, /*is_hex_id=*/false, ignore_special)) {
+ // create symlinks checker
+ auto check_symlinks =
+ [cas = cas_](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content = cas->ReadObject(
+ ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
+ if (auto entries = repo->ReadTree(raw_id_,
+ check_symlinks,
+ /*is_hex_id=*/false,
+ ignore_special)) {
// the raw_id value is not used when ignore_special==true
return GitTree::FromEntries(
cas_, std::move(*entries), raw_id_, ignore_special);
diff --git a/src/buildtool/storage/local_cas.tpp b/src/buildtool/storage/local_cas.tpp
index 4b91c0e0..ba5471e7 100644
--- a/src/buildtool/storage/local_cas.tpp
+++ b/src/buildtool/storage/local_cas.tpp
@@ -42,9 +42,26 @@ template <class T_CAS>
-> std::optional<GitRepo::tree_entries_t> {
if (auto const path = cas.TreePath(digest)) {
if (auto const content = FileSystemManager::ReadFile(*path)) {
+ auto check_symlinks =
+ [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto link_path =
+ cas.BlobPath(id, /*is_executable=*/false);
+ if (not link_path) {
+ return false;
+ }
+ auto content =
+ FileSystemManager::ReadSymlink(*link_path);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
return GitRepo::ReadTreeData(
*content,
HashFunction::ComputeTreeHash(*content).Bytes(),
+ check_symlinks,
/*is_hex_id=*/false);
}
}
@@ -274,8 +291,24 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::LocalUplinkGitTree(
// Determine tree entries.
auto content = FileSystemManager::ReadFile(*tree_path);
auto id = NativeSupport::Unprefix(digest.hash());
+ auto check_symlinks =
+ [cas = &cas_file_](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto link_path = cas->BlobPath(id);
+ if (not link_path) {
+ return false;
+ }
+ // in the local CAS we store as files
+ auto content = FileSystemManager::ReadFile(*link_path);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
auto tree_entries = GitRepo::ReadTreeData(*content,
id,
+ check_symlinks,
/*is_hex_id=*/true);
if (not tree_entries) {
return false;
diff --git a/test/buildtool/file_system/TARGETS b/test/buildtool/file_system/TARGETS
index 8d1d351d..4c8a0d76 100644
--- a/test/buildtool/file_system/TARGETS
+++ b/test/buildtool/file_system/TARGETS
@@ -34,6 +34,7 @@
, ["utils", "container_matchers"]
, ["@", "src", "src/buildtool/file_system", "git_tree"]
, ["@", "src", "src/buildtool/file_system", "file_system_manager"]
+ , ["@", "src", "src/buildtool/common", "common"]
, ["utils", "shell_quoting"]
]
, "stage": ["test", "buildtool", "file_system"]
diff --git a/test/buildtool/file_system/git_tree.test.cpp b/test/buildtool/file_system/git_tree.test.cpp
index 79eb11f4..ec2d5003 100644
--- a/test/buildtool/file_system/git_tree.test.cpp
+++ b/test/buildtool/file_system/git_tree.test.cpp
@@ -15,6 +15,7 @@
#include <thread>
#include "catch2/catch_test_macros.hpp"
+#include "src/buildtool/common/artifact_digest.hpp"
#include "src/buildtool/file_system/file_system_manager.hpp"
#include "src/buildtool/file_system/git_tree.hpp"
#include "test/utils/container_matchers.hpp"
@@ -193,28 +194,49 @@ TEST_CASE("Read Git Trees", "[git_cas]") {
auto repo = GitRepo::Open(cas);
REQUIRE(repo);
+ // create symlinks checker
+ auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content =
+ cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
+
SECTION("invalid trees") {
- CHECK_FALSE(repo->ReadTree("", /*is_hex_id=*/true));
- CHECK_FALSE(repo->ReadTree("", /*is_hex_id=*/false));
+ CHECK_FALSE(repo->ReadTree("", check_symlinks, /*is_hex_id=*/true));
+ CHECK_FALSE(repo->ReadTree("", check_symlinks, /*is_hex_id=*/false));
- CHECK_FALSE(repo->ReadTree(kFailId, /*is_hex_id=*/true));
- CHECK_FALSE(repo->ReadTree(HexToRaw(kFailId), /*is_hex_id=*/false));
+ CHECK_FALSE(
+ repo->ReadTree(kFailId, check_symlinks, /*is_hex_id=*/true));
+ CHECK_FALSE(repo->ReadTree(
+ HexToRaw(kFailId), check_symlinks, /*is_hex_id=*/false));
- CHECK_FALSE(repo->ReadTree(RawToHex("to_short"), /*is_hex_id=*/true));
- CHECK_FALSE(repo->ReadTree("to_short", /*is_hex_id=*/false));
+ CHECK_FALSE(repo->ReadTree(
+ RawToHex("to_short"), check_symlinks, /*is_hex_id=*/true));
+ CHECK_FALSE(
+ repo->ReadTree("to_short", check_symlinks, /*is_hex_id=*/false));
- CHECK_FALSE(repo->ReadTree("invalid_chars", /*is_hex_id=*/true));
+ CHECK_FALSE(repo->ReadTree(
+ "invalid_chars", check_symlinks, /*is_hex_id=*/true));
- CHECK_FALSE(repo->ReadTree(kFooId, /*is_hex_id=*/true));
- CHECK_FALSE(repo->ReadTree(HexToRaw(kFooId), /*is_hex_id=*/false));
+ CHECK_FALSE(repo->ReadTree(kFooId, check_symlinks, /*is_hex_id=*/true));
+ CHECK_FALSE(repo->ReadTree(
+ HexToRaw(kFooId), check_symlinks, /*is_hex_id=*/false));
- CHECK_FALSE(repo->ReadTree(kBarId, /*is_hex_id=*/true));
- CHECK_FALSE(repo->ReadTree(HexToRaw(kBarId), /*is_hex_id=*/false));
+ CHECK_FALSE(repo->ReadTree(kBarId, check_symlinks, /*is_hex_id=*/true));
+ CHECK_FALSE(repo->ReadTree(
+ HexToRaw(kBarId), check_symlinks, /*is_hex_id=*/false));
}
SECTION("valid trees") {
- auto entries0 = repo->ReadTree(kTreeId, /*is_hex_id=*/true);
- auto entries1 = repo->ReadTree(HexToRaw(kTreeId), /*is_hex_id=*/false);
+ auto entries0 =
+ repo->ReadTree(kTreeId, check_symlinks, /*is_hex_id=*/true);
+ auto entries1 = repo->ReadTree(
+ HexToRaw(kTreeId), check_symlinks, /*is_hex_id=*/false);
REQUIRE(entries0);
REQUIRE(entries1);
@@ -222,49 +244,141 @@ TEST_CASE("Read Git Trees", "[git_cas]") {
}
}
-TEST_CASE("Read Git Trees with symlinks", "[git_cas]") {
- auto repo_path = CreateTestRepoSymlinks(true);
+TEST_CASE("Read Git Trees with symlinks -- ignore special", "[git_cas]") {
+ auto repo_path = CreateTestRepoSymlinks(false); // checkout needed
REQUIRE(repo_path);
auto cas = GitCAS::Open(*repo_path);
REQUIRE(cas);
auto repo = GitRepo::Open(cas);
REQUIRE(repo);
- SECTION("invalid trees") {
- CHECK_FALSE(
- repo->ReadTree("", /*is_hex_id=*/true, /*ignore_special=*/true));
- CHECK_FALSE(
- repo->ReadTree("", /*is_hex_id=*/false, /*ignore_special=*/true));
+ // create symlinks checker
+ auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content =
+ cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
+ SECTION("invalid trees") {
CHECK_FALSE(repo->ReadTree(
- kFailId, /*is_hex_id=*/true, /*ignore_special=*/true));
+ "", check_symlinks, /*is_hex_id=*/true, /*ignore_special=*/true));
CHECK_FALSE(repo->ReadTree(
- HexToRaw(kFailId), /*is_hex_id=*/false, /*ignore_special=*/true));
+ "", check_symlinks, /*is_hex_id=*/false, /*ignore_special=*/true));
+
+ CHECK_FALSE(repo->ReadTree(kFailId,
+ check_symlinks,
+ /*is_hex_id=*/true,
+ /*ignore_special=*/true));
+ CHECK_FALSE(repo->ReadTree(HexToRaw(kFailId),
+ check_symlinks,
+ /*is_hex_id=*/false,
+ /*ignore_special=*/true));
+
+ CHECK_FALSE(repo->ReadTree(RawToHex("to_short"),
+ check_symlinks,
+ /*is_hex_id=*/true,
+ /*ignore_special=*/true));
+ CHECK_FALSE(repo->ReadTree("to_short",
+ check_symlinks,
+ /*is_hex_id=*/false,
+ /*ignore_special=*/true));
+
+ CHECK_FALSE(repo->ReadTree("invalid_chars",
+ check_symlinks,
+ /*is_hex_id=*/true,
+ /*ignore_special=*/true));
+
+ CHECK_FALSE(repo->ReadTree(kFooId,
+ check_symlinks,
+ /*is_hex_id=*/true,
+ /*ignore_special=*/true));
+ CHECK_FALSE(repo->ReadTree(HexToRaw(kFooId),
+ check_symlinks,
+ /*is_hex_id=*/false,
+ /*ignore_special=*/true));
+
+ CHECK_FALSE(repo->ReadTree(kBarId,
+ check_symlinks,
+ /*is_hex_id=*/true,
+ /*ignore_special=*/true));
+ CHECK_FALSE(repo->ReadTree(HexToRaw(kBarId),
+ check_symlinks,
+ /*is_hex_id=*/false,
+ /*ignore_special=*/true));
+ }
- CHECK_FALSE(repo->ReadTree(
- RawToHex("to_short"), /*is_hex_id=*/true, /*ignore_special=*/true));
- CHECK_FALSE(repo->ReadTree(
- "to_short", /*is_hex_id=*/false, /*ignore_special=*/true));
+ SECTION("valid trees") {
+ auto entries0 = repo->ReadTree(kTreeSymId,
+ check_symlinks,
+ /*is_hex_id=*/true,
+ /*ignore_special=*/true);
+ auto entries1 = repo->ReadTree(HexToRaw(kTreeSymId),
+ check_symlinks,
+ /*is_hex_id=*/false,
+ /*ignore_special=*/true);
+ REQUIRE(entries0);
+ REQUIRE(entries1);
+ CHECK(*entries0 == *entries1);
+ }
+}
+
+TEST_CASE("Read Git Trees with symlinks -- allow non-upwards", "[git_cas]") {
+ auto repo_path = CreateTestRepoSymlinks(false); // checkout needed
+ REQUIRE(repo_path);
+ auto cas = GitCAS::Open(*repo_path);
+ REQUIRE(cas);
+ auto repo = GitRepo::Open(cas);
+ REQUIRE(repo);
+
+ // create symlinks checker
+ auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content =
+ cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
+
+ SECTION("invalid trees") {
+ CHECK_FALSE(repo->ReadTree("", check_symlinks, /*is_hex_id=*/true));
+ CHECK_FALSE(repo->ReadTree("", check_symlinks, /*is_hex_id=*/false));
+
+ CHECK_FALSE(
+ repo->ReadTree(kFailId, check_symlinks, /*is_hex_id=*/true));
CHECK_FALSE(repo->ReadTree(
- "invalid_chars", /*is_hex_id=*/true, /*ignore_special=*/true));
+ HexToRaw(kFailId), check_symlinks, /*is_hex_id=*/false));
CHECK_FALSE(repo->ReadTree(
- kFooId, /*is_hex_id=*/true, /*ignore_special=*/true));
+ RawToHex("to_short"), check_symlinks, /*is_hex_id=*/true));
+ CHECK_FALSE(
+ repo->ReadTree("to_short", check_symlinks, /*is_hex_id=*/false));
+
CHECK_FALSE(repo->ReadTree(
- HexToRaw(kFooId), /*is_hex_id=*/false, /*ignore_special=*/true));
+ "invalid_chars", check_symlinks, /*is_hex_id=*/true));
+ CHECK_FALSE(repo->ReadTree(kFooId, check_symlinks, /*is_hex_id=*/true));
CHECK_FALSE(repo->ReadTree(
- kBarId, /*is_hex_id=*/true, /*ignore_special=*/true));
+ HexToRaw(kFooId), check_symlinks, /*is_hex_id=*/false));
+
+ CHECK_FALSE(repo->ReadTree(kBarId, check_symlinks, /*is_hex_id=*/true));
CHECK_FALSE(repo->ReadTree(
- HexToRaw(kBarId), /*is_hex_id=*/false, /*ignore_special=*/true));
+ HexToRaw(kBarId), check_symlinks, /*is_hex_id=*/false));
}
SECTION("valid trees") {
- auto entries0 = repo->ReadTree(
- kTreeSymId, /*is_hex_id=*/true, /*ignore_special=*/true);
+ auto entries0 =
+ repo->ReadTree(kTreeSymId, check_symlinks, /*is_hex_id=*/true);
auto entries1 = repo->ReadTree(
- HexToRaw(kTreeSymId), /*is_hex_id=*/false, /*ignore_special=*/true);
+ HexToRaw(kTreeSymId), check_symlinks, /*is_hex_id=*/false);
REQUIRE(entries0);
REQUIRE(entries1);
@@ -280,6 +394,18 @@ TEST_CASE("Create Git Trees", "[git_cas]") {
auto repo = GitRepo::Open(cas);
REQUIRE(repo);
+ // create symlinks checker
+ auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content =
+ cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
+
SECTION("empty tree") {
auto tree_id = repo->CreateTree({});
REQUIRE(tree_id);
@@ -288,7 +414,8 @@ TEST_CASE("Create Git Trees", "[git_cas]") {
}
SECTION("existing tree") {
- auto entries = repo->ReadTree(kTreeId, /*is_hex_id=*/true);
+ auto entries =
+ repo->ReadTree(kTreeId, check_symlinks, /*is_hex_id=*/true);
REQUIRE(entries);
auto tree_id = repo->CreateTree(*entries);
@@ -313,25 +440,53 @@ TEST_CASE("Create Git Trees", "[git_cas]") {
CHECK(foo_bar_id == bar_foo_id);
}
+}
- auto repo_path_sym = CreateTestRepoSymlinks(true);
- REQUIRE(repo_path_sym);
- auto cas_sym = GitCAS::Open(*repo_path_sym);
- REQUIRE(cas_sym);
- auto repo_sym = GitRepo::Open(cas_sym);
- REQUIRE(repo_sym);
+TEST_CASE("Create Git Trees with symlinks", "[git_cas]") {
+ auto repo_path = CreateTestRepoSymlinks(false); // checkout needed
+ REQUIRE(repo_path);
+ auto cas = GitCAS::Open(*repo_path);
+ REQUIRE(cas);
+ auto repo = GitRepo::Open(cas);
+ REQUIRE(repo);
- SECTION("existing tree with symlinks") {
- auto entries = repo_sym->ReadTree(
- kTreeSymId, /*is_hex_id=*/true, /*ignore_special=*/true);
+ // create symlinks checker
+ auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content =
+ cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
+
+ SECTION("existing tree with symlinks -- ignore special") {
+ auto entries = repo->ReadTree(kTreeSymId,
+ check_symlinks,
+ /*is_hex_id=*/true,
+ /*ignore_special=*/true);
REQUIRE(entries);
- auto tree_id = repo_sym->CreateTree(*entries);
+ auto tree_id = repo->CreateTree(*entries);
REQUIRE(tree_id);
// if at least one symlink exists, it gets ignored and the tree id will
// not match as it is NOT recomputed!
CHECK_FALSE(ToHexString(*tree_id) == kTreeSymId);
}
+
+ SECTION("existing tree with symlinks -- allow non-upwards") {
+ auto entries =
+ repo->ReadTree(kTreeSymId, check_symlinks, /*is_hex_id=*/true);
+ REQUIRE(entries);
+
+ auto tree_id = repo->CreateTree(*entries);
+ REQUIRE(tree_id);
+ // all the symlinks in the test repo are non-upwards, so the tree should
+ // be recreated exactly and id should thus match
+ CHECK(ToHexString(*tree_id) == kTreeSymId);
+ }
}
TEST_CASE("Read Git Tree Data", "[git_cas]") {
@@ -342,22 +497,83 @@ TEST_CASE("Read Git Tree Data", "[git_cas]") {
auto repo = GitRepo::Open(cas);
REQUIRE(repo);
+ // create symlinks checker
+ auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content =
+ cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
+
SECTION("empty tree") {
- auto entries = GitRepo::ReadTreeData(
- "", "4b825dc642cb6eb9a060e54bf8d69288fbee4904", /*is_hex_id=*/true);
+ auto entries =
+ GitRepo::ReadTreeData("",
+ "4b825dc642cb6eb9a060e54bf8d69288fbee4904",
+ check_symlinks,
+ /*is_hex_id=*/true);
REQUIRE(entries);
CHECK(entries->empty());
}
SECTION("existing tree") {
- auto entries = repo->ReadTree(kTreeId, /*is_hex_id=*/true);
+ auto entries =
+ repo->ReadTree(kTreeId, check_symlinks, /*is_hex_id=*/true);
REQUIRE(entries);
auto data = cas->ReadObject(kTreeId, /*is_hex_id=*/true);
REQUIRE(data);
- auto from_data =
- GitRepo::ReadTreeData(*data, kTreeId, /*is_hex_id=*/true);
+ auto from_data = GitRepo::ReadTreeData(
+ *data, kTreeId, check_symlinks, /*is_hex_id=*/true);
+ REQUIRE(from_data);
+ CHECK(*from_data == *entries);
+ }
+}
+
+TEST_CASE("Read Git Tree Data with non-upwards symlinks", "[git_cas]") {
+ auto repo_path = CreateTestRepoSymlinks(false); // checkout needed
+ REQUIRE(repo_path);
+ auto cas = GitCAS::Open(*repo_path);
+ REQUIRE(cas);
+ auto repo = GitRepo::Open(cas);
+ REQUIRE(repo);
+
+ // create symlinks checker
+ auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content =
+ cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
+
+ SECTION("empty tree") {
+ auto entries =
+ GitRepo::ReadTreeData("",
+ "4b825dc642cb6eb9a060e54bf8d69288fbee4904",
+ check_symlinks,
+ /*is_hex_id=*/true);
+ REQUIRE(entries);
+ CHECK(entries->empty());
+ }
+
+ SECTION("existing tree") {
+ auto entries =
+ repo->ReadTree(kTreeSymId, check_symlinks, /*is_hex_id=*/true);
+ REQUIRE(entries);
+
+ auto data = cas->ReadObject(kTreeSymId, /*is_hex_id=*/true);
+ REQUIRE(data);
+
+ auto from_data = GitRepo::ReadTreeData(
+ *data, kTreeSymId, check_symlinks, /*is_hex_id=*/true);
REQUIRE(from_data);
CHECK(*from_data == *entries);
}
@@ -371,6 +587,18 @@ TEST_CASE("Create Shallow Git Trees", "[git_cas]") {
auto repo = GitRepo::Open(cas);
REQUIRE(repo);
+ // create symlinks checker
+ auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content =
+ cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
+
SECTION("empty tree") {
auto tree = GitRepo::CreateShallowTree({});
REQUIRE(tree);
@@ -380,7 +608,8 @@ TEST_CASE("Create Shallow Git Trees", "[git_cas]") {
}
SECTION("existing tree from other CAS") {
- auto entries = repo->ReadTree(kTreeId, /*is_hex_id=*/true);
+ auto entries =
+ repo->ReadTree(kTreeId, check_symlinks, /*is_hex_id=*/true);
REQUIRE(entries);
auto tree = GitRepo::CreateShallowTree(*entries);
@@ -406,6 +635,13 @@ TEST_CASE("Read Git Tree", "[git_tree]") {
}
}
+TEST_CASE("Read Git Tree with non-upwards symlinks", "[git_tree]") {
+ auto repo_path = CreateTestRepoSymlinks(false); // checkout needed
+ REQUIRE(repo_path);
+ CHECK(GitTree::Read(*repo_path, kTreeSymId));
+ CHECK_FALSE(GitTree::Read(*repo_path, "wrong_tree_id"));
+}
+
TEST_CASE("Lookup entries by name", "[git_tree]") {
auto repo_path = CreateTestRepo(true);
REQUIRE(repo_path);
@@ -467,6 +703,69 @@ TEST_CASE("Lookup entries by name", "[git_tree]") {
}
}
+TEST_CASE("Lookup symlinks by name", "[git_tree]") {
+ auto repo_path = CreateTestRepoSymlinks(true);
+ REQUIRE(repo_path);
+ auto tree_root = GitTree::Read(*repo_path, kTreeSymId);
+ REQUIRE(tree_root);
+
+ auto entry_foo_l = tree_root->LookupEntryByName("foo_l");
+ REQUIRE(entry_foo_l);
+ CHECK(entry_foo_l->IsBlob());
+ CHECK(entry_foo_l->Type() == ObjectType::Symlink);
+
+ auto blob_foo_l = entry_foo_l->Blob();
+ REQUIRE(blob_foo_l);
+ CHECK(*blob_foo_l == "baz/foo");
+ CHECK(blob_foo_l->size() == 7);
+ CHECK(blob_foo_l->size() == *entry_foo_l->Size());
+
+ auto entry_baz_l = tree_root->LookupEntryByName("baz_l");
+ REQUIRE(entry_baz_l);
+ CHECK(entry_baz_l->IsBlob());
+ CHECK(entry_baz_l->Type() == ObjectType::Symlink);
+
+ auto blob_baz_l = entry_baz_l->Blob();
+ REQUIRE(blob_baz_l);
+ CHECK(*blob_baz_l == "baz");
+ CHECK(blob_baz_l->size() == 3);
+ CHECK(blob_baz_l->size() == *entry_baz_l->Size());
+
+ SECTION("Lookup missing entries") {
+ CHECK_FALSE(tree_root->LookupEntryByName("fool"));
+ CHECK_FALSE(tree_root->LookupEntryByName("barn"));
+ CHECK_FALSE(tree_root->LookupEntryByName("bazel"));
+ }
+
+ SECTION("Lookup symlinks in sub-tree") {
+ auto entry_baz = tree_root->LookupEntryByName("baz");
+ REQUIRE(entry_baz);
+ CHECK(entry_baz->IsTree());
+ CHECK(entry_baz->Type() == ObjectType::Tree);
+
+ auto const& tree_baz = entry_baz->Tree();
+ REQUIRE(tree_baz);
+
+ auto entry_baz_bar = tree_baz->LookupEntryByName("bar");
+ REQUIRE(entry_baz_bar);
+ CHECK(entry_baz_bar->IsBlob());
+ CHECK(entry_baz_bar->Type() == ObjectType::Executable);
+
+ auto entry_baz_bar_l = tree_baz->LookupEntryByName("bar_l");
+ REQUIRE(entry_baz_bar_l);
+ CHECK(entry_baz_bar_l->IsBlob());
+ CHECK(entry_baz_bar_l->Type() == ObjectType::Symlink);
+ // the hash of the symlink content should be the same as the file
+ CHECK(entry_baz_bar_l->Hash() == entry_baz_bar->Hash());
+
+ SECTION("Lookup missing entries") {
+ CHECK_FALSE(tree_baz->LookupEntryByName("fool"));
+ CHECK_FALSE(tree_baz->LookupEntryByName("barn"));
+ CHECK_FALSE(tree_baz->LookupEntryByName("bazel"));
+ }
+ }
+}
+
TEST_CASE("Lookup entries by path", "[git_tree]") {
auto repo_path = CreateTestRepo(true);
REQUIRE(repo_path);
@@ -525,6 +824,61 @@ TEST_CASE("Lookup entries by path", "[git_tree]") {
}
}
+TEST_CASE("Lookup symlinks by path", "[git_tree]") {
+ auto repo_path = CreateTestRepoSymlinks(true);
+ REQUIRE(repo_path);
+ auto tree_root = GitTree::Read(*repo_path, kTreeSymId);
+ REQUIRE(tree_root);
+
+ auto entry_foo_l = tree_root->LookupEntryByPath("foo_l");
+ REQUIRE(entry_foo_l);
+ CHECK(entry_foo_l->IsBlob());
+ CHECK(entry_foo_l->Type() == ObjectType::Symlink);
+
+ auto blob_foo_l = entry_foo_l->Blob();
+ REQUIRE(blob_foo_l);
+ CHECK(*blob_foo_l == "baz/foo");
+ CHECK(blob_foo_l->size() == 7);
+ CHECK(blob_foo_l->size() == *entry_foo_l->Size());
+
+ auto entry_baz_l = tree_root->LookupEntryByPath("baz_l");
+ REQUIRE(entry_baz_l);
+ CHECK(entry_baz_l->IsBlob());
+ CHECK(entry_baz_l->Type() == ObjectType::Symlink);
+
+ auto blob_baz_l = entry_baz_l->Blob();
+ REQUIRE(blob_baz_l);
+ CHECK(*blob_baz_l == "baz");
+ CHECK(blob_baz_l->size() == 3);
+ CHECK(blob_baz_l->size() == *entry_baz_l->Size());
+
+ SECTION("Lookup missing entries") {
+ CHECK_FALSE(tree_root->LookupEntryByPath("fool"));
+ CHECK_FALSE(tree_root->LookupEntryByPath("barn"));
+ CHECK_FALSE(tree_root->LookupEntryByPath("bazel"));
+ }
+
+ SECTION("Lookup symlinks in sub-tree") {
+ auto entry_baz_bar = tree_root->LookupEntryByPath("baz/bar");
+ REQUIRE(entry_baz_bar);
+ CHECK(entry_baz_bar->IsBlob());
+ CHECK(entry_baz_bar->Type() == ObjectType::Executable);
+
+ auto entry_baz_bar_l = tree_root->LookupEntryByPath("baz/bar_l");
+ REQUIRE(entry_baz_bar_l);
+ CHECK(entry_baz_bar_l->IsBlob());
+ CHECK(entry_baz_bar_l->Type() == ObjectType::Symlink);
+ // the hash of the symlink content should be the same as the file
+ CHECK(entry_baz_bar_l->Hash() == entry_baz_bar->Hash());
+
+ SECTION("Lookup missing entries") {
+ CHECK_FALSE(tree_root->LookupEntryByPath("baz/fool"));
+ CHECK_FALSE(tree_root->LookupEntryByPath("baz/barn"));
+ CHECK_FALSE(tree_root->LookupEntryByPath("baz/bazel"));
+ }
+ }
+}
+
TEST_CASE("Lookup entries by special names", "[git_tree]") {
auto repo_path = CreateTestRepo(true);
REQUIRE(repo_path);
@@ -575,6 +929,21 @@ TEST_CASE("Iterate tree entries", "[git_tree]") {
{"foo", "bar", "baz"}));
}
+TEST_CASE("Iterate tree entries with non-upwards symlinks", "[git_tree]") {
+ auto repo_path = CreateTestRepoSymlinks(true);
+ REQUIRE(repo_path);
+ auto tree_root = GitTree::Read(*repo_path, kTreeSymId);
+ REQUIRE(tree_root);
+
+ std::vector<std::string> names{};
+ for (auto const& [name, entry] : *tree_root) {
+ names.emplace_back(name);
+ }
+ CHECK_THAT(names,
+ HasSameUniqueElementsAs<std::vector<std::string>>(
+ {"foo", "bar", "baz", "foo_l", "baz_l"}));
+}
+
TEST_CASE("Thread-safety", "[git_tree]") {
constexpr auto kNumThreads = 100;
@@ -582,7 +951,7 @@ TEST_CASE("Thread-safety", "[git_tree]") {
std::vector<std::thread> threads{};
threads.reserve(kNumThreads);
- auto repo_path = CreateTestRepo(true);
+ auto repo_path = CreateTestRepoSymlinks(false); // checkout needed
REQUIRE(repo_path);
SECTION("Opening and reading from the same CAS") {
@@ -618,14 +987,27 @@ TEST_CASE("Thread-safety", "[git_tree]") {
auto cas = GitCAS::Open(*repo_path);
REQUIRE(cas);
+ // create symlinks checker
+ auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content = cas->ReadObject(ArtifactDigest(id).hash(),
+ /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
+
for (int id{}; id < kNumThreads; ++id) {
- threads.emplace_back([&cas, &starting_signal]() {
+ threads.emplace_back([&cas, &starting_signal, check_symlinks]() {
starting_signal.wait(false);
auto repo = GitRepo::Open(cas);
REQUIRE(repo);
- auto entries = repo->ReadTree(kTreeId, true);
+ auto entries = repo->ReadTree(
+ kTreeSymId, check_symlinks, /*is_hex_id=*/true);
REQUIRE(entries);
});
}
@@ -645,7 +1027,7 @@ TEST_CASE("Thread-safety", "[git_tree]") {
[&repo_path, &starting_signal](int tid) {
starting_signal.wait(false);
- auto tree_root = GitTree::Read(*repo_path, kTreeId);
+ auto tree_root = GitTree::Read(*repo_path, kTreeSymId);
REQUIRE(tree_root);
auto entry_subdir = tree_root->LookupEntryByName("baz");
@@ -676,7 +1058,7 @@ TEST_CASE("Thread-safety", "[git_tree]") {
}
SECTION("Reading from the same tree") {
- auto tree_root = GitTree::Read(*repo_path, kTreeId);
+ auto tree_root = GitTree::Read(*repo_path, kTreeSymId);
REQUIRE(tree_root);
for (int id{}; id < kNumThreads; ++id) {