summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/buildtool/execution_api/remote/bazel/bazel_network.cpp23
-rw-r--r--src/buildtool/file_system/TARGETS5
-rw-r--r--src/buildtool/file_system/file_system_manager.hpp20
-rw-r--r--src/buildtool/file_system/git_repo.cpp110
-rw-r--r--src/buildtool/file_system/git_repo.hpp18
-rw-r--r--src/buildtool/file_system/git_tree.cpp38
-rw-r--r--src/buildtool/storage/local_cas.tpp33
7 files changed, 202 insertions, 45 deletions
diff --git a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp
index 159a848e..60ed22e1 100644
--- a/src/buildtool/execution_api/remote/bazel/bazel_network.cpp
+++ b/src/buildtool/execution_api/remote/bazel/bazel_network.cpp
@@ -42,9 +42,32 @@ namespace {
auto blobs = network->ReadBlobs({digest}).Next();
if (blobs.size() == 1) {
auto const& content = blobs.at(0).data;
+ auto check_symlinks =
+ [&network](std::vector<bazel_re::Digest> const& ids) {
+ auto size = ids.size();
+ auto reader = network->ReadBlobs(ids);
+ auto blobs = reader.Next();
+ std::size_t count{};
+ while (not blobs.empty()) {
+ if (count + blobs.size() > size) {
+ Logger::Log(LogLevel::Debug,
+ "received more blobs than requested.");
+ return false;
+ }
+ for (auto const& blob : blobs) {
+ if (not PathIsNonUpwards(blob.data)) {
+ return false;
+ }
+ }
+ count += blobs.size();
+ blobs = reader.Next();
+ }
+ return true;
+ };
return GitRepo::ReadTreeData(
content,
HashFunction::ComputeTreeHash(content).Bytes(),
+ check_symlinks,
/*is_hex_id=*/false);
}
Logger::Log(LogLevel::Error,
diff --git a/src/buildtool/file_system/TARGETS b/src/buildtool/file_system/TARGETS
index 81455fe6..b368e5b4 100644
--- a/src/buildtool/file_system/TARGETS
+++ b/src/buildtool/file_system/TARGETS
@@ -79,6 +79,8 @@
[ ["", "libgit2"]
, "file_system_manager"
, ["src/buildtool/logging", "logging"]
+ , ["src/utils/cpp", "path"]
+ , ["src/buildtool/common", "common"]
]
}
, "git_context":
@@ -98,7 +100,7 @@
, "name": ["git_repo"]
, "hdrs": ["git_repo.hpp"]
, "srcs": ["git_repo.cpp"]
- , "deps": ["git_cas"]
+ , "deps": ["git_cas", ["src/buildtool/common", "bazel_types"]]
, "stage": ["src", "buildtool", "file_system"]
, "private-deps":
[ ["src/buildtool/logging", "logging"]
@@ -107,6 +109,7 @@
, ["src/utils/cpp", "hex_string"]
, ["src/utils/cpp", "gsl"]
, ["src/buildtool/file_system", "file_system_manager"]
+ , ["src/buildtool/common", "common"]
]
, "cflags": ["-pthread"]
}
diff --git a/src/buildtool/file_system/file_system_manager.hpp b/src/buildtool/file_system/file_system_manager.hpp
index 8555e8dc..30a469af 100644
--- a/src/buildtool/file_system/file_system_manager.hpp
+++ b/src/buildtool/file_system/file_system_manager.hpp
@@ -675,6 +675,26 @@ class FileSystemManager {
return true;
}
+ /// \brief Read the content of a symlink.
+ [[nodiscard]] static auto ReadSymlink(std::filesystem::path const& link)
+ -> std::optional<std::string> {
+ try {
+ if (std::filesystem::is_symlink(link)) {
+ return std::filesystem::read_symlink(link).string();
+ }
+ Logger::Log(LogLevel::Debug,
+ "{} can not be read because it is not a symlink.",
+ link.string());
+ } catch (std::exception const& ex) {
+ Logger::Log(LogLevel::Error,
+ "reading symlink {} failed:\n{}",
+ link.string(),
+ ex.what());
+ }
+
+ return std::nullopt;
+ }
+
/// \brief Write file
/// If argument fd_less is given, the write will be performed in a child
/// process to prevent polluting the parent with open writable file
diff --git a/src/buildtool/file_system/git_repo.cpp b/src/buildtool/file_system/git_repo.cpp
index e10da57e..25ab6c4f 100644
--- a/src/buildtool/file_system/git_repo.cpp
+++ b/src/buildtool/file_system/git_repo.cpp
@@ -17,6 +17,7 @@
#include <thread>
#include <unordered_set>
+#include "src/buildtool/common/artifact_digest.hpp"
#include "src/buildtool/file_system/file_system_manager.hpp"
#include "src/buildtool/logging/logger.hpp"
#include "src/utils/cpp/gsl.hpp"
@@ -1154,53 +1155,90 @@ auto GitRepo::IsRepoFake() const noexcept -> bool {
}
auto GitRepo::ReadTree(std::string const& id,
+ SymlinksCheckFunc const& check_symlinks,
bool is_hex_id,
bool ignore_special) const noexcept
-> std::optional<tree_entries_t> {
-#ifdef BOOTSTRAP_BUILD_TOOL
- return std::nullopt;
-#else
- // create object id
- auto oid = GitObjectID(id, is_hex_id);
- if (not oid) {
- return std::nullopt;
- }
+#ifndef BOOTSTRAP_BUILD_TOOL
+ try {
+ // create object id
+ auto oid = GitObjectID(id, is_hex_id);
+ if (not oid) {
+ return std::nullopt;
+ }
- // lookup tree
- git_tree* tree_ptr{nullptr};
- {
- // share the odb lock
- std::shared_lock lock{GetGitCAS()->mutex_};
- if (git_tree_lookup(&tree_ptr, repo_->Ptr(), &(*oid)) != 0) {
+ // lookup tree
+ git_tree* tree_ptr{nullptr};
+ {
+ // share the odb lock
+ std::shared_lock lock{GetGitCAS()->mutex_};
+ if (git_tree_lookup(&tree_ptr, repo_->Ptr(), &(*oid)) != 0) {
+ Logger::Log(LogLevel::Debug,
+ "failed to lookup Git tree {}",
+ is_hex_id ? std::string{id} : ToHexString(id));
+ return std::nullopt;
+ }
+ }
+ auto tree = std::unique_ptr<git_tree, decltype(&tree_closer)>{
+ tree_ptr, tree_closer};
+
+ // walk tree (flat) and create entries
+ tree_entries_t entries{};
+ entries.reserve(git_tree_entrycount(tree.get()));
+ if (git_tree_walk(tree.get(),
+ GIT_TREEWALK_PRE,
+ ignore_special ? flat_tree_walker_ignore_special
+ : flat_tree_walker,
+ &entries) != 0) {
Logger::Log(LogLevel::Debug,
- "failed to lookup Git tree {}",
+ "failed to walk Git tree {}",
is_hex_id ? std::string{id} : ToHexString(id));
return std::nullopt;
}
- }
- auto tree = std::unique_ptr<git_tree, decltype(&tree_closer)>{tree_ptr,
- tree_closer};
-
- // walk tree (flat) and create entries
- tree_entries_t entries{};
- entries.reserve(git_tree_entrycount(tree.get()));
- if (git_tree_walk(
- tree.get(),
- GIT_TREEWALK_PRE,
- ignore_special ? flat_tree_walker_ignore_special : flat_tree_walker,
- &entries) != 0) {
- Logger::Log(LogLevel::Debug,
- "failed to walk Git tree {}",
- is_hex_id ? std::string{id} : ToHexString(id));
- return std::nullopt;
- }
+
+ // checking non-upwardness of symlinks can not be easily or safely done
+ // during the tree walk, so it is done here. This is only needed for
+ // ignore_special==false.
+ if (not ignore_special) {
+ // we first gather all symlink candidates
+ std::vector<bazel_re::Digest> symlinks{};
+ symlinks.reserve(entries.size()); // at most one symlink per entry
+ for (auto const& entry : entries) {
+ for (auto const& item : entry.second) {
+ if (IsSymlinkObject(item.type)) {
+ symlinks.emplace_back(bazel_re::Digest(
+ ArtifactDigest(ToHexString(entry.first),
+ /*size=*/0,
+ /*is_tree=*/false)));
+ break; // no need to check other items with same hash
+ }
+ }
+ }
+ // we check symlinks in bulk, optimized for network-backed repos
+ if (not check_symlinks) {
+ Logger::Log(LogLevel::Debug, "check_symlink callable is empty");
+ return std::nullopt;
+ }
+ if (not check_symlinks(symlinks)) {
+ Logger::Log(LogLevel::Error,
+ "found upwards symlinks in Git tree {}",
+ is_hex_id ? std::string{id} : ToHexString(id));
+ return std::nullopt;
+ }
+ }
#ifndef NDEBUG
- EnsuresAudit(ValidateEntries(entries));
+ EnsuresAudit(ValidateEntries(entries));
#endif
- return entries;
+ return entries;
+ } catch (std::exception const& ex) {
+ Logger::Log(
+ LogLevel::Error, "reading tree failed with:\n{}", ex.what());
+ }
#endif
+
+ return std::nullopt;
}
auto GitRepo::CreateTree(tree_entries_t const& entries) const noexcept
@@ -1254,6 +1292,7 @@ auto GitRepo::CreateTree(tree_entries_t const& entries) const noexcept
auto GitRepo::ReadTreeData(std::string const& data,
std::string const& id,
+ SymlinksCheckFunc const& check_symlinks,
bool is_hex_id) noexcept
-> std::optional<tree_entries_t> {
#ifndef BOOTSTRAP_BUILD_TOOL
@@ -1278,7 +1317,8 @@ auto GitRepo::ReadTreeData(std::string const& data,
// wrap odb in "fake" repo
auto repo =
GitRepo(std::static_pointer_cast<GitCAS const>(cas));
- return repo.ReadTree(*raw_id, /*is_hex_id=*/false);
+ return repo.ReadTree(
+ *raw_id, check_symlinks, /*is_hex_id=*/false);
}
}
} catch (std::exception const& ex) {
diff --git a/src/buildtool/file_system/git_repo.hpp b/src/buildtool/file_system/git_repo.hpp
index 43c5fa22..e96f401b 100644
--- a/src/buildtool/file_system/git_repo.hpp
+++ b/src/buildtool/file_system/git_repo.hpp
@@ -17,6 +17,7 @@
#include <functional>
+#include "src/buildtool/common/bazel_types.hpp"
#include "src/buildtool/file_system/git_cas.hpp"
extern "C" {
@@ -48,6 +49,11 @@ class GitRepo {
using tree_entries_t =
std::unordered_map<std::string, std::vector<tree_entry_t>>;
+ // Checks whether a list of symlinks given by their hashes are non-upwards,
+ // based on content read from an actual backend.
+ using SymlinksCheckFunc =
+ std::function<bool(std::vector<bazel_re::Digest> const&)>;
+
GitRepo() = delete; // no default ctor
~GitRepo() noexcept = default;
@@ -80,9 +86,11 @@ class GitRepo {
/// Reading a tree must be backed by an object database. Therefore, a real
/// repository is required.
/// \param id The object id.
+ /// \param check_symlinks Function to check non-upwardness condition.
/// \param is_hex_id Specify whether `id` is hex string or raw.
/// \param ignore_special If set, treat symlinks as absent.
[[nodiscard]] auto ReadTree(std::string const& id,
+ SymlinksCheckFunc const& check_symlinks,
bool is_hex_id = false,
bool ignore_special = false) const noexcept
-> std::optional<tree_entries_t>;
@@ -100,12 +108,14 @@ class GitRepo {
/// \brief Read entries from tree data (without object db).
/// \param data The tree object as plain data.
/// \param id The object id.
+ /// \param check_symlinks Function to check non-upwardness condition.
/// \param is_hex_id Specify whether `id` is hex string or raw.
/// \returns The tree entries.
- [[nodiscard]] static auto ReadTreeData(std::string const& data,
- std::string const& id,
- bool is_hex_id = false) noexcept
- -> std::optional<tree_entries_t>;
+ [[nodiscard]] static auto ReadTreeData(
+ std::string const& data,
+ std::string const& id,
+ SymlinksCheckFunc const& check_symlinks,
+ bool is_hex_id = false) noexcept -> std::optional<tree_entries_t>;
/// \brief Create a flat shallow (without objects in db) tree and return it.
/// Creates a tree object from the entries without access to the actual
diff --git a/src/buildtool/file_system/git_tree.cpp b/src/buildtool/file_system/git_tree.cpp
index d594e638..a3c441d9 100644
--- a/src/buildtool/file_system/git_tree.cpp
+++ b/src/buildtool/file_system/git_tree.cpp
@@ -16,8 +16,9 @@
#include <sstream>
+#include "src/buildtool/common/artifact_digest.hpp"
#include "src/buildtool/logging/logger.hpp"
-#include "src/utils/cpp/hex_string.hpp"
+#include "src/utils/cpp/path.hpp"
extern "C" {
#include <git2.h>
@@ -68,11 +69,24 @@ auto GitTree::Read(std::filesystem::path const& repo_path,
auto GitTree::Read(gsl::not_null<GitCASPtr> const& cas,
std::string const& tree_id,
bool ignore_special) noexcept -> std::optional<GitTree> {
+ // create symlinks checker
+ auto check_symlinks = [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content =
+ cas->ReadObject(ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
if (auto raw_id = FromHexString(tree_id)) {
auto repo = GitRepo::Open(cas);
if (repo != std::nullopt) {
- if (auto entries = repo->ReadTree(
- *raw_id, /*is_hex_id=*/false, ignore_special)) {
+ if (auto entries = repo->ReadTree(*raw_id,
+ check_symlinks,
+ /*is_hex_id=*/false,
+ ignore_special)) {
// the raw_id value is NOT recomputed when ignore_special==true,
// so we set it to empty to signal that it should not be used!
return GitTree::FromEntries(cas,
@@ -133,8 +147,22 @@ auto GitTreeEntry::Tree(bool ignore_special) const& noexcept
if (repo == std::nullopt) {
return std::nullopt;
}
- if (auto entries = repo->ReadTree(
- raw_id_, /*is_hex_id=*/false, ignore_special)) {
+ // create symlinks checker
+ auto check_symlinks =
+ [cas = cas_](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto content = cas->ReadObject(
+ ArtifactDigest(id).hash(), /*is_hex_id=*/true);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
+ if (auto entries = repo->ReadTree(raw_id_,
+ check_symlinks,
+ /*is_hex_id=*/false,
+ ignore_special)) {
// the raw_id value is not used when ignore_special==true
return GitTree::FromEntries(
cas_, std::move(*entries), raw_id_, ignore_special);
diff --git a/src/buildtool/storage/local_cas.tpp b/src/buildtool/storage/local_cas.tpp
index 4b91c0e0..ba5471e7 100644
--- a/src/buildtool/storage/local_cas.tpp
+++ b/src/buildtool/storage/local_cas.tpp
@@ -42,9 +42,26 @@ template <class T_CAS>
-> std::optional<GitRepo::tree_entries_t> {
if (auto const path = cas.TreePath(digest)) {
if (auto const content = FileSystemManager::ReadFile(*path)) {
+ auto check_symlinks =
+ [&cas](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto link_path =
+ cas.BlobPath(id, /*is_executable=*/false);
+ if (not link_path) {
+ return false;
+ }
+ auto content =
+ FileSystemManager::ReadSymlink(*link_path);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
return GitRepo::ReadTreeData(
*content,
HashFunction::ComputeTreeHash(*content).Bytes(),
+ check_symlinks,
/*is_hex_id=*/false);
}
}
@@ -274,8 +291,24 @@ requires(kIsLocalGeneration) auto LocalCAS<kDoGlobalUplink>::LocalUplinkGitTree(
// Determine tree entries.
auto content = FileSystemManager::ReadFile(*tree_path);
auto id = NativeSupport::Unprefix(digest.hash());
+ auto check_symlinks =
+ [cas = &cas_file_](std::vector<bazel_re::Digest> const& ids) {
+ for (auto const& id : ids) {
+ auto link_path = cas->BlobPath(id);
+ if (not link_path) {
+ return false;
+ }
+ // in the local CAS we store as files
+ auto content = FileSystemManager::ReadFile(*link_path);
+ if (not content or not PathIsNonUpwards(*content)) {
+ return false;
+ }
+ }
+ return true;
+ };
auto tree_entries = GitRepo::ReadTreeData(*content,
id,
+ check_symlinks,
/*is_hex_id=*/true);
if (not tree_entries) {
return false;