summaryrefslogtreecommitdiff
path: root/src/buildtool
diff options
context:
space:
mode:
authorPaul Cristian Sarbu <paul.cristian.sarbu@huawei.com>2025-05-26 13:19:48 +0200
committerPaul Cristian Sarbu <paul.cristian.sarbu@huawei.com>2025-06-04 14:34:44 +0200
commitaf8f9a663d69e7bce01be200d556909b90ec1873 (patch)
tree9b51eb2a757e3c44bb3715013433df580ea2d2f5 /src/buildtool
parentd1f9bd1744b281adb353226c2f1285508e50ef53 (diff)
downloadjustbuild-af8f9a663d69e7bce01be200d556909b90ec1873.tar.gz
FileRoot: Ensure all read blobs and trees contain valid entries
In particular, ensure that Git roots check for, e.g., upwards symlinks, before returning blobs and trees. To ensure that only the bear minimum extra work is performed for this purpose, Git roots now keep also the root's GitTreeEntry as a field, allowing the validity check of root source trees to take place only once and only if required.
Diffstat (limited to 'src/buildtool')
-rw-r--r--src/buildtool/execution_engine/executor/executor.hpp10
-rw-r--r--src/buildtool/file_system/TARGETS2
-rw-r--r--src/buildtool/file_system/file_root.hpp124
3 files changed, 106 insertions, 30 deletions
diff --git a/src/buildtool/execution_engine/executor/executor.hpp b/src/buildtool/execution_engine/executor/executor.hpp
index e38a656f..018a1536 100644
--- a/src/buildtool/execution_engine/executor/executor.hpp
+++ b/src/buildtool/execution_engine/executor/executor.hpp
@@ -457,7 +457,8 @@ class ExecutorImpl {
}
else {
// if known blob is not available, read and upload it
- content = ReadGitBlob(repo, repo_config, hash);
+ content = ReadGitBlob(
+ repo, repo_config, hash, IsSymlinkObject(info.type));
}
if (not content) {
logger.Emit(LogLevel::Error, "failed to get content");
@@ -478,15 +479,16 @@ class ExecutorImpl {
[[nodiscard]] static auto ReadGitBlob(
std::string const& repo,
gsl::not_null<const RepositoryConfig*> const& repo_config,
- std::string const& hash) noexcept -> std::optional<std::string> {
+ std::string const& hash,
+ bool is_symlink) noexcept -> std::optional<std::string> {
std::optional<std::string> blob{};
if (auto const* ws_root = repo_config->WorkspaceRoot(repo)) {
// try to obtain blob from local workspace's Git CAS, if any
- blob = ws_root->ReadBlob(hash);
+ blob = ws_root->ReadBlob(hash, is_symlink);
}
if (not blob) {
// try to obtain blob from global Git CAS, if any
- blob = repo_config->ReadBlobFromGitCAS(hash, /*is_symlink=*/false);
+ blob = repo_config->ReadBlobFromGitCAS(hash, is_symlink);
}
return blob;
}
diff --git a/src/buildtool/file_system/TARGETS b/src/buildtool/file_system/TARGETS
index 2978397b..cdd5bce7 100644
--- a/src/buildtool/file_system/TARGETS
+++ b/src/buildtool/file_system/TARGETS
@@ -178,6 +178,7 @@
[ "file_system_manager"
, "git_cas"
, "git_tree"
+ , "git_tree_utils"
, "object_type"
, "precomputed_root"
, ["@", "fmt", "", "fmt"]
@@ -193,6 +194,7 @@
, ["src/buildtool/storage", "config"]
, ["src/utils/cpp", "concepts"]
, ["src/utils/cpp", "expected"]
+ , ["src/utils/cpp", "hex_string"]
, ["src/utils/cpp", "json"]
]
, "stage": ["src", "buildtool", "file_system"]
diff --git a/src/buildtool/file_system/file_root.hpp b/src/buildtool/file_system/file_root.hpp
index 708e29cb..62ce9049 100644
--- a/src/buildtool/file_system/file_root.hpp
+++ b/src/buildtool/file_system/file_root.hpp
@@ -40,6 +40,7 @@
#include "src/buildtool/file_system/file_system_manager.hpp"
#include "src/buildtool/file_system/git_cas.hpp"
#include "src/buildtool/file_system/git_tree.hpp"
+#include "src/buildtool/file_system/git_tree_utils.hpp"
#include "src/buildtool/file_system/object_type.hpp"
#include "src/buildtool/file_system/precomputed_root.hpp"
#include "src/buildtool/logging/log_level.hpp"
@@ -47,6 +48,7 @@
#include "src/buildtool/storage/config.hpp"
#include "src/utils/cpp/concepts.hpp"
#include "src/utils/cpp/expected.hpp"
+#include "src/utils/cpp/hex_string.hpp"
// Keep it to ensure fmt::format works on JSON objects
#include "src/utils/cpp/json.hpp" // IWYU pragma: keep
@@ -111,6 +113,7 @@ class FileRoot {
struct RootGit {
gsl::not_null<GitCASPtr> cas;
gsl::not_null<GitTreePtr> tree;
+ gsl::not_null<GitTreeEntryPtr> root_entry;
};
// absent roots are defined by a tree hash with no witnessing repository
@@ -338,9 +341,10 @@ class FileRoot {
: root_{fs_root_t{std::move(root)}}, ignore_special_{ignore_special} {}
FileRoot(gsl::not_null<GitCASPtr> const& cas,
gsl::not_null<GitTreePtr> const& tree,
+ gsl::not_null<GitTreeEntryPtr> const& root_entry,
gsl::not_null<StorageConfig const*> const& storage_config,
bool ignore_special = false) noexcept
- : root_{RootGit{cas, tree}},
+ : root_{RootGit{cas, tree, root_entry}},
storage_config_{storage_config},
ignore_special_{ignore_special} {}
explicit FileRoot(PrecomputedRoot precomputed)
@@ -351,19 +355,30 @@ class FileRoot {
std::filesystem::path const& repo_path,
std::string const& git_tree_id,
bool ignore_special = false) noexcept -> std::optional<FileRoot> {
+ // Check validity of hash early
+ auto raw_tree_id = FromHexString(git_tree_id);
+ if (not raw_tree_id) {
+ return std::nullopt;
+ }
auto cas = GitCAS::Open(repo_path);
if (not cas) {
return std::nullopt;
}
- auto tree = GitTree::Read(cas, git_tree_id, ignore_special);
+ // Read source tree root permissively from repository. Validity of
+ // entries will be checked only when actually needed.
+ auto tree = GitTree::Read(
+ cas, git_tree_id, ignore_special, /*skip_checks=*/true);
if (not tree) {
return std::nullopt;
}
try {
- return FileRoot{cas,
- std::make_shared<GitTree const>(std::move(*tree)),
- storage_config,
- ignore_special};
+ return FileRoot{
+ cas,
+ std::make_shared<GitTree const>(std::move(*tree)),
+ std::make_shared<GitTreeEntry const>(
+ std::move(cas), std::move(*raw_tree_id), ObjectType::Tree),
+ storage_config,
+ ignore_special};
} catch (...) {
return std::nullopt;
}
@@ -431,10 +446,15 @@ class FileRoot {
if (path == ".") {
return true;
}
- return static_cast<bool>(
- std::get<RootGit>(root_).tree->LookupEntryByPath(path));
+ if (auto entry =
+ std::get<RootGit>(root_).tree->LookupEntryByPath(path)) {
+ return IsNonSpecialObject(entry->Type()) or
+ (not ignore_special_ and
+ IsSymlinkObject(entry->Type()) and
+ entry->Blob().has_value());
+ }
}
- if (std::holds_alternative<fs_root_t>(root_)) {
+ else if (std::holds_alternative<fs_root_t>(root_)) {
auto root_path = std::get<fs_root_t>(root_) / path;
auto exists = FileSystemManager::Exists(root_path);
auto type =
@@ -463,10 +483,15 @@ class FileRoot {
[[nodiscard]] auto IsSymlink(
std::filesystem::path const& file_path) const noexcept -> bool {
+ if (ignore_special_) {
+ // skip unnecessary check
+ return false;
+ }
if (std::holds_alternative<RootGit>(root_)) {
if (auto entry = std::get<RootGit>(root_).tree->LookupEntryByPath(
file_path)) {
- return IsSymlinkObject(entry->Type());
+ return IsSymlinkObject(entry->Type()) and
+ entry->Blob().has_value();
}
}
else if (std::holds_alternative<fs_root_t>(root_)) {
@@ -506,6 +531,7 @@ class FileRoot {
if (auto entry = std::get<RootGit>(root_).tree->LookupEntryByPath(
file_path)) {
if (IsBlobObject(entry->Type())) {
+ // return blob content, if valid
return entry->Blob();
}
}
@@ -513,7 +539,8 @@ class FileRoot {
else if (std::holds_alternative<fs_root_t>(root_)) {
auto full_path = std::get<fs_root_t>(root_) / file_path;
if (auto type = FileSystemManager::Type(full_path,
- /*allow_upwards=*/true)) {
+ /*allow_upwards=*/false)) {
+ // return content of file or non-upward symlink
return IsSymlinkObject(*type)
? FileSystemManager::ReadSymlink(full_path)
: FileSystemManager::ReadFile(full_path);
@@ -526,14 +553,37 @@ class FileRoot {
const noexcept -> DirectoryEntries {
try {
if (std::holds_alternative<RootGit>(root_)) {
- auto const& tree = std::get<RootGit>(root_).tree;
+ auto const& git_root = std::get<RootGit>(root_);
+ auto const& root_tree = git_root.tree;
if (dir_path == ".") {
- return DirectoryEntries{&(*tree)};
+ if (ignore_special_ or
+ GitTreeUtils::IsGitTreeValid(*storage_config_,
+ git_root.root_entry)) {
+ // stored root tree contains only valid entries, so use
+ // it directly
+ return DirectoryEntries{&(*root_tree)};
+ }
+ Logger::Log(LogLevel::Warning,
+ "Asked to read invalid Git source tree {}",
+ root_tree->FileRootHash());
+ return DirectoryEntries{DirectoryEntries::pairs_t{}};
}
- if (auto entry = tree->LookupEntryByPath(dir_path)) {
- if (auto const& found_tree = entry->Tree(ignore_special_)) {
- return DirectoryEntries{&(*found_tree)};
+ if (auto entry = root_tree->LookupEntryByPath(dir_path)) {
+ if (ignore_special_ or
+ GitTreeUtils::IsGitTreeValid(*storage_config_, entry)) {
+ // simply get the GitTree reference; this is now valid
+ // and it is guaranteed to not reread unnecessarily
+ if (auto const& entry_tree =
+ entry->Tree(ignore_special_)) {
+ return DirectoryEntries{&(*entry_tree)};
+ }
}
+ Logger::Log(
+ LogLevel::Warning,
+ "Asked to read invalid subdir {} of Git source tree {}",
+ dir_path.string(),
+ root_tree->FileRootHash());
+ return DirectoryEntries{DirectoryEntries::pairs_t{}};
}
}
else if (std::holds_alternative<fs_root_t>(root_)) {
@@ -581,11 +631,14 @@ class FileRoot {
}
/// \brief Read a blob from the root based on its ID.
- [[nodiscard]] auto ReadBlob(std::string const& blob_id) const noexcept
+ [[nodiscard]] auto ReadBlob(std::string const& blob_id,
+ bool is_symlink = false) const noexcept
-> std::optional<std::string> {
if (std::holds_alternative<RootGit>(root_)) {
- return std::get<RootGit>(root_).cas->ReadObject(blob_id,
- /*is_hex_id=*/true);
+ return std::get<RootGit>(root_).cas->ReadObject(
+ blob_id,
+ /*is_hex_id=*/true,
+ /*as_valid_symlink=*/is_symlink);
}
return std::nullopt;
}
@@ -594,13 +647,17 @@ class FileRoot {
/// This should include all valid entry types.
[[nodiscard]] auto ReadTree(std::string const& tree_id) const noexcept
-> std::optional<GitTree> {
- if (std::holds_alternative<RootGit>(root_)) {
- try {
+ try {
+ if (std::holds_alternative<RootGit>(root_)) {
auto const& cas = std::get<RootGit>(root_).cas;
- return GitTree::Read(cas, tree_id);
- } catch (...) {
- return std::nullopt;
+ return GitTreeUtils::ReadValidGitCASTree(
+ *storage_config_, tree_id, cas);
}
+ } catch (std::exception const& ex) {
+ Logger::Log(LogLevel::Error,
+ "Unexpected failure while reading source tree {}:\n{}",
+ tree_id,
+ ex.what());
}
return std::nullopt;
}
@@ -615,11 +672,26 @@ class FileRoot {
if (std::holds_alternative<RootGit>(root_)) {
if (auto entry = std::get<RootGit>(root_).tree->LookupEntryByPath(
file_path)) {
- if (entry->IsBlob()) {
+ // if symlink, read content ahead of time to check validity
+ std::optional<std::string> blob{};
+ if (IsSymlinkObject(entry->Type())) {
+ blob = entry->Blob();
+ if (not blob) {
+ return std::nullopt;
+ }
+ }
+ if (IsBlobObject(entry->Type())) {
if (not ProtocolTraits::IsNative(hash_type)) {
+ // read blob content, if not read already
+ if (not blob) {
+ blob = entry->Blob();
+ if (not blob) {
+ return std::nullopt;
+ }
+ }
auto compatible_hash =
GitHashesConverter::Instance().RegisterGitEntry(
- entry->Hash(), *entry->Blob(), repository);
+ entry->Hash(), *std::move(blob), repository);
auto digest =
ArtifactDigestFactory::Create(hash_type,
compatible_hash,