diff options
author | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2025-05-26 10:41:15 +0200 |
---|---|---|
committer | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2025-06-04 14:34:44 +0200 |
commit | 8d4f583d63c657397583f778096b8bcb6d96c12f (patch) | |
tree | 7bb05ba0b827974aae31226987f7565cfb380e60 /src/buildtool | |
parent | 1258417cf03b3978005a637c3536873fef146c38 (diff) | |
download | justbuild-8d4f583d63c657397583f778096b8bcb6d96c12f.tar.gz |
Add utility methods for caching valid trees
...through marker files kept in storage under generation regime.
These can be used to allow valid source trees, i.e., those free of
upwards symlinks, to be cached in a persistent manner over multiple
builds.
Diffstat (limited to 'src/buildtool')
-rw-r--r-- | src/buildtool/file_system/TARGETS | 13 | ||||
-rw-r--r-- | src/buildtool/file_system/git_tree_utils.cpp | 118 | ||||
-rw-r--r-- | src/buildtool/file_system/git_tree_utils.hpp | 55 | ||||
-rw-r--r-- | src/buildtool/storage/fs_utils.cpp | 8 | ||||
-rw-r--r-- | src/buildtool/storage/fs_utils.hpp | 6 |
5 files changed, 200 insertions, 0 deletions
diff --git a/src/buildtool/file_system/TARGETS b/src/buildtool/file_system/TARGETS index d08d8451..5320482d 100644 --- a/src/buildtool/file_system/TARGETS +++ b/src/buildtool/file_system/TARGETS @@ -217,4 +217,17 @@ , "private-deps": [["@", "fmt", "", "fmt"]] , "stage": ["src", "buildtool", "file_system"] } +, "git_tree_utils": + { "type": ["@", "rules", "CC", "library"] + , "name": ["git_tree_utils"] + , "hdrs": ["git_tree_utils.hpp"] + , "srcs": ["git_tree_utils.cpp"] + , "deps": ["git_cas", "git_tree", ["src/buildtool/storage", "config"]] + , "private-deps": + [ "file_system_manager" + , "object_type" + , ["src/buildtool/storage", "fs_utils"] + ] + , "stage": ["src", "buildtool", "file_system"] + } } diff --git a/src/buildtool/file_system/git_tree_utils.cpp b/src/buildtool/file_system/git_tree_utils.cpp new file mode 100644 index 00000000..e3e290d1 --- /dev/null +++ b/src/buildtool/file_system/git_tree_utils.cpp @@ -0,0 +1,118 @@ +// Copyright 2025 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/buildtool/file_system/git_tree_utils.hpp" + +#include <cstddef> + +#include "src/buildtool/file_system/file_system_manager.hpp" +#include "src/buildtool/file_system/object_type.hpp" +#include "src/buildtool/storage/fs_utils.hpp" + +namespace { + +/// \brief Mark a Git hash as corresponding to a valid tree by creating a +/// corresponding marker file. +/// \returns Success flag. 
+[[nodiscard]] auto MarkTreeValid(StorageConfig const& storage_config, + std::string const& tree_id) noexcept -> bool { + auto const marker = + StorageUtils::GetValidTreesMarkerFile(storage_config, tree_id); + return FileSystemManager::CreateDirectory(marker.parent_path()) and + FileSystemManager::CreateFile(marker); +} + +/// \brief Check whether a Git hash is marked as a validated tree in any +/// generation; a hit in an older generation is re-marked in generation 0. +/// \returns True if marked in generation 0 or successfully re-marked there. +[[nodiscard]] auto IsTreeValid(StorageConfig const& storage_config, + std::string const& tree_hash) noexcept -> bool { + // check in all generations, starting with the current one (0) + for (std::size_t generation = 0; + generation < storage_config.num_generations; + ++generation) { + if (FileSystemManager::Exists(StorageUtils::GetValidTreesMarkerFile( + storage_config, tree_hash, generation))) { + // ensure it is marked in current generation + return generation == 0 ? true + : MarkTreeValid(storage_config, tree_hash); + } + } + return false; +} + +/// \brief Validate a GitTree's subtrees recursively, marking each as valid. +/// \returns True if all subtrees are valid and could be marked as such. 
+[[nodiscard]] auto ValidateGitSubtrees(StorageConfig const& storage_config, + GitTree const& tree) noexcept -> bool { + for (auto const& [path, entry] : tree) { + if (IsTreeObject(entry->Type())) { + auto const hash = entry->Hash(); + if (not IsTreeValid(storage_config, hash)) { + // not marked yet: read, validate recursively, then mark + auto subtree = entry->Tree(); + if (not subtree or + not ValidateGitSubtrees(storage_config, *subtree) or + not MarkTreeValid(storage_config, hash)) { + return false; + } + } + } + } + return true; +} + +} // namespace + +namespace GitTreeUtils { + +auto ReadValidGitCASTree(StorageConfig const& storage_config, + std::string const& tree_id, + GitCASPtr const& git_cas) noexcept + -> std::optional<GitTree> { + if (IsTreeValid(storage_config, tree_id)) { + // already marked valid: read tree without extra checks + return GitTree::Read( + git_cas, tree_id, /*ignore_special=*/false, /*skip_checks=*/true); + } + // not marked yet: read with checks, validate subtrees, then mark the root + if (auto tree = GitTree::Read(git_cas, tree_id)) { + if (ValidateGitSubtrees(storage_config, *tree) and + MarkTreeValid(storage_config, tree_id)) { + return tree; + } + } + return std::nullopt; +} + +auto IsGitTreeValid(StorageConfig const& storage_config, + GitTreeEntryPtr const& entry) noexcept -> bool { + if (entry == nullptr) { + return false; + } + auto tree_id = entry->Hash(); + if (IsTreeValid(storage_config, tree_id)) { + return true; + } + // not marked yet: read underlying GitTree, validate subtrees, then mark + if (auto const& read_tree = entry->Tree()) { + if (ValidateGitSubtrees(storage_config, *read_tree) and + MarkTreeValid(storage_config, tree_id)) { + return true; + } + } + return false; +} + +} // namespace GitTreeUtils diff --git a/src/buildtool/file_system/git_tree_utils.hpp b/src/buildtool/file_system/git_tree_utils.hpp new file mode 100644 index 00000000..a20481bb --- /dev/null +++ b/src/buildtool/file_system/git_tree_utils.hpp @@ -0,0 +1,55 @@ +// Copyright 2025 Huawei Cloud Computing Technology 
Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INCLUDED_SRC_BUILDTOOL_FILE_SYSTEM_GIT_TREE_UTILS_HPP +#define INCLUDED_SRC_BUILDTOOL_FILE_SYSTEM_GIT_TREE_UTILS_HPP + +#include <optional> +#include <string> + +#include "src/buildtool/file_system/git_cas.hpp" +#include "src/buildtool/file_system/git_tree.hpp" +#include "src/buildtool/storage/config.hpp" + +/// \brief Utility methods for validating GitTree instances +namespace GitTreeUtils { + +/// \brief Read a GitTree from a Git repository and ensure (recursively) that it +/// is free of upwards symlinks. Performs storage-based caching of all found +/// valid tree hashes. +/// \param storage_config Storage configuration for caching valid tree hashes. +/// \param tree_id Git identifier of the tree to read and validate. +/// \param git_cas Git repository providing the tree. +/// \returns GitTree instance free of upwards symlinks, recursively, on success +/// or nullopt on failure. +[[nodiscard]] auto ReadValidGitCASTree(StorageConfig const& storage_config, + std::string const& tree_id, + GitCASPtr const& git_cas) noexcept + -> std::optional<GitTree>; + +/// \brief Validate a known GitTreeEntry pointing to a Git tree, by checking +/// recursively that it is free of upwards symlinks. Performs storage-based +/// caching of all found valid tree hashes. +/// \param storage_config Storage configuration for caching valid tree hashes. +/// \param entry Pointer to an existing GitTreeEntry; nullptr yields false. 
+/// \returns Flag stating if tree is (recursively) free of upwards symlinks. +/// \note This method is useful when one has fast (and preferably cached) access +/// to a GitTree instance and direct reading from a repository is not desired. +[[nodiscard]] auto IsGitTreeValid(StorageConfig const& storage_config, + GitTreeEntryPtr const& entry) noexcept + -> bool; + +} // namespace GitTreeUtils + +#endif // INCLUDED_SRC_BUILDTOOL_FILE_SYSTEM_GIT_TREE_UTILS_HPP diff --git a/src/buildtool/storage/fs_utils.cpp b/src/buildtool/storage/fs_utils.cpp index 528dd8dd..861c8728 100644 --- a/src/buildtool/storage/fs_utils.cpp +++ b/src/buildtool/storage/fs_utils.cpp @@ -119,6 +119,14 @@ auto GetRehashIDFile(StorageConfig const& storage_config, (from_git ? "from-git" : "from-cas") / hash; } +auto GetValidTreesMarkerFile(StorageConfig const& storage_config, + std::string const& tree_hash, + std::size_t generation) noexcept + -> std::filesystem::path { + return storage_config.GenerationCacheRoot(generation) / + "validated-git-trees" / tree_hash; +} + auto WriteTreeIDFile(std::filesystem::path const& tree_id_file, std::string const& tree_id) noexcept -> bool { // needs to be done safely, so use the rename trick diff --git a/src/buildtool/storage/fs_utils.hpp b/src/buildtool/storage/fs_utils.hpp index 8cc23b69..a03cc4dd 100644 --- a/src/buildtool/storage/fs_utils.hpp +++ b/src/buildtool/storage/fs_utils.hpp @@ -89,6 +89,12 @@ namespace StorageUtils { std::size_t generation = 0) noexcept -> std::filesystem::path; +/// \brief Get the per-generation marker file path of a known valid Git tree. +[[nodiscard]] auto GetValidTreesMarkerFile(StorageConfig const& storage_config, + std::string const& tree_hash, + std::size_t generation = 0) noexcept + -> std::filesystem::path; + /// \brief Write a tree id to file. The parent folder of the file must exist! [[nodiscard]] auto WriteTreeIDFile(std::filesystem::path const& tree_id_file, std::string const& tree_id) noexcept -> bool; |