diff options
Diffstat (limited to 'src/buildtool')
-rw-r--r-- | src/buildtool/file_system/TARGETS | 13 | ||||
-rw-r--r-- | src/buildtool/file_system/git_tree_utils.cpp | 118 | ||||
-rw-r--r-- | src/buildtool/file_system/git_tree_utils.hpp | 55 | ||||
-rw-r--r-- | src/buildtool/storage/fs_utils.cpp | 8 | ||||
-rw-r--r-- | src/buildtool/storage/fs_utils.hpp | 6 |
5 files changed, 200 insertions, 0 deletions
diff --git a/src/buildtool/file_system/TARGETS b/src/buildtool/file_system/TARGETS index d08d8451..5320482d 100644 --- a/src/buildtool/file_system/TARGETS +++ b/src/buildtool/file_system/TARGETS @@ -217,4 +217,17 @@ , "private-deps": [["@", "fmt", "", "fmt"]] , "stage": ["src", "buildtool", "file_system"] } +, "git_tree_utils": + { "type": ["@", "rules", "CC", "library"] + , "name": ["git_tree_utils"] + , "hdrs": ["git_tree_utils.hpp"] + , "srcs": ["git_tree_utils.cpp"] + , "deps": ["git_cas", "git_tree", ["src/buildtool/storage", "config"]] + , "private-deps": + [ "file_system_manager" + , "object_type" + , ["src/buildtool/storage", "fs_utils"] + ] + , "stage": ["src", "buildtool", "file_system"] + } } diff --git a/src/buildtool/file_system/git_tree_utils.cpp b/src/buildtool/file_system/git_tree_utils.cpp new file mode 100644 index 00000000..e3e290d1 --- /dev/null +++ b/src/buildtool/file_system/git_tree_utils.cpp @@ -0,0 +1,118 @@ +// Copyright 2025 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/buildtool/file_system/git_tree_utils.hpp" + +#include <cstddef> + +#include "src/buildtool/file_system/file_system_manager.hpp" +#include "src/buildtool/file_system/object_type.hpp" +#include "src/buildtool/storage/fs_utils.hpp" + +namespace { + +/// \brief Mark a Git hash as corresponding to a valid tree by creating a +/// corresponding marker file. +/// \returns Success flag. 
+[[nodiscard]] auto MarkTreeValid(StorageConfig const& storage_config,
+                                 std::string const& tree_id) noexcept -> bool {
+    // first create the marker's parent folder, then the marker file itself
+    auto const marker_file =
+        StorageUtils::GetValidTreesMarkerFile(storage_config, tree_id);
+    auto const marker_dir = marker_file.parent_path();
+    return FileSystemManager::CreateDirectory(marker_dir) and
+           FileSystemManager::CreateFile(marker_file);
+}
+
+/// \brief Search all storage generations for the marker file of a Git hash. A
+/// marker found only in an older generation is re-created in the current one.
+/// \returns True if a marker was found and is set in the current generation.
+[[nodiscard]] auto IsTreeValid(StorageConfig const& storage_config,
+                               std::string const& tree_hash) noexcept -> bool {
+    for (std::size_t gen = 0; gen < storage_config.num_generations; ++gen) {
+        auto const marker_file = StorageUtils::GetValidTreesMarkerFile(
+            storage_config, tree_hash, gen);
+        if (not FileSystemManager::Exists(marker_file)) {
+            continue;  // no marker here, look in the next generation
+        }
+        // generation 0 is the current one; older hits get re-marked there
+        return gen == 0 or MarkTreeValid(storage_config, tree_hash);
+    }
+    return false;
+}
+
+/// \brief Validate a GitTree's subtrees recursively.
+/// \returns True if all the subtrees are valid.
+[[nodiscard]] auto ValidateGitSubtrees(StorageConfig const& storage_config,
+                                       GitTree const& tree) noexcept -> bool {
+    for (auto const& [name, item] : tree) {
+        if (not IsTreeObject(item->Type())) {
+            continue;  // only entries that are trees are inspected
+        }
+        auto const subtree_id = item->Hash();
+        if (IsTreeValid(storage_config, subtree_id)) {
+            continue;  // marker found, nothing left to do for this entry
+        }
+        // unmarked subtree: read it, validate it recursively, then mark it
+        auto subtree = item->Tree();
+        if (not subtree or
+            not ValidateGitSubtrees(storage_config, *subtree) or
+            not MarkTreeValid(storage_config, subtree_id)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+}  // namespace
+
+namespace GitTreeUtils {
+
+auto ReadValidGitCASTree(StorageConfig const& storage_config,
+                         std::string const& tree_id,
+                         GitCASPtr const& git_cas) noexcept
+    -> std::optional<GitTree> {
+    // a tree already marked as valid is read without the extra checks
+    if (IsTreeValid(storage_config, tree_id)) {
+        return GitTree::Read(
+            git_cas, tree_id, /*ignore_special=*/false, /*skip_checks=*/true);
+    }
+    // otherwise read with checks, validate subtrees recursively, and mark it
+    auto checked_tree = GitTree::Read(git_cas, tree_id);
+    if (not checked_tree or
+        not ValidateGitSubtrees(storage_config, *checked_tree) or
+        not MarkTreeValid(storage_config, tree_id)) {
+        return std::nullopt;
+    }
+    return checked_tree;
+}
+
+auto IsGitTreeValid(StorageConfig const& storage_config,
+                    GitTreeEntryPtr const& entry) noexcept -> bool {
+    if (entry == nullptr) {
+        return false;
+    }
+    auto const entry_hash = entry->Hash();
+    if (IsTreeValid(storage_config, entry_hash)) {
+        return true;
+    }
+    // no marker yet: read the underlying GitTree, validate its subtrees
+    // recursively, and mark the tree itself on success
+    auto const& underlying = entry->Tree();
+    return underlying and
+           ValidateGitSubtrees(storage_config, *underlying) and
+           MarkTreeValid(storage_config, entry_hash);
+}
+
+}  // namespace GitTreeUtils
diff --git a/src/buildtool/file_system/git_tree_utils.hpp b/src/buildtool/file_system/git_tree_utils.hpp
new file mode 100644
index 00000000..a20481bb
--- /dev/null
+++ b/src/buildtool/file_system/git_tree_utils.hpp
@@ -0,0 +1,55 @@
+// Copyright 2025 Huawei Cloud Computing Technology
Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INCLUDED_SRC_BUILDTOOL_FILE_SYSTEM_GIT_TREE_UTILS_HPP +#define INCLUDED_SRC_BUILDTOOL_FILE_SYSTEM_GIT_TREE_UTILS_HPP + +#include <optional> +#include <string> + +#include "src/buildtool/file_system/git_cas.hpp" +#include "src/buildtool/file_system/git_tree.hpp" +#include "src/buildtool/storage/config.hpp" + +/// \brief Utility methods for validating GitTree instances +namespace GitTreeUtils { + +/// \brief Read a GitTree from a Git repository and ensure (recursively) that it +/// is free of upwards symlinks. Performs storage-based caching of all found +/// valid tree hashes. +/// \param storage_config Storage instance for caching valid tree hashes. +/// \param tree_id Git identifier of the tree to read and validate. +/// \param git_cas Git repository providing the tree. +/// \returns GitTree instance free of upwards symlinks, recursively, on success +/// or nullopt on failure. +[[nodiscard]] auto ReadValidGitCASTree(StorageConfig const& storage_config, + std::string const& tree_id, + GitCASPtr const& git_cas) noexcept + -> std::optional<GitTree>; + +/// \brief Validate a known GitTreeEntry pointing to a Git tree, by checking +/// recursively that it is free of upwards symlinks. Performs storage-based +/// caching of all found valid tree hashes. +/// \param storage_config Storage instance for caching valid tree hashes. +/// \param entry Pointer to an existing GitTreeEntry; nullptr yields false. 
+/// \returns Flag stating if tree is (recursively) free of upwards symlinks. +/// \note This method is useful when one has fast (and preferably cached) access +/// to a GitTree instance and direct reading from a repository is not desired. +[[nodiscard]] auto IsGitTreeValid(StorageConfig const& storage_config, + GitTreeEntryPtr const& entry) noexcept + -> bool; + +} // namespace GitTreeUtils + +#endif // INCLUDED_SRC_BUILDTOOL_FILE_SYSTEM_GIT_TREE_UTILS_HPP diff --git a/src/buildtool/storage/fs_utils.cpp b/src/buildtool/storage/fs_utils.cpp index 528dd8dd..861c8728 100644 --- a/src/buildtool/storage/fs_utils.cpp +++ b/src/buildtool/storage/fs_utils.cpp @@ -119,6 +119,14 @@ auto GetRehashIDFile(StorageConfig const& storage_config, (from_git ? "from-git" : "from-cas") / hash; } +auto GetValidTreesMarkerFile(StorageConfig const& storage_config, + std::string const& tree_hash, + std::size_t generation) noexcept + -> std::filesystem::path { + return storage_config.GenerationCacheRoot(generation) / + "validated-git-trees" / tree_hash; +} + auto WriteTreeIDFile(std::filesystem::path const& tree_id_file, std::string const& tree_id) noexcept -> bool { // needs to be done safely, so use the rename trick diff --git a/src/buildtool/storage/fs_utils.hpp b/src/buildtool/storage/fs_utils.hpp index 8cc23b69..a03cc4dd 100644 --- a/src/buildtool/storage/fs_utils.hpp +++ b/src/buildtool/storage/fs_utils.hpp @@ -89,6 +89,12 @@ namespace StorageUtils { std::size_t generation = 0) noexcept -> std::filesystem::path; +/// \brief Get the path to the file marking a known valid Git tree. +[[nodiscard]] auto GetValidTreesMarkerFile(StorageConfig const& storage_config, + std::string const& tree_hash, + std::size_t generation = 0) noexcept + -> std::filesystem::path; + /// \brief Write a tree id to file. The parent folder of the file must exist! [[nodiscard]] auto WriteTreeIDFile(std::filesystem::path const& tree_id_file, std::string const& tree_id) noexcept -> bool; |