summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/other_tools/symlinks_map/TARGETS 18
-rw-r--r-- src/other_tools/symlinks_map/resolve_symlinks_map.cpp 335
-rw-r--r-- src/other_tools/symlinks_map/resolve_symlinks_map.hpp 97
3 files changed, 450 insertions, 0 deletions
diff --git a/src/other_tools/symlinks_map/TARGETS b/src/other_tools/symlinks_map/TARGETS
new file mode 100644
index 00000000..225000f5
--- /dev/null
+++ b/src/other_tools/symlinks_map/TARGETS
@@ -0,0 +1,18 @@
+{ "resolve_symlinks_map":
+ { "type": ["@", "rules", "CC", "library"]
+ , "name": ["resolve_symlinks_map"]
+ , "hdrs": ["resolve_symlinks_map.hpp"]
+ , "srcs": ["resolve_symlinks_map.cpp"]
+ , "deps":
+ [ ["src/buildtool/file_system", "git_repo"]
+ , ["src/buildtool/file_system", "object_type"]
+ , ["src/buildtool/multithreading", "async_map_consumer"]
+ , ["src/other_tools/just_mr", "utils"]
+ , ["src/utils/cpp", "path"]
+ , ["src/utils/cpp", "path_hash"]
+ ]
+ , "stage": ["src", "other_tools", "symlinks_map"]
+ , "private-deps":
+ [["@", "fmt", "", "fmt"], ["src/buildtool/storage", "config"]]
+ }
+}
diff --git a/src/other_tools/symlinks_map/resolve_symlinks_map.cpp b/src/other_tools/symlinks_map/resolve_symlinks_map.cpp
new file mode 100644
index 00000000..88d09aec
--- /dev/null
+++ b/src/other_tools/symlinks_map/resolve_symlinks_map.cpp
@@ -0,0 +1,335 @@
+// Copyright 2023 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/other_tools/symlinks_map/resolve_symlinks_map.hpp"
+
+#include <cstddef>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "fmt/core.h"
+#include "src/buildtool/file_system/git_repo.hpp"
+#include "src/buildtool/storage/config.hpp"
+
+namespace {
+
+/// \brief Resolve an object whose Git tree entry information is known.
+/// File objects are handed to the setter as they are; trees are rebuilt from
+/// their resolved children and created in the given Git CAS; symlinks are
+/// either kept as they are (non-upwards links under partial resolution) or
+/// replaced by the result of resolving their target.
+/// \param obj           The object to resolve.
+/// \param entry_info    Id and type of the object and, for symlinks, the
+///                      content of the link.
+/// \param just_git_cas  Git CAS used for reading entries and for creating the
+///                      rebuilt trees.
+/// \param setter        Receives the resolved object on success.
+/// \param logger        Receives a message, always flagged as fatal, on
+///                      failure; the setter is not called in that case.
+/// \param subcaller     Used to request the resolution of the children of a
+///                      tree and of the target of a symlink.
+void ResolveKnownEntry(GitObjectToResolve const& obj,
+                       GitRepo::TreeEntryInfo const& entry_info,
+                       GitCASPtr const& just_git_cas,
+                       ResolveSymlinksMap::SetterPtr const& setter,
+                       ResolveSymlinksMap::LoggerPtr const& logger,
+                       ResolveSymlinksMap::SubCallerPtr const& subcaller) {
+    // differentiated treatment based on object type
+    if (IsFileObject(entry_info.type)) {
+        // files are already resolved, so return the hash directly
+        (*setter)(ResolvedGitObject{.id = entry_info.id,
+                                    .type = entry_info.type,
+                                    .path = obj.rel_path});
+    }
+    else if (IsTreeObject(entry_info.type)) {
+        // for tree types we resolve by rebuilding the tree from the
+        // resolved children
+        auto just_git_repo = GitRepo::Open(just_git_cas);
+        if (not just_git_repo) {
+            (*logger)("ResolveSymlinks: could not open Git cache repository!",
+                      /*fatal=*/true);
+            return;
+        }
+        auto children = just_git_repo->ReadTree(
+            entry_info.id,
+            [](std::vector<bazel_re::Digest> const& /*unused*/) {
+                return true;
+            },
+            /*is_hex_id=*/true);
+        if (not children) {
+            (*logger)(fmt::format("ResolveSymlinks: failed to read entries of "
+                                  "subtree {} in root tree {}",
+                                  entry_info.id,
+                                  obj.root_tree_id),
+                      /*fatal=*/true);
+            return;
+        }
+        // resolve children
+        std::vector<GitObjectToResolve> children_info{};
+        children_info.reserve(children->size());
+        for (auto const& [raw_id, ev] : *children) {
+            for (auto const& e : ev) {
+                // must enforce ignore special at the tree level!
+                if (IsNonSpecialObject(e.type) or
+                    obj.pragma_special != PragmaSpecial::Ignore) {
+                    // children info is known, so pass this forward
+                    if (IsSymlinkObject(e.type)) {
+                        // read the link content now, so that it is passed
+                        // along as part of the known info of the child
+                        if (auto target = just_git_cas->ReadObject(raw_id)) {
+                            children_info.emplace_back(
+                                obj.root_tree_id,
+                                obj.rel_path / e.name,
+                                obj.pragma_special,
+                                std::make_optional(GitRepo::TreeEntryInfo{
+                                    .id = ToHexString(raw_id),
+                                    .type = e.type,
+                                    .symlink_content = *target}));
+                        }
+                        else {
+                            (*logger)(
+                                fmt::format("ResolveSymlinks: could not read "
+                                            "symlink {} in root tree {}",
+                                            (obj.rel_path / e.name).string(),
+                                            obj.root_tree_id),
+                                /*fatal=*/true);
+                            return;
+                        }
+                    }
+                    else {
+                        children_info.emplace_back(
+                            obj.root_tree_id,
+                            obj.rel_path / e.name,
+                            obj.pragma_special,
+                            GitRepo::TreeEntryInfo{
+                                .id = ToHexString(raw_id),
+                                .type = e.type,
+                                .symlink_content = std::nullopt});
+                    }
+                }
+            }
+        }
+        (*subcaller)(
+            children_info,
+            [children_info, parent = obj, just_git_cas, setter, logger](
+                auto const& resolved_entries) {
+                // create the entries map of the children
+                GitRepo::tree_entries_t entries{};
+                auto const num = resolved_entries.size();
+                entries.reserve(num);
+                // index with an unsigned type matching the container size
+                for (std::size_t i = 0; i < num; ++i) {
+                    auto const& resolved = *resolved_entries[i];
+                    auto const& p = children_info[i].rel_path;
+                    // do not dereference a failed hex conversion
+                    auto raw_id = FromHexString(resolved.id);
+                    if (not raw_id) {
+                        (*logger)(
+                            fmt::format("ResolveSymlinks: invalid Git id {} "
+                                        "of resolved entry {} in root tree {}",
+                                        resolved.id,
+                                        p.string(),
+                                        parent.root_tree_id),
+                            /*fatal=*/true);
+                        return;
+                    }
+                    entries[*raw_id].emplace_back(
+                        p.filename().string(),  // we only need the name
+                        resolved.type);
+                }
+                // create the tree inside our Git CAS, which is already
+                // existing by this point. Also, this operation is
+                // guarded internally, so no need for the
+                // critical_git_op map
+                auto just_git_repo = GitRepo::Open(just_git_cas);
+                if (not just_git_repo) {
+                    (*logger)(
+                        "ResolveSymlinks: could not open Git cache repository!",
+                        /*fatal=*/true);
+                    return;
+                }
+                auto tree_raw_id = just_git_repo->CreateTree(entries);
+                if (not tree_raw_id) {
+                    (*logger)(fmt::format("ResolveSymlinks: failed to create "
+                                          "resolved tree {} in root tree {}",
+                                          parent.rel_path.string(),
+                                          parent.root_tree_id),
+                              /*fatal=*/true);
+                    return;
+                }
+                // set the resolved tree hash
+                (*setter)(ResolvedGitObject{.id = ToHexString(*tree_raw_id),
+                                            .type = ObjectType::Tree,
+                                            .path = parent.rel_path});
+            },
+            logger);
+    }
+    else {
+        // sanity check: cannot resolve a symlink called with ignore
+        // special, as that can only be handled by the parent tree
+        if (obj.pragma_special == PragmaSpecial::Ignore) {
+            (*logger)(fmt::format("ResolveSymlinks: asked to ignore symlink {} "
+                                  "in root tree {}",
+                                  obj.rel_path.string(),
+                                  obj.root_tree_id),
+                      /*fatal=*/true);
+            return;
+        }
+        // target should have already been read
+        if (not entry_info.symlink_content) {
+            (*logger)(fmt::format("ResolveSymlinks: missing target of symlink "
+                                  "{} in root tree {}",
+                                  obj.rel_path.string(),
+                                  obj.root_tree_id),
+                      /*fatal=*/true);
+            return;
+        }
+        // check if link target (unresolved) is confined to the tree
+        if (not PathIsConfined(*entry_info.symlink_content, obj.rel_path)) {
+            (*logger)(fmt::format("ResolveSymlinks: symlink {} is not confined "
+                                  "to tree {}",
+                                  obj.rel_path.string(),
+                                  obj.root_tree_id),
+                      /*fatal=*/true);
+            return;
+        }
+        // if partially resolved, return non-upwards symlinks as-is
+        if (obj.pragma_special == PragmaSpecial::ResolvePartially and
+            PathIsNonUpwards(*entry_info.symlink_content)) {
+            // return as symlink object
+            (*setter)(ResolvedGitObject{.id = entry_info.id,
+                                        .type = ObjectType::Symlink,
+                                        .path = obj.rel_path});
+            return;
+        }
+        // resolve the target, interpreted relative to the directory
+        // containing the symlink
+        auto n_target = ToNormalPath(obj.rel_path.parent_path() /
+                                     *entry_info.symlink_content);
+        (*subcaller)(
+            {GitObjectToResolve(obj.root_tree_id,
+                                n_target,
+                                obj.pragma_special,
+                                /*known_info=*/std::nullopt)},
+            [setter](auto const& values) {
+                // the resolved target stands in for the symlink
+                (*setter)(ResolvedGitObject{*values[0]});
+            },
+            logger);
+    }
+}
+
+} // namespace
+
+/// \brief Create the map resolving Git objects given by root tree and
+/// relative path. If the entry at the requested path is known or can be looked
+/// up in the root tree, it is resolved directly. Otherwise the parent path is
+/// resolved first and the file name is then looked up in the resolved parent
+/// tree. All failures are reported through the logger as fatal.
+auto CreateResolveSymlinksMap() -> ResolveSymlinksMap {
+    auto resolver = [](auto /*unused*/,
+                       auto setter,
+                       auto logger,
+                       auto subcaller,
+                       auto const& key) {
+        // open the Git cache and the repository view on top of it
+        auto git_cas = GitCAS::Open(StorageConfig::GitRoot());
+        if (not git_cas) {
+            (*logger)("ResolveSymlinks: could not open Git cache database!",
+                      /*fatal=*/true);
+            return;
+        }
+        auto git_repo = GitRepo::Open(git_cas);
+        if (not git_repo) {
+            (*logger)("ResolveSymlinks: could not open Git cache repository!",
+                      /*fatal=*/true);
+            return;
+        }
+        // use the entry info given with the key, if any; otherwise look up
+        // the entry by its relative path
+        auto known_entry = key.known_info
+                               ? key.known_info
+                               : git_repo->GetObjectByPathFromTree(
+                                     key.root_tree_id, key.rel_path);
+
+        // existing path: resolve it right away
+        if (known_entry) {
+            ResolveKnownEntry(
+                key, *known_entry, git_cas, setter, logger, subcaller);
+            return;
+        }
+
+        // non-existing paths come from symlinks, so treat accordingly
+        // sanity check: pragma ignore special should not be set if here
+        if (key.pragma_special == PragmaSpecial::Ignore) {
+            (*logger)(fmt::format("ResolveSymlinks: asked to ignore indirect "
+                                  "symlink path {} in root tree {}",
+                                  key.rel_path.string(),
+                                  key.root_tree_id),
+                      /*fatal=*/true);
+            return;
+        }
+        // a path that is its own parent cannot be reduced any further
+        auto parent_dir = key.rel_path.parent_path();
+        if (parent_dir == key.rel_path) {
+            (*logger)(fmt::format("ResolveSymlinks: found unresolved path "
+                                  "{} in root tree {}",
+                                  key.rel_path.string(),
+                                  key.root_tree_id),
+                      /*fatal=*/true);
+            return;
+        }
+        // resolve the parent first, then look for the entry inside of it
+        (*subcaller)(
+            {GitObjectToResolve(key.root_tree_id,
+                                parent_dir,
+                                key.pragma_special,
+                                /*known_info=*/std::nullopt)},
+            [key,
+             parent_dir,
+             leaf = key.rel_path.filename(),
+             git_cas,
+             setter,
+             logger,
+             subcaller](auto const& values) {
+                auto parent = *values[0];
+                // parent must be a tree
+                if (not IsTreeObject(parent.type)) {
+                    (*logger)(
+                        fmt::format("ResolveSymlinks: path {} in root tree "
+                                    "{} failed to resolve to a tree",
+                                    parent_dir.string(),
+                                    key.root_tree_id),
+                        /*fatal=*/true);
+                    return;
+                }
+                auto repo = GitRepo::Open(git_cas);
+                if (not repo) {
+                    (*logger)(
+                        "ResolveSymlinks: could not open Git cache "
+                        "repository!",
+                        /*fatal=*/true);
+                    return;
+                }
+                // check if the file name exists in the resolved parent tree
+                auto leaf_info =
+                    repo->GetObjectByPathFromTree(parent.id, leaf);
+                if (not leaf_info) {
+                    // report unresolvable
+                    (*logger)(
+                        fmt::format("ResolveSymlinks: reached unresolvable "
+                                    "path {} in root tree {}",
+                                    (parent.path / leaf).string(),
+                                    key.root_tree_id),
+                        /*fatal=*/true);
+                    return;
+                }
+                // continue from the entry found under the resolved parent
+                ResolveKnownEntry(
+                    GitObjectToResolve(key.root_tree_id,
+                                       parent.path / leaf,
+                                       key.pragma_special,
+                                       /*known_info=*/std::nullopt),
+                    *leaf_info,
+                    git_cas,
+                    setter,
+                    logger,
+                    subcaller);
+            },
+            logger);
+    };
+    return AsyncMapConsumer<GitObjectToResolve, ResolvedGitObject>(resolver);
+}
+
+/// \brief Check the map for a cycle and, if there is one, describe it.
+/// \param map           The map to query for a cycle.
+/// \param root_tree_id  Id of the Git tree, used in the message header.
+/// \returns A multi-line message listing the relative paths of the reported
+/// keys, or std::nullopt if the map reports no cycle. The keys equal to the
+/// last reported key are marked, the first one with ".-> " and any later one
+/// with "`-- "; keys after the first mark are prefixed with "|   ".
+auto DetectAndReportCycle(ResolveSymlinksMap const& map,
+                          std::string const& root_tree_id)
+    -> std::optional<std::string> {
+    auto const cycle = map.DetectCycle();
+    if (not cycle) {
+        return std::nullopt;
+    }
+    bool found{false};
+    std::ostringstream oss{};
+    // plain newlines: there is nothing to flush on a string stream
+    oss << fmt::format("Cycle detected for Git tree {}:", root_tree_id)
+        << '\n';
+    for (auto const& k : *cycle) {
+        bool const match = (k == cycle->back());
+        // all prefixes have the same width, so that the paths line up
+        char const* prefix = match ? (found ? "`-- " : ".-> ")
+                                   : (found ? "|   " : "    ");
+        oss << prefix << k.rel_path << '\n';
+        found = found or match;
+    }
+    return oss.str();
+}
diff --git a/src/other_tools/symlinks_map/resolve_symlinks_map.hpp b/src/other_tools/symlinks_map/resolve_symlinks_map.hpp
new file mode 100644
index 00000000..8bae7583
--- /dev/null
+++ b/src/other_tools/symlinks_map/resolve_symlinks_map.hpp
@@ -0,0 +1,97 @@
+// Copyright 2023 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef INCLUDED_SRC_OTHER_TOOLS_SYMLINKS_MAP_RESOLVE_SYMLINKS_MAP_HPP
+#define INCLUDED_SRC_OTHER_TOOLS_SYMLINKS_MAP_RESOLVE_SYMLINKS_MAP_HPP
+
+#include <filesystem>
+#include <optional>
+#include <string>
+
+#include "src/buildtool/file_system/git_repo.hpp"
+#include "src/buildtool/file_system/object_type.hpp"
+#include "src/buildtool/multithreading/async_map_consumer.hpp"
+#include "src/other_tools/just_mr/utils.hpp"
+#include "src/utils/cpp/path.hpp"
+#include "src/utils/cpp/path_hash.hpp"
+
+/// \brief Describes an object (blob or tree) to be resolved, identified by
+/// its path relative to a root tree in a given CAS. Only the fields marked as
+/// key take part in the comparison of two such objects.
+struct GitObjectToResolve {
+    // hash of the root tree
+    std::string root_tree_id{}; /* key */
+    // path of this object relative to root tree, in normal form
+    std::filesystem::path rel_path{"."}; /* key */
+    // how the tree should be resolved
+    PragmaSpecial pragma_special{}; /* key */
+    // optional, already available information on the object at the given
+    // path; passing it along avoids looking it up again
+    std::optional<GitRepo::TreeEntryInfo> known_info{std::nullopt};
+
+    GitObjectToResolve() = default;  // needed for cycle detection only!
+
+    /// \brief Build a key; the given path is stored in normal form.
+    GitObjectToResolve(std::string root_tree_id_,
+                       std::filesystem::path const& rel_path_,
+                       PragmaSpecial const& pragma_special_,
+                       std::optional<GitRepo::TreeEntryInfo> known_info_)
+        : root_tree_id{std::move(root_tree_id_)},
+          rel_path{ToNormalPath(rel_path_)},
+          pragma_special{pragma_special_},
+          known_info{std::move(known_info_)} {}
+
+    /// \brief Two objects are equal if all their key fields are equal;
+    /// known_info is not compared.
+    [[nodiscard]] auto operator==(
+        GitObjectToResolve const& other) const noexcept -> bool {
+        if (not(root_tree_id == other.root_tree_id)) {
+            return false;
+        }
+        if (not(rel_path == other.rel_path)) {
+            return false;
+        }
+        return pragma_special == other.pragma_special;
+    }
+};
+
+/// \brief Result of resolving an object: for a path that may initially have
+/// been unresolved, we end up knowing its hash, its type, and its resolved
+/// path.
+struct ResolvedGitObject {
+ // Git identifier of the resolved object, as a hex string
+ std::string id;
+ // type of the resolved object
+ ObjectType type;
+ // path of the resolved object, relative to the root tree
+ std::filesystem::path path;
+};
+
+/// \brief Maps information about a Git object to its Git ID, type, and path as
+/// part of a Git tree where symlinks have been resolved according to the given
+/// pragma value.
+/// With pragma ignore special, special entries are left out of the trees being
+/// rebuilt; asking to resolve a symlink itself with that pragma is reported as
+/// a fatal error through the logger and no value is set.
+/// \note Call the map with type Tree and path "." to resolve a Git tree.
+using ResolveSymlinksMap =
+ AsyncMapConsumer<GitObjectToResolve, ResolvedGitObject>;
+
+/// \brief Create a map resolving objects found in the Git cache located at
+/// StorageConfig::GitRoot().
+[[nodiscard]] auto CreateResolveSymlinksMap() -> ResolveSymlinksMap;
+
+/// \brief Query the map for a cycle among its keys.
+/// \param map The map to check.
+/// \param root_tree_id Id of the Git tree, used in the header of the message.
+/// \returns A message listing the relative paths of the keys reported as
+/// forming the cycle, or std::nullopt if no cycle is reported.
+[[nodiscard]] auto DetectAndReportCycle(ResolveSymlinksMap const& map,
+ std::string const& root_tree_id)
+ -> std::optional<std::string>;
+
+namespace std {
+/// \brief Hash of a GitObjectToResolve. Combines the root tree id, the
+/// relative path and the pragma, which are the fields compared by the
+/// operator== of that type; known_info does not contribute.
+template <>
+struct hash<GitObjectToResolve> {
+    [[nodiscard]] auto operator()(GitObjectToResolve const& ct) const noexcept
+        -> std::size_t {
+        std::size_t seed{};
+        // fold in the key fields one by one, in declaration order
+        hash_combine<std::string>(&seed, ct.root_tree_id);
+        hash_combine<std::filesystem::path>(&seed, ct.rel_path);
+        hash_combine<PragmaSpecial>(&seed, ct.pragma_special);
+        return seed;
+    }
+};
+}  // namespace std
+
+#endif // INCLUDED_SRC_OTHER_TOOLS_SYMLINKS_MAP_RESOLVE_SYMLINKS_MAP_HPP