diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/buildtool/common/TARGETS | 7 | ||||
-rw-r--r-- | src/buildtool/common/repository_config.cpp | 133 | ||||
-rw-r--r-- | src/buildtool/common/repository_config.hpp | 64 |
3 files changed, 196 insertions, 8 deletions
diff --git a/src/buildtool/common/TARGETS b/src/buildtool/common/TARGETS index 3efd06a8..3967e54b 100644 --- a/src/buildtool/common/TARGETS +++ b/src/buildtool/common/TARGETS @@ -93,9 +93,14 @@ { "type": ["@", "rules", "CC", "library"] , "name": ["config"] , "hdrs": ["repository_config.hpp"] + , "srcs": ["repository_config.cpp"] , "deps": - [ ["src/buildtool/file_system", "file_root"] + [ ["src/buildtool/execution_api/local", "local"] + , ["src/buildtool/file_system", "file_root"] , ["src/buildtool/file_system", "git_cas"] + , ["src/buildtool/multithreading", "atomic_value"] + , ["src/utils/automata", "dfa_minimizer"] + , ["src/utils/cpp", "hash_combine"] ] , "stage": ["src", "buildtool", "common"] } diff --git a/src/buildtool/common/repository_config.cpp b/src/buildtool/common/repository_config.cpp new file mode 100644 index 00000000..1366d8f9 --- /dev/null +++ b/src/buildtool/common/repository_config.cpp @@ -0,0 +1,133 @@ +#include "src/buildtool/common/repository_config.hpp" + +#include "src/utils/automata/dfa_minimizer.hpp" + +auto RepositoryConfig::RepositoryInfo::BaseContentDescription() const + -> std::optional<nlohmann::json> { + auto wroot = workspace_root.ContentDescription(); + auto troot = target_root.ContentDescription(); + auto rroot = rule_root.ContentDescription(); + auto eroot = expression_root.ContentDescription(); + if (wroot and troot and rroot and eroot) { + return nlohmann::json{{"workspace_root", *wroot}, + {"target_root", *troot}, + {"rule_root", *rroot}, + {"expression_root", *eroot}, + {"target_file_name", target_file_name}, + {"rule_file_name", rule_file_name}, + {"expression_file_name", expression_file_name}}; + } + return std::nullopt; +} + +auto RepositoryConfig::RepositoryKey(std::string const& repo) const noexcept + -> std::optional<std::string> { + auto const& unique = DeduplicateRepo(repo); + if (auto const* data = Data(unique)) { + // compute key only once (thread-safe) + return data->key.SetOnceAndGet( + [this, &unique]() -> std::optional<std::string> { + if (auto graph = BuildGraphForRepository(unique)) { + auto& cas = LocalCAS<ObjectType::File>::Instance(); + if (auto digest = cas.StoreBlobFromBytes(graph->dump(2))) { + return digest->hash(); + } + } + return std::nullopt; + }); + } + return std::nullopt; +} + +// Obtain canonical name (according to bisimulation) for the given repository. +auto RepositoryConfig::DeduplicateRepo(std::string const& repo) const + -> std::string const& { + // Compute duplicates map only once (thread-safe) + auto const& duplicates = duplicates_.SetOnceAndGet([this] { + // To detect duplicate repository descriptions, we represent each + // repository as a DFA state with repo name as state name, repo + // bindings as state transitions, and repo base description as state + // content id. Then we use a DFA minimizer to find the bisimulation + // for each state. + auto minimizer = DFAMinimizer{}; + for (auto const& [repo, data] : repos_) { + // Only add content-fixed repositories. This is sufficient, as for + // incomplete graphs our minimizer implementation identifies states + // with transitions to differently-named missing states as + // distinguishable. Even if those were considered indistinguishable, + // repository key computation would still work correctly, as this + // computation is only performed if all transitive dependencies are + // content-fixed. + if (data.base_desc) { + // Use hash of content-fixed base description as content id + auto hash = ComputeHashDigest(data.base_desc->dump()).Bytes(); + // Add state with name, transitions, and content id + minimizer.AddState(repo, data.info.name_mapping, hash); + } + } + return minimizer.ComputeBisimulation(); + }); + + // Lookup canonical name for given repository in duplicates map + auto it = duplicates.find(repo); + if (it != duplicates.end()) { + return it->second; + } + return repo; +} + +// Returns the repository-local representation of its dependency graph if all +// contained repositories are content fixed or return std::nullopt otherwise. +auto RepositoryConfig::BuildGraphForRepository(std::string const& repo) const + -> std::optional<nlohmann::json> { + auto graph = nlohmann::json::object(); + int id_counter{}; + std::unordered_map<std::string, int> repo_ids{}; + if (AddToGraphAndGetId(&graph, &id_counter, &repo_ids, repo)) { + return graph; + } + return std::nullopt; +} + +// Add given repository to the given graph and return its traversal specific +// unique id if it and all its dependencies are content-fixed or return +// std::nullopt otherwise. Recursion immediately aborts on traversing the first +// non-content-fixed repository. +// NOLINTNEXTLINE(misc-no-recursion) +auto RepositoryConfig::AddToGraphAndGetId( + gsl::not_null<nlohmann::json*> const& graph, + gsl::not_null<int*> const& id_counter, + gsl::not_null<std::unordered_map<std::string, int>*> const& repo_ids, + std::string const& repo) const -> std::optional<int> { + auto const& unique_repo = DeduplicateRepo(repo); + auto repo_it = repo_ids->find(unique_repo); + if (repo_it != repo_ids->end()) { + // The same or equivalent repository was already requested before + return repo_it->second; + } + + auto const* data = Data(unique_repo); + if (data != nullptr and data->base_desc) { + // Compute the unique id (traversal order) and store it + auto repo_id = (*id_counter)++; + repo_ids->emplace(unique_repo, repo_id); + + // Compute repository description from content-fixed base + // description and bindings to unique ids of depending repositories + auto repo_desc = *data->base_desc; + repo_desc["bindings"] = nlohmann::json::object(); + for (auto const& [local_name, global_name] : data->info.name_mapping) { + auto global_id = + AddToGraphAndGetId(graph, id_counter, repo_ids, global_name); + if (not global_id) { + return std::nullopt; + } + repo_desc["bindings"][local_name] = *global_id; + } + + // Add repository description to graph with unique id as key + (*graph)[std::to_string(repo_id)] = std::move(repo_desc); + return repo_id; + } + return std::nullopt; +} diff --git a/src/buildtool/common/repository_config.hpp b/src/buildtool/common/repository_config.hpp index 62c2c4ff..85a388fc 100644 --- a/src/buildtool/common/repository_config.hpp +++ b/src/buildtool/common/repository_config.hpp @@ -2,23 +2,32 @@ #define INCLUDED_SRC_BUILDTOOL_COMMON_REPOSITORY_CONFIG_HPP #include <filesystem> +#include <map> #include <string> #include <unordered_map> +#include "src/buildtool/execution_api/local/local_cas.hpp" #include "src/buildtool/file_system/file_root.hpp" #include "src/buildtool/file_system/git_cas.hpp" +#include "src/buildtool/multithreading/atomic_value.hpp" class RepositoryConfig { + public: struct RepositoryInfo { FileRoot workspace_root; FileRoot target_root{workspace_root}; FileRoot rule_root{target_root}; FileRoot expression_root{rule_root}; - std::unordered_map<std::string, std::string> name_mapping{}; + std::map<std::string, std::string> name_mapping{}; std::string target_file_name{"TARGETS"}; std::string rule_file_name{"RULES"}; std::string expression_file_name{"EXPRESSIONS"}; + + // Return base content description without bindings if all roots are + // content fixed or return std::nullopt otherwise. + [[nodiscard]] auto BaseContentDescription() const + -> std::optional<nlohmann::json>; }; [[nodiscard]] static auto Instance() noexcept -> RepositoryConfig& { @@ -27,7 +36,9 @@ class RepositoryConfig { } void SetInfo(std::string const& repo, RepositoryInfo&& info) { - infos_.emplace(repo, std::move(info)); + repos_[repo].base_desc = info.BaseContentDescription(); + repos_[repo].info = std::move(info); + repos_[repo].key.Reset(); } [[nodiscard]] auto SetGitCAS( @@ -38,9 +49,8 @@ class RepositoryConfig { [[nodiscard]] auto Info(std::string const& repo) const noexcept -> RepositoryInfo const* { - auto it = infos_.find(repo); - if (it != infos_.end()) { - return &it->second; + if (auto const* data = Data(repo)) { + return &data->info; } return nullptr; } @@ -106,15 +116,34 @@ class RepositoryConfig { repo, [](auto const& info) { return &info.expression_file_name; }); } + // Obtain repository's cache key if the repository is content fixed or + // std::nullopt otherwise. + [[nodiscard]] auto RepositoryKey(std::string const& repo) const noexcept + -> std::optional<std::string>; + // used for testing void Reset() { - infos_.clear(); + repos_.clear(); git_cas_.reset(); + duplicates_.Reset(); } private: - std::unordered_map<std::string, RepositoryInfo> infos_; + using duplicates_t = std::unordered_map<std::string, std::string>; + + // All data we store per repository. + struct RepositoryData { + // Info structure (roots, file names, bindings) + RepositoryInfo info{}; + // Base description if content-fixed + std::optional<nlohmann::json> base_desc{}; + // Cache key if content-fixed + AtomicValue<std::optional<std::string>> key{}; + }; + + std::unordered_map<std::string, RepositoryData> repos_; GitCASPtr git_cas_; + AtomicValue<duplicates_t> duplicates_{}; template <class T> [[nodiscard]] auto Get(std::string const& repo, @@ -128,6 +157,27 @@ class RepositoryConfig { } return nullptr; } + + [[nodiscard]] auto Data(std::string const& repo) const noexcept + -> RepositoryData const* { + auto it = repos_.find(repo); + if (it != repos_.end()) { + return &it->second; + } + return nullptr; + } + + [[nodiscard]] auto DeduplicateRepo(std::string const& repo) const + -> std::string const&; + + [[nodiscard]] auto BuildGraphForRepository(std::string const& repo) const + -> std::optional<nlohmann::json>; + + [[nodiscard]] auto AddToGraphAndGetId( + gsl::not_null<nlohmann::json*> const& graph, + gsl::not_null<int*> const& id_counter, + gsl::not_null<std::unordered_map<std::string, int>*> const& repo_ids, + std::string const& repo) const -> std::optional<int>; }; #endif // INCLUDED_SRC_BUILDTOOL_COMMON_REPOSITORY_CONFIG_HPP |