summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorOliver Reiche <oliver.reiche@huawei.com>2022-06-13 13:30:13 +0200
committerOliver Reiche <oliver.reiche@huawei.com>2022-06-13 15:55:41 +0200
commitbe6b7b24e74e605533c0a1c4d3f32cd56f662ca7 (patch)
treea7a606adcee0bfa23916c02deaede053b9772544 /src
parent3c64357cec1ff484e4b410eff47c8b7006e55c66 (diff)
downloadjustbuild-be6b7b24e74e605533c0a1c4d3f32cd56f662ca7.tar.gz
RepoConfig: Compute repository key
Diffstat (limited to 'src')
-rw-r--r--src/buildtool/common/TARGETS7
-rw-r--r--src/buildtool/common/repository_config.cpp133
-rw-r--r--src/buildtool/common/repository_config.hpp64
3 files changed, 196 insertions, 8 deletions
diff --git a/src/buildtool/common/TARGETS b/src/buildtool/common/TARGETS
index 3efd06a8..3967e54b 100644
--- a/src/buildtool/common/TARGETS
+++ b/src/buildtool/common/TARGETS
@@ -93,9 +93,14 @@
{ "type": ["@", "rules", "CC", "library"]
, "name": ["config"]
, "hdrs": ["repository_config.hpp"]
+ , "srcs": ["repository_config.cpp"]
, "deps":
- [ ["src/buildtool/file_system", "file_root"]
+ [ ["src/buildtool/execution_api/local", "local"]
+ , ["src/buildtool/file_system", "file_root"]
, ["src/buildtool/file_system", "git_cas"]
+ , ["src/buildtool/multithreading", "atomic_value"]
+ , ["src/utils/automata", "dfa_minimizer"]
+ , ["src/utils/cpp", "hash_combine"]
]
, "stage": ["src", "buildtool", "common"]
}
diff --git a/src/buildtool/common/repository_config.cpp b/src/buildtool/common/repository_config.cpp
new file mode 100644
index 00000000..1366d8f9
--- /dev/null
+++ b/src/buildtool/common/repository_config.cpp
@@ -0,0 +1,133 @@
+#include "src/buildtool/common/repository_config.hpp"
+
+#include "src/utils/automata/dfa_minimizer.hpp"
+
+auto RepositoryConfig::RepositoryInfo::BaseContentDescription() const
+ -> std::optional<nlohmann::json> {
+ auto wroot = workspace_root.ContentDescription();
+ auto troot = target_root.ContentDescription();
+ auto rroot = rule_root.ContentDescription();
+ auto eroot = expression_root.ContentDescription();
+ if (wroot and troot and rroot and eroot) {
+ return nlohmann::json{{"workspace_root", *wroot},
+ {"target_root", *troot},
+ {"rule_root", *rroot},
+ {"expression_root", *eroot},
+ {"target_file_name", target_file_name},
+ {"rule_file_name", rule_file_name},
+ {"expression_file_name", expression_file_name}};
+ }
+ return std::nullopt;
+}
+
+auto RepositoryConfig::RepositoryKey(std::string const& repo) const noexcept
+ -> std::optional<std::string> {
+ auto const& unique = DeduplicateRepo(repo);
+ if (auto const* data = Data(unique)) {
+ // compute key only once (thread-safe)
+ return data->key.SetOnceAndGet(
+ [this, &unique]() -> std::optional<std::string> {
+ if (auto graph = BuildGraphForRepository(unique)) {
+ auto& cas = LocalCAS<ObjectType::File>::Instance();
+ if (auto digest = cas.StoreBlobFromBytes(graph->dump(2))) {
+ return digest->hash();
+ }
+ }
+ return std::nullopt;
+ });
+ }
+ return std::nullopt;
+}
+
+// Obtain canonical name (according to bisimulation) for the given repository.
+auto RepositoryConfig::DeduplicateRepo(std::string const& repo) const
+ -> std::string const& {
+ // Compute duplicates map only once (thread-safe)
+ auto const& duplicates = duplicates_.SetOnceAndGet([this] {
+ // To detect duplicate repository descriptions, we represent each
+ // repository as a DFA state with repo name as state name, repo
+ // bindings as state transitions, and repo base description as state
+ // content id. Then we use a DFA minimizer to find the bisimulation
+ // for each state.
+ auto minimizer = DFAMinimizer{};
+ for (auto const& [repo, data] : repos_) {
+ // Only add content-fixed repositories. This is sufficient, as for
+ // incomplete graphs our minimizer implementation identifies states
+ // with transitions to differently-named missing states as
+ // distinguishable. Even if those were considered indistinguishable,
+ // repository key computation would still work correctly, as this
+ // computation is only performed if all transitive dependencies are
+ // content-fixed.
+ if (data.base_desc) {
+ // Use hash of content-fixed base description as content id
+ auto hash = ComputeHashDigest(data.base_desc->dump()).Bytes();
+ // Add state with name, transitions, and content id
+ minimizer.AddState(repo, data.info.name_mapping, hash);
+ }
+ }
+ return minimizer.ComputeBisimulation();
+ });
+
+ // Lookup canonical name for given repository in duplicates map
+ auto it = duplicates.find(repo);
+ if (it != duplicates.end()) {
+ return it->second;
+ }
+ return repo;
+}
+
+// Returns the repository-local representation of its dependency graph if all
+// contained repositories are content fixed or return std::nullopt otherwise.
+auto RepositoryConfig::BuildGraphForRepository(std::string const& repo) const
+ -> std::optional<nlohmann::json> {
+ auto graph = nlohmann::json::object();
+ int id_counter{};
+ std::unordered_map<std::string, int> repo_ids{};
+ if (AddToGraphAndGetId(&graph, &id_counter, &repo_ids, repo)) {
+ return graph;
+ }
+ return std::nullopt;
+}
+
+// Add given repository to the given graph and return its traversal specific
+// unique id if it and all its dependencies are content-fixed or return
+// std::nullopt otherwise. Recursion immediately aborts on traversing the first
+// non-content-fixed repository.
+// NOLINTNEXTLINE(misc-no-recursion)
+auto RepositoryConfig::AddToGraphAndGetId(
+ gsl::not_null<nlohmann::json*> const& graph,
+ gsl::not_null<int*> const& id_counter,
+ gsl::not_null<std::unordered_map<std::string, int>*> const& repo_ids,
+ std::string const& repo) const -> std::optional<int> {
+ auto const& unique_repo = DeduplicateRepo(repo);
+ auto repo_it = repo_ids->find(unique_repo);
+ if (repo_it != repo_ids->end()) {
+ // The same or equivalent repository was already requested before
+ return repo_it->second;
+ }
+
+ auto const* data = Data(unique_repo);
+ if (data != nullptr and data->base_desc) {
+ // Compute the unique id (traversal order) and store it
+ auto repo_id = (*id_counter)++;
+ repo_ids->emplace(unique_repo, repo_id);
+
+ // Compute repository description from content-fixed base
+ // description and bindings to unique ids of depending repositories
+ auto repo_desc = *data->base_desc;
+ repo_desc["bindings"] = nlohmann::json::object();
+ for (auto const& [local_name, global_name] : data->info.name_mapping) {
+ auto global_id =
+ AddToGraphAndGetId(graph, id_counter, repo_ids, global_name);
+ if (not global_id) {
+ return std::nullopt;
+ }
+ repo_desc["bindings"][local_name] = *global_id;
+ }
+
+ // Add repository description to graph with unique id as key
+ (*graph)[std::to_string(repo_id)] = std::move(repo_desc);
+ return repo_id;
+ }
+ return std::nullopt;
+}
diff --git a/src/buildtool/common/repository_config.hpp b/src/buildtool/common/repository_config.hpp
index 62c2c4ff..85a388fc 100644
--- a/src/buildtool/common/repository_config.hpp
+++ b/src/buildtool/common/repository_config.hpp
@@ -2,23 +2,32 @@
#define INCLUDED_SRC_BUILDTOOL_COMMON_REPOSITORY_CONFIG_HPP
#include <filesystem>
+#include <map>
#include <string>
#include <unordered_map>
+#include "src/buildtool/execution_api/local/local_cas.hpp"
#include "src/buildtool/file_system/file_root.hpp"
#include "src/buildtool/file_system/git_cas.hpp"
+#include "src/buildtool/multithreading/atomic_value.hpp"
class RepositoryConfig {
+
public:
struct RepositoryInfo {
FileRoot workspace_root;
FileRoot target_root{workspace_root};
FileRoot rule_root{target_root};
FileRoot expression_root{rule_root};
- std::unordered_map<std::string, std::string> name_mapping{};
+ std::map<std::string, std::string> name_mapping{};
std::string target_file_name{"TARGETS"};
std::string rule_file_name{"RULES"};
std::string expression_file_name{"EXPRESSIONS"};
+
+ // Return base content description without bindings if all roots are
+ // content fixed or return std::nullopt otherwise.
+ [[nodiscard]] auto BaseContentDescription() const
+ -> std::optional<nlohmann::json>;
};
[[nodiscard]] static auto Instance() noexcept -> RepositoryConfig& {
@@ -27,7 +36,9 @@ class RepositoryConfig {
}
void SetInfo(std::string const& repo, RepositoryInfo&& info) {
- infos_.emplace(repo, std::move(info));
+ repos_[repo].base_desc = info.BaseContentDescription();
+ repos_[repo].info = std::move(info);
+ repos_[repo].key.Reset();
}
[[nodiscard]] auto SetGitCAS(
@@ -38,9 +49,8 @@ class RepositoryConfig {
[[nodiscard]] auto Info(std::string const& repo) const noexcept
-> RepositoryInfo const* {
- auto it = infos_.find(repo);
- if (it != infos_.end()) {
- return &it->second;
+ if (auto const* data = Data(repo)) {
+ return &data->info;
}
return nullptr;
}
@@ -106,15 +116,34 @@ class RepositoryConfig {
repo, [](auto const& info) { return &info.expression_file_name; });
}
+ // Obtain repository's cache key if the repository is content fixed or
+ // std::nullopt otherwise.
+ [[nodiscard]] auto RepositoryKey(std::string const& repo) const noexcept
+ -> std::optional<std::string>;
+
// used for testing
void Reset() {
- infos_.clear();
+ repos_.clear();
git_cas_.reset();
+ duplicates_.Reset();
}
private:
- std::unordered_map<std::string, RepositoryInfo> infos_;
+ using duplicates_t = std::unordered_map<std::string, std::string>;
+
+ // All data we store per repository.
+ struct RepositoryData {
+ // Info structure (roots, file names, bindings)
+ RepositoryInfo info{};
+ // Base description if content-fixed
+ std::optional<nlohmann::json> base_desc{};
+ // Cache key if content-fixed
+ AtomicValue<std::optional<std::string>> key{};
+ };
+
+ std::unordered_map<std::string, RepositoryData> repos_;
GitCASPtr git_cas_;
+ AtomicValue<duplicates_t> duplicates_{};
template <class T>
[[nodiscard]] auto Get(std::string const& repo,
@@ -128,6 +157,27 @@ class RepositoryConfig {
}
return nullptr;
}
+
+ [[nodiscard]] auto Data(std::string const& repo) const noexcept
+ -> RepositoryData const* {
+ auto it = repos_.find(repo);
+ if (it != repos_.end()) {
+ return &it->second;
+ }
+ return nullptr;
+ }
+
+ [[nodiscard]] auto DeduplicateRepo(std::string const& repo) const
+ -> std::string const&;
+
+ [[nodiscard]] auto BuildGraphForRepository(std::string const& repo) const
+ -> std::optional<nlohmann::json>;
+
+ [[nodiscard]] auto AddToGraphAndGetId(
+ gsl::not_null<nlohmann::json*> const& graph,
+ gsl::not_null<int*> const& id_counter,
+ gsl::not_null<std::unordered_map<std::string, int>*> const& repo_ids,
+ std::string const& repo) const -> std::optional<int>;
};
#endif // INCLUDED_SRC_BUILDTOOL_COMMON_REPOSITORY_CONFIG_HPP