diff options
Diffstat (limited to 'src/utils')
-rw-r--r-- | src/utils/automata/TARGETS | 12 | ||||
-rw-r--r-- | src/utils/automata/dfa_minimizer.hpp | 213 |
2 files changed, 225 insertions, 0 deletions
diff --git a/src/utils/automata/TARGETS b/src/utils/automata/TARGETS new file mode 100644 index 00000000..39df4e98 --- /dev/null +++ b/src/utils/automata/TARGETS @@ -0,0 +1,12 @@ +{ "dfa_minimizer": + { "type": ["@", "rules", "CC", "library"] + , "name": ["dfa_minimizer"] + , "hdrs": ["dfa_minimizer.hpp"] + , "stage": ["src", "utils", "automata"] + , "deps": + [ ["@", "json", "", "json"] + , ["src/utils/cpp", "hex_string"] + , ["src/utils/cpp", "hash_combine"] + ] + } +} diff --git a/src/utils/automata/dfa_minimizer.hpp b/src/utils/automata/dfa_minimizer.hpp new file mode 100644 index 00000000..56a078c4 --- /dev/null +++ b/src/utils/automata/dfa_minimizer.hpp @@ -0,0 +1,213 @@ +#ifndef INCLUDED_SRC_UTILS_AUTOMATA_DFA_MINIMIZER_HPP +#define INCLUDED_SRC_UTILS_AUTOMATA_DFA_MINIMIZER_HPP + +#include <map> +#include <regex> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> + +#include "nlohmann/json.hpp" +#include "src/utils/cpp/hash_combine.hpp" +#include "src/utils/cpp/hex_string.hpp" + +// Minimizes a DFA by separating distinguishable states. States added with same +// content id are considered initially indistinguishable. The algorithm has +// complexity O(n^2) among each set of initially indistinguishable states. +// Note that for incomplete graphs, two states are considered distinguishable if +// they transition for the same symbol to two differently named non-existing +// states. This is done for efficiency reasons, as we then can avoid creating an +// additional bucket for non-existing states. This is sufficient for our use +// case, as we are only interested in the bisimiulation of states in complete +// graphs. +class DFAMinimizer { + // Maps symbols to states + using transitions_t = std::map<std::string, std::string>; + using states_t = std::unordered_map<std::string, transitions_t>; + + // Bucket of states with equal local properties (content and acceptance) + struct Bucket { + std::vector<std::string> symbols{}; + states_t states{}; + }; + + // Key used for state pairs. Reordering names will result in the same key. + class StatePairKey { + public: + struct hash_t { + [[nodiscard]] auto operator()(StatePairKey const& p) const + -> std::size_t { + std::size_t hash{}; + hash_combine(&hash, p.Names().first); + hash_combine(&hash, p.Names().second); + return hash; + } + }; + + StatePairKey(std::string first, std::string second) { + if (first < second) { + data_ = std::make_pair(std::move(first), std::move(second)); + } + else { + data_ = std::make_pair(std::move(second), std::move(first)); + } + } + [[nodiscard]] auto Names() + const& -> std::pair<std::string, std::string> const& { + return data_; + } + [[nodiscard]] auto operator==(StatePairKey const& other) const + -> bool = default; + [[nodiscard]] auto operator!=(StatePairKey const& other) const + -> bool = default; + + private: + std::pair<std::string, std::string> data_; + }; + + // Value of state pairs. + struct StatePairValue { + // Parent pairs depending on this pair's distinguishability + std::vector<StatePairValue*> parents{}; + // Distinguishability flag (true means distinguishable) + bool marked{}; + }; + + using state_pairs_t = + std::unordered_map<StatePairKey, StatePairValue, StatePairKey::hash_t>; + + public: + using bisimulation_t = std::unordered_map<std::string, std::string>; + + // Add state with name, transitions, and content id. States with the same + // content id are initially considered indistinguishable. + void AddState(std::string const& name, + transitions_t const& transitions, + std::string const& content_id = {}) { + auto symbols = GetKeys(transitions); + + // Compute bucket id from content id and transition symbols + auto bucket_id = + nlohmann::json(std::pair{ToHexString(content_id), symbols}).dump(); + + // Store indistinguishable states in same bucket + auto it = buckets_.find(bucket_id); + if (it == buckets_.end()) { + it = buckets_.emplace(bucket_id, Bucket{std::move(symbols)}).first; + } + it->second.states.emplace(name, transitions); + gsl_Ensures(buckets_by_state_.emplace(name, bucket_id).second); + } + + // Compute bisimulation for each state and return a map, which maps a + // state name to its bisimulation if one was found. + [[nodiscard]] auto ComputeBisimulation() const -> bisimulation_t { + auto pairs = CreatePairs(); + + // Mark pairs of distinguishable states + for (auto& [key, ab_val] : pairs) { + auto const& [a, b] = key.Names(); + auto const& bucket_id = buckets_by_state_.at(a); + auto const& bucket = buckets_.at(bucket_id); + for (auto const& sym : bucket.symbols) { + auto const& r = bucket.states.at(a).at(sym); + auto const& s = bucket.states.at(b).at(sym); + if (r != s) { + auto* rs_val = LookupPairValue(&pairs, {r, s}); + if (rs_val == nullptr or rs_val->marked) { + // if rs_val == nullptr, the pair is missing, due to: + // - both do not exist (and are named differently) + // - either r or s does not exist + // - r and s do exist but are not in the same bucket + MarkPairValue(&ab_val); + } + else if (rs_val != nullptr) { + // Remember ab_val if rs_val ever gets marked + rs_val->parents.emplace_back(&ab_val); + } + } + } + } + + // Compute the bisimulation for each state + bisimulation_t bisimulation{}; + for (auto const& [_, bucket] : buckets_) { + // Consider states in unmarked pairs to be equivalent + auto all_states = GetKeys(bucket.states); + while (not all_states.empty()) { + auto last = std::move(all_states.back()); + all_states.pop_back(); + auto unequal_states = all_states; + for (auto const& state : all_states) { + // Lookup is safe, both must be in the same bucket and + // therefore the pair is are guaranteed to exist + if (not LookupPairValue(&pairs, {last, state})->marked) { + bisimulation.emplace(state, last); + std::erase(unequal_states, state); + } + } + all_states = unequal_states; + } + } + return bisimulation; + } + + private: + std::unordered_map<std::string, Bucket> buckets_{}; + std::unordered_map<std::string, std::string> buckets_by_state_{}; + + template <class M, class K = typename M::key_type> + [[nodiscard]] static auto GetKeys(M const& map) -> std::vector<K> { + auto keys = std::vector<K>{}; + keys.reserve(map.size()); + std::transform(map.begin(), + map.end(), + std::back_inserter(keys), + [](auto const& kv) { return kv.first; }); + return keys; + } + + // Returns nullptr if no pair with given key was found. + [[nodiscard]] static auto LookupPairValue( + gsl::not_null<state_pairs_t*> const& pairs, + StatePairKey const& key) -> StatePairValue* { + auto it = pairs->find(key); + if (it != pairs->end()) { + return &it->second; + } + return nullptr; + } + + // Mark pair as distinguishable and recursively mark all parents. + // NOLINTNEXTLINE(misc-no-recursion) + static void MarkPairValue(gsl::not_null<StatePairValue*> const& data) { + data->marked = true; + for (auto* parent : data->parents) { + if (not parent->marked) { + MarkPairValue(parent); + } + } + } + + // Create n to n pairs for all states in the same bucket. + [[nodiscard]] auto CreatePairs() const -> state_pairs_t { + state_pairs_t pairs{}; + for (auto const& [_, bucket] : buckets_) { + auto const& states = bucket.states; + auto const n = states.size(); + pairs.reserve(pairs.size() + ((n * (n - 1)) / 2)); + + auto const end = states.end(); + for (auto it = ++states.begin(); it != end; ++it) { + for (auto it2 = states.begin(); it2 != it; ++it2) { + pairs.emplace(StatePairKey{it->first, it2->first}, + StatePairValue{}); + } + } + } + return pairs; + } +}; + +#endif // INCLUDED_SRC_UTILS_AUTOMATA_DFA_MINIMIZER_HPP |