From da8730d0eeb73a22911a91ba3779c44a8ecbc725 Mon Sep 17 00:00:00 2001 From: TonitaN Date: Sun, 9 Jun 2024 12:30:18 +0300 Subject: [PATCH] (#350) SemDet fixing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ещё есть повторные перепроверки из-за использования векторов вместо множеств; не подогнано под линтер; не нужно брать языки по всем вообще префиксам, достаточно по неоднозначным (Эдгар, на заметку олимпиадникам); Есть подозрение, что на преобразованиях после Annote (типа Reverse) иногда будут сравниваться таки неразмеченные языки, но у меня пока сравниваются размеченные, так что тест найти не удалось. --- .../Objects/include/Objects/FiniteAutomaton.h | 5 +- libs/Objects/include/Objects/Regex.h | 6 +- libs/Objects/src/FiniteAutomaton.cpp | 171 +++++++++++------- libs/Objects/src/Regex.cpp | 90 +++++---- 4 files changed, 163 insertions(+), 109 deletions(-) diff --git a/libs/Objects/include/Objects/FiniteAutomaton.h b/libs/Objects/include/Objects/FiniteAutomaton.h index 90604863..8e668c79 100644 --- a/libs/Objects/include/Objects/FiniteAutomaton.h +++ b/libs/Objects/include/Objects/FiniteAutomaton.h @@ -91,12 +91,9 @@ class FiniteAutomaton : public AbstractMachine { std::optional get_nfa_minimality_value() const; // поиск префикса из состояния state_beg в состояние state_end - std::optional get_prefix( + std::optional> get_prefix( int state_beg, int state_end, std::map& was) const; // NOLINT(runtime/references) - // функция проверки на семантическую детерминированность - bool semdet_entry(bool annoted = false, iLogTemplate* log = nullptr) const; - // меняет местами состояние под индексом 0 с начальным // используется в томпсоне void set_initial_state_to_zero(); diff --git a/libs/Objects/include/Objects/Regex.h b/libs/Objects/include/Objects/Regex.h index 2adc2a15..4dbe6442 100644 --- a/libs/Objects/include/Objects/Regex.h +++ b/libs/Objects/include/Objects/Regex.h @@ -25,7 +25,7 @@ class Regex : public AlgExpression { bool equals(const AlgExpression* other) const override; // Множество префиксов длины len - void get_prefix(int len, std::set& prefs) const; // NOLINT(runtime/references) + void get_prefix(int len, std::vector>& prefs) const; // NOLINT(runtime/references) // Производная по символу bool derivative_with_respect_to_sym(Regex* respected_sym, const Regex* reg_e, Regex& result) const; // NOLINT(runtime/references) @@ -33,7 +33,7 @@ class Regex : public AlgExpression { Regex* respected_sym, const Regex* reg_e, std::vector& result) const; // NOLINT(runtime/references) // Производная по префиксу - bool derivative_with_respect_to_str(std::string str, const Regex* reg_e, + bool derivative_with_respect_to_str(const std::vector& str, const Regex* reg_e, Regex& result) const; // NOLINT(runtime/references) // возвращает вектор состояний нового автомата, построенного из регулярного выражения @@ -83,7 +83,7 @@ class Regex : public AlgExpression { void partial_symbol_derivative(const Regex& respected_sym, std::vector& result) const; // NOLINT(runtime/references) // производная по префиксу - std::optional prefix_derivative(std::string respected_str) const; + std::optional prefix_derivative(const std::vector& respected_str) const; // поиск длины накачки int pump_length(iLogTemplate* log = nullptr) const; Regex linearize(iLogTemplate* log = nullptr) const; diff --git a/libs/Objects/src/FiniteAutomaton.cpp b/libs/Objects/src/FiniteAutomaton.cpp index 9f6b3911..99786753 100644 --- a/libs/Objects/src/FiniteAutomaton.cpp +++ b/libs/Objects/src/FiniteAutomaton.cpp @@ -2185,105 +2185,152 @@ bool FiniteAutomaton::is_dfa_minimal(iLogTemplate* log) const { return result; } -std::optional FiniteAutomaton::get_prefix(int state_beg, int state_end, - map& was) const { - std::optional ans = std::nullopt; +std::optional> FiniteAutomaton::get_prefix(int state_beg, int state_end, + map& was) const { + std::optional> ans = std::nullopt; if (state_beg == state_end) { - ans = ""; + ans = {Regex(Symbol::Epsilon)}; return ans; } - auto trans = &states[state_beg].transitions; - for (auto it = trans->begin(); it != trans->end(); it++) { - for (auto it2 = it->second.begin(); it2 != it->second.end(); it2++) { - if (!was[*it2]) { - was[*it2] = true; - auto res = get_prefix(*it2, state_end, was); - if (res) { - ans = (string)it->first + (string)*res; + auto trans = states[state_beg].transitions; + for (auto it : trans) { + for (auto it2 : it.second) { + if (!was[it2]) { + was[it2] = true; + auto res = get_prefix(it2, state_end, was); + if (res.has_value()) { + ans = {Regex(it.first)}; + cout << Regex(it.first).to_txt() << "\n"; + ans.value().insert(ans.value().end(), res.value().begin(), res.value().end()); + return ans; } - return ans; } } } return ans; } -bool FiniteAutomaton::semdet_entry(bool annoted, iLogTemplate* log) const { - if (!annoted) { - return annote().semdet_entry(true); - } +bool FiniteAutomaton::semdet(iLogTemplate* log) const { + /* if (!annoted) { + return annote().semdet_entry(true, log); + }*/ map was; - vector final_states; - for (int i = 0; i < states.size(); i++) { - if (states[i].is_terminal) - final_states.push_back(i); - } + int trans_id = 1; + bool reliability; + + auto make_string_transition = [=](string from, Symbol through, string to) { + string arrow = ">->>[[" + string(through) + "]]"; + return from + arrow + to; + }; + vector state_languages; + FiniteAutomaton dfaa = annote(); + Regex reg = dfaa.to_regex(); + MetaInfo meta; + iLogTemplate::Table t; + string local_ambig = ""; + if (log) { + t.columns.push_back("Неоднозначные переходы"); + t.columns.push_back("Безопасные переходы"); + } state_languages.resize(states.size()); for (int i = 0; i < states.size(); i++) { - auto prefix = get_prefix(initial_state, i, was); - was.clear(); - // cout << "Try " << i << "\n"; + auto prefix = dfaa.get_prefix(initial_state, i, was); if (!prefix.has_value()) continue; - Regex reg; - // Получение языка из производной регулярки автомата по префиксу: - // this -> reg (arden?) - reg = to_regex(); - // cout << "State: " << i << "\n"; - // cout << "Prefix: " << prefix.value() << "\n"; - // cout << "Regex: " << reg.to_txt() << "\n"; + was.clear(); + cout << "Try " << states[i].identifier << "" << "\n"; + for (int i = 0; i < prefix.value().size(); i++) + cout << prefix.value()[i].to_txt() << " "; + cout << "\n"; auto derivative = reg.prefix_derivative(prefix.value()); if (!derivative.has_value()) continue; state_languages[i] = derivative.value(); - // cout << "Derevative: " << state_languages[i].to_txt() << "\n"; + //if (annoted) + state_languages[i] = state_languages[i].deannote(); + cout << "Derevative: " << state_languages[i].to_txt() << "\n"; // TODO: logs - if (log) { +/* if (log) { log->set_parameter("state", i); log->set_parameter("prefix", prefix.value()); log->set_parameter("regex", reg); log->set_parameter("derivative", state_languages[i]); - } + }*/ state_languages[i].make_language(); } for (int i = 0; i < states.size(); i++) { - for (const auto& state : states) { - for (auto transition = state.transitions.begin(); transition != state.transitions.end(); - transition++) { + for (auto transition : states[i].transitions) { bool verified_ambiguity = false; - for (auto it = transition->second.begin(); it != transition->second.end(); it++) { - bool reliability = true; - for (auto it2 = transition->second.begin(); it2 != transition->second.end(); - it2++) { - if (!state_languages[*it].subset(state_languages[*it2])) { - reliability = false; - break; + int target; + for (auto it : transition.second) { + reliability = true; + target = it; + set checked; + checked.insert(target); + for (auto it2 : transition.second) { + if (it2 <= it) continue; + cout << "Checking lang subset: " << states[target].identifier << " " + << states[it2].identifier << "\n"; + cout << state_languages[target].to_txt() << " " << state_languages[it2].to_txt() + << "\n"; + if (!state_languages[target].subset(state_languages[it2])) { + if (!state_languages[it2].subset(state_languages[target])) { + reliability = false; + break; + } else { + target = it2; + checked.insert(target); + } } } verified_ambiguity |= reliability; + if (transition.second.size() > 1) { + local_ambig=""; + t.rows.push_back("<"+ states[i].identifier+","+ string(transition.first)+ ">"); + for (auto v : transition.second) + {if ((!reliability)||(v >= it)) + meta.upd(EdgeMeta{i, v, transition.first, trans_id}); + local_ambig +=", " + states[v].identifier; + } + local_ambig = local_ambig.substr(1); + trans_id++; + t.data.push_back(local_ambig); + if (reliability) {meta.upd(EdgeMeta{i,target,transition.first,0}); + t.data.push_back(make_string_transition(states[i].identifier, transition.first, states[target].identifier)); + cout << "Meta UPD" + << "\n"; + } else + t.data.push_back("Отсутствуют"); + } + if (!verified_ambiguity) { + // Logger::log("Результат SemDet", "false"); + // Logger::finish_step(); + cout << "Break false" + << "\n"; + if (log) { + log->set_parameter("oldautomaton", *this, meta); + log->set_parameter("result", "false\\\\"); + if (trans_id > 1) { + log->set_parameter("trans_table", t); + } + } + return false; + } + if (target == it) + break; } - if (!verified_ambiguity) { - return false; - } - } } - } - return true; -} - -bool FiniteAutomaton::semdet(iLogTemplate* log) const { + } if (log) { - log->set_parameter("oldautomaton", *this); - } - bool result = semdet_entry(log); - if (log) { - log->set_parameter("result", result); - } - return result; + log->set_parameter("oldautomaton", *this, meta); + if (trans_id>1) + {log->set_parameter("trans_table", t); } + log->set_parameter("result", "true\\\\"); + } + return true; } - // bool FiniteAutomaton::parsing_nfa(const string& s, int index_state) const { // FAState state = states[index_state]; // diff --git a/libs/Objects/src/Regex.cpp b/libs/Objects/src/Regex.cpp index 076efcdd..ed13f6cf 100644 --- a/libs/Objects/src/Regex.cpp +++ b/libs/Objects/src/Regex.cpp @@ -603,39 +603,39 @@ FiniteAutomaton Regex::to_ilieyu(iLogTemplate* log) const { return fa; } -void Regex::get_prefix(int len, set& prefs) const { - set prefs1, prefs2; +void Regex::get_prefix(int len, vector>& prefs) const { + vector> prefs1, prefs2; if (len == 0) { - prefs.insert(""); + prefs.push_back({Regex(Symbol::Epsilon)}); return; } switch (type) { case Type::eps: if (len == 0) - prefs.insert(""); + prefs.push_back({Regex(Symbol::Epsilon)}); return; case Type::symb: - if (len == 1) { - prefs.insert(string(symbol)); - } + prefs.push_back({*this}); return; case Type::alt: Regex::cast(term_l)->get_prefix(len, prefs1); Regex::cast(term_r)->get_prefix(len, prefs2); - for (auto i = prefs1.begin(); i != prefs1.end(); i++) { - prefs.insert(*i); + for (int i = 0; i < prefs1.size(); i++) { + prefs.push_back(prefs1[i]); } - for (auto i = prefs2.begin(); i != prefs2.end(); i++) { - prefs.insert(*i); + for (int i = 0; i < prefs2.size(); i++) { + prefs.push_back(prefs2[i]); } return; case Type::conc: for (int k = 0; k <= len; k++) { Regex::cast(term_l)->get_prefix(k, prefs1); Regex::cast(term_r)->get_prefix(len - k, prefs2); - for (auto i = prefs1.begin(); i != prefs1.end(); i++) { - for (auto j = prefs2.begin(); j != prefs2.end(); j++) { - prefs.insert(*i + *j); + for (int i = 0; i < prefs1.size(); i++) { + for (int j = 0; j < prefs2.size(); j++) { + vector auxpref = prefs1[i]; + auxpref.insert(auxpref.end(), prefs2[j].begin(), prefs2[j].end()); + prefs.push_back(auxpref); } } prefs1.clear(); @@ -644,15 +644,17 @@ void Regex::get_prefix(int len, set& prefs) const { return; case Type::star: if (len == 0) { - prefs.insert(""); + prefs.push_back({Regex(Symbol::Epsilon)}); return; } for (int k = 1; k <= len; k++) { Regex::cast(term_l)->get_prefix(k, prefs1); get_prefix(len - k, prefs2); - for (auto i = prefs1.begin(); i != prefs1.end(); i++) { - for (auto j = prefs2.begin(); j != prefs2.end(); j++) { - prefs.insert(*i + *j); + for (int i = 0; i < prefs1.size(); i++) { + for (int j = 0; j < prefs2.size(); j++) { + vector auxpref = prefs1[i]; + auxpref.insert(auxpref.end(), prefs2[j].begin(), prefs2[j].end()); + prefs.push_back(auxpref); } } prefs1.clear(); @@ -872,15 +874,13 @@ bool Regex::partial_derivative_with_respect_to_sym(Regex* respected_sym, const R } } -bool Regex::derivative_with_respect_to_str(string str, const Regex* reg_e, Regex& result) const { +bool Regex::derivative_with_respect_to_str(const vector& str, const Regex* reg_e, Regex& result) const { bool success = true; Regex cur = *reg_e; Regex next = *reg_e; - for (char i : str) { - Regex sym; - sym.type = Type::symb; - sym.symbol = i; + for (auto sym : str) { next.clear(); + cout << "Take derivative: " << sym.to_txt() << "\n"; success &= derivative_with_respect_to_sym(&sym, &cur, next); if (!success) { return false; @@ -910,7 +910,7 @@ void Regex::partial_symbol_derivative(const Regex& respected_sym, vector& delete rs; } -std::optional Regex::prefix_derivative(string respected_str) const { +std::optional Regex::prefix_derivative(const vector& respected_str) const { Regex result; std::optional ans; if (derivative_with_respect_to_str(respected_str, this, result)) @@ -921,49 +921,59 @@ std::optional Regex::prefix_derivative(string respected_str) const { } int Regex::pump_length(iLogTemplate* log) const { + if (log) + log->set_parameter("oldregex", *this); if (language->is_pump_length_cached()) { if (log) { - log->set_parameter("pumplength", language->get_pump_length()); + log->set_parameter("pumplength1", language->get_pump_length()); log->set_parameter("cach", "(!) результат получен из кэша"); } return language->get_pump_length(); } - map checked_prefixes; + std::unordered_map checked_prefixes; + auto word_to_str = [=](std::vector& from) { + std::string s = ""; + for (auto f : from) + s = s + f.to_txt(); + return s; + }; for (int i = 1;; i++) { - set prefs; + std::vector> prefs; get_prefix(i, prefs); if (prefs.empty()) { language->set_pump_length(i); if (log) { - log->set_parameter("pumplength", i); + log->set_parameter("pumplength1", i); } return i; } - for (auto it = prefs.begin(); it != prefs.end(); it++) { + for (auto it : prefs) { bool was = false; - for (int j = 0; j < it->size(); j++) { - if (checked_prefixes[it->substr(0, j)]) { + for (int j = 0; j < it.size(); j++) { + if (checked_prefixes[word_to_str(vector(it.begin(), it.begin() + j))]) { was = true; break; } } if (was) continue; - for (int j = 0; j < it->size(); j++) { - for (int k = j + 1; k <= it->size(); k++) { - string pumped_prefix; - pumped_prefix += it->substr(0, j); - pumped_prefix += "(" + it->substr(j, k - j) + ")*"; - pumped_prefix += it->substr(k, it->size() - k + j); - Regex a(pumped_prefix); + for (int j = 0; j < it.size(); j++) { + Regex pumped_prefix = it[0]; + Regex suff = Regex(Symbol::Epsilon); + for (int k = 0; k < j; k++) + pumped_prefix = Regex(Type::conc, &pumped_prefix, &it[k]); + for (int k = j + 1; k < it.size(); k++) + suff = Regex(Type::conc, &suff, &it[k]); + for (int k = j + 1; k <= it.size(); k++) { + Regex a(Type::conc, &Regex(Type::star, &pumped_prefix), &suff); Regex b; Regex pumping(Type::conc, &a, &b); - if (!derivative_with_respect_to_str(*it, this, *Regex::cast(pumping.term_r))) + if (!derivative_with_respect_to_str(it, this, *Regex::cast(pumping.term_r))) continue; pumping.make_language(); // cout << pumped_prefix << " " << pumping.term_r->to_txt(); if (subset(pumping)) { - checked_prefixes[*it] = true; + checked_prefixes[word_to_str(it)] = true; language->set_pump_length(i); /*cout << *it << "\n"; cout << pumping.to_txt() << "\n";