Skip to content

Commit

Permalink
Merge pull request #335 from OnionGrief/bisim
Browse files Browse the repository at this point in the history
to_mfa_additional & bisimilar fix
  • Loading branch information
xendalm authored May 7, 2024
2 parents d07cf3e + afb06e9 commit bea7880
Show file tree
Hide file tree
Showing 10 changed files with 148 additions and 130 deletions.
26 changes: 13 additions & 13 deletions apps/MetamorphicTestsApp/src/MetamorphicTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,21 +129,21 @@ void MetamorphicTests::cmp_automatons(const MemoryFiniteAutomaton& mfa1,
// std::cout << " " << a << " " << b << " " << double(b) / (a + b) << "\n";
}

// TEST(TestMFA, Fuzzing) {
// RegexGenerator rg(5, 3, 3, 2);
// for (int i = 0; i < RegexNumberX10; i++) {
// string rgx_str = MetamorphicTests::generate_bregex(rg, 2);
// // std::cout << i << " " << rgx_str << "\n";
// SCOPED_TRACE("Regex: " + rgx_str);
// MemoryFiniteAutomaton mfa1 = BackRefRegex(rgx_str).to_mfa();
// MemoryFiniteAutomaton mfa2 = BackRefRegex(rgx_str).to_mfa_additional();
//
// MetamorphicTests::cmp_automatons(mfa1, mfa2);
// }
// }
TEST(TestMFA, Fuzzing) {
RegexGenerator rg(5, 3, 3, 2);
for (int i = 0; i < RegexNumberX10; i++) {
string rgx_str = MetamorphicTests::generate_bregex(rg, 2);
// std::cout << i << " " << rgx_str << "\n";
SCOPED_TRACE("Regex: " + rgx_str);
MemoryFiniteAutomaton mfa1 = BackRefRegex(rgx_str).to_mfa();
MemoryFiniteAutomaton mfa2 = BackRefRegex(rgx_str).to_mfa_additional();

MetamorphicTests::cmp_automatons(mfa1, mfa2);
}
}

// TEST(TestMFA, Fuzz) {
// string rgx_str = "(([[b*]:1|]:2|)&1&1&2)*";
// string rgx_str = "(a[[b|]:1|]:2*[[c|]:1|]:2*&1&2)*";
// MemoryFiniteAutomaton mfa1 = BackRefRegex(rgx_str).to_mfa();
// MemoryFiniteAutomaton mfa2 = BackRefRegex(rgx_str).to_mfa_additional();
//
Expand Down
41 changes: 25 additions & 16 deletions apps/UnitTestsApp/src/UnitTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -740,11 +740,12 @@ TEST(TestParsing, MFA_equivalence) {
{false, "(&1[b]:1[a*]:1)*"},
{true, "[a*]:1&1[b|c]:2*&2"},
{true, "[a*]:1&1[b|c]:1*&1"},
{false, "[[|b]:2*]:1*a&1&2"},
{false, "(([[b*]:1|]:2|)&1&1&2)*"},
{false, "(a[[b|]:1|]:2&1&1)*"},
};

int MAX_LEN = 9;

for_each(tests.begin(), tests.end(), [&MAX_LEN](const Test& test) {
for_each(tests.begin(), tests.end(), [](const Test& test) {
auto [test_rem_eps, rgx_str] = test;
std::cout << rgx_str << std::endl;
SCOPED_TRACE("Regex: " + rgx_str);
Expand All @@ -755,6 +756,7 @@ TEST(TestParsing, MFA_equivalence) {
MFAs.emplace_back(mfa.remove_eps());
}

int MAX_LEN = mfa.size();
auto base_test_set = mfa.generate_test_set(MAX_LEN);
unordered_map<string, int> base_parsing_res;
for (const auto& mutated_word : base_test_set.second) {
Expand Down Expand Up @@ -783,19 +785,26 @@ TEST(TestReverse, BRegex_Reverse) {
}

TEST(TestBisimilar, MFA_Bisimilar) {
ASSERT_TRUE(MemoryFiniteAutomaton::bisimilar(BackRefRegex("[aa*]:1a&1").to_mfa_additional(),
BackRefRegex("a[a*a]:1&1").to_mfa_additional())
.value());
ASSERT_FALSE(MemoryFiniteAutomaton::bisimilar(BackRefRegex("[a*]:1a*&1").to_mfa_additional(),
BackRefRegex("a*[a*]:1&1").to_mfa_additional())
.value());
ASSERT_TRUE(MemoryFiniteAutomaton::bisimilar(BackRefRegex("[ab]:2cab&2").to_mfa_additional(),
BackRefRegex("abc[ab]:2&2").to_mfa_additional())
.value());
ASSERT_FALSE(
MemoryFiniteAutomaton::bisimilar(BackRefRegex("[a|b]:1c(a|b)&1").to_mfa_additional(),
BackRefRegex("(a|b)c[a|b]:1&1").to_mfa_additional())
.value());
using Test = std::tuple<string, string, bool>;
vector<Test> tests = {
{"[aa*]:1a&1", "a[a*a]:1&1", true},
{"[a*]:1a*&1", "a*[a*]:1&1", false},
{"[ab]:2cab&2", "abc[ab]:2&2", true},
{"[a|b]:1c(a|b)&1", "(a|b)c[a|b]:1&1", false},
{"[a]:1*&1", "[a*]:1*&1", false},
{"[a*]:1&1", "[a*]:1a*&1", false},
{"[a*a*|]:1&1", "[a*]:1&1", true},
{"[a|a]:1*&1", "[a]:1*[a]:1*&1", true},
};

for_each(tests.begin(), tests.end(), [](const Test& test) {
auto [rgx1, rgx2, expected_res] = test;
SCOPED_TRACE(rgx1 + " " + rgx2);
ASSERT_EQ(MemoryFiniteAutomaton::bisimilar(BackRefRegex(rgx1).to_mfa_additional(),
BackRefRegex(rgx2).to_mfa_additional()),
expected_res);
});

ASSERT_FALSE(
MemoryFiniteAutomaton::bisimilar(BackRefRegex("[[a*]:1]:2a*&1").to_mfa_additional(),
BackRefRegex("a*[a*]:1&1").to_mfa_additional())
Expand Down
4 changes: 3 additions & 1 deletion libs/Objects/include/Objects/BackRefRegex.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,12 @@ class BackRefRegex : public AlgExpression {
// 0-e состояние начальное
std::vector<MFAState> _to_mfa() const;

Cell get_cell() const;

// возвращает вектор листьев дерева
// устанавливает для них in_lin_cells, first_in_cells и last_in_cells
void preorder_traversal(
std::vector<BackRefRegex*>& terms, // NOLINT(runtime/references)
std::vector<BackRefRegex*>& terms, // NOLINT(runtime/references)
int& lin_counter, // NOLINT(runtime/references)
std::vector<std::unordered_set<int>>& in_lin_cells, // NOLINT(runtime/references)
std::vector<CellSet>& first_in_cells, // NOLINT(runtime/references)
Expand Down
5 changes: 3 additions & 2 deletions libs/Objects/include/Objects/MemoryCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ struct CaptureGroup {
};
};
int cell;
std::unordered_set<std::vector<int>, VectorHasher<int>> traces;
std::unordered_set<std::vector<int>, VectorHasher<int>> paths;
std::unordered_set<State, State::Hasher> states;
std::unordered_set<int> state_classes;

CaptureGroup() = default;
CaptureGroup(int, const std::vector<std::vector<int>>&, const std::vector<int>&);
CaptureGroup(int, const std::vector<std::vector<int>>&, const std::vector<int>&,
bool reset = false);
bool operator==(const CaptureGroup& other) const;

std::unordered_set<int> get_states_diff(
Expand Down
12 changes: 6 additions & 6 deletions libs/Objects/include/Objects/MemoryFiniteAutomaton.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,15 +203,15 @@ class MemoryFiniteAutomaton : public AbstractMachine {

std::vector<MFAState::Transitions> get_reversed_transitions() const;

std::vector<std::vector<int>> find_cg_traces(int state_index, std::unordered_set<int> visited,
int cell, int opening_state) const;
std::pair<std::vector<std::vector<int>>, std::vector<std::vector<int>>> find_cg_paths(
int state_index, std::unordered_set<int> visited, int cell, int opening_state) const;
std::vector<CaptureGroup> find_capture_groups_backward(
int ref_incoming_state, int cell, const std::vector<int>& fa_classes) const;

bool find_path_decisions(int state_index,
std::vector<int>& visited, // NOLINT(runtime/references)
const std::unordered_set<int>& path_states) const;
bool path_contains_decisions(const std::unordered_set<int>& path_states) const;
bool find_decisions(int state_index,
std::vector<int>& visited, // NOLINT(runtime/references)
const std::unordered_set<int>& states_to_check) const;
bool states_have_decisions(const std::unordered_set<int>& states_to_check) const;

static std::optional<bool> bisimilarity_checker(const MemoryFiniteAutomaton&,
const MemoryFiniteAutomaton&);
Expand Down
87 changes: 49 additions & 38 deletions libs/Objects/src/BackRefRegex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,11 @@ MemoryFiniteAutomaton BackRefRegex::to_mfa(iLogTemplate* log) const {
return mfa;
}

void BackRefRegex::preorder_traversal(
vector<BackRefRegex*>& terms, int& lin_counter,
Cell BackRefRegex::get_cell() const {
return {cell_number, lin_number};
}

void BackRefRegex::preorder_traversal(vector<BackRefRegex*>& terms, int& lin_counter,
vector<unordered_set<int>>& in_lin_cells,
vector<CellSet>& first_in_cells,
vector<CellSet>& last_in_cells,
Expand Down Expand Up @@ -466,22 +469,20 @@ void BackRefRegex::preorder_traversal(
case conc:
l_contains_eps = cast(term_l)->contains_eps();
r_contains_eps = cast(term_r)->contains_eps();
cast(term_l)->preorder_traversal(
terms,
lin_counter,
in_lin_cells,
first_in_cells,
last_in_cells,
cur_in_lin_cells,
cur_first_in_cells,
cast(term_l)->preorder_traversal(terms,
lin_counter,
in_lin_cells,
first_in_cells,
last_in_cells,
cur_in_lin_cells,
cur_first_in_cells,
r_contains_eps ? cur_last_in_cells : CellSet());
cast(term_r)->preorder_traversal(
terms,
lin_counter,
in_lin_cells,
first_in_cells,
last_in_cells,
cur_in_lin_cells,
cast(term_r)->preorder_traversal(terms,
lin_counter,
in_lin_cells,
first_in_cells,
last_in_cells,
cur_in_lin_cells,
l_contains_eps ? cur_first_in_cells : CellSet(),
cur_last_in_cells);
return;
Expand All @@ -498,8 +499,8 @@ void BackRefRegex::preorder_traversal(
case memoryWriter:
lin_number = lin_counter++;
cur_in_lin_cells.insert(lin_number);
cur_first_in_cells.insert({cell_number, lin_number});
cur_last_in_cells.insert({cell_number, lin_number});
cur_first_in_cells.insert(get_cell());
cur_last_in_cells.insert(get_cell());
cast(term_l)->preorder_traversal(terms,
lin_counter,
in_lin_cells,
Expand Down Expand Up @@ -614,10 +615,15 @@ pair<bool, ToResetMap> BackRefRegex::contains_eps_tracking_resets() const {
l = cast(term_l)->contains_eps_tracking_resets();
if (l.first) {
CellSet depends_on;
for (auto& [cell, info] : l.second)
if (info.first)
for (auto& [cell, emptiness_info] : l.second) {
// если ячейка не может быть пропущена, добавляем в список тех,
// от пустоты которых зависит пустота текущей
if (emptiness_info.first)
depends_on.insert(cell);
l.second.insert({{cell_number, lin_number}, {true, depends_on}});
// вложенные ячейки не могут быть сброшены, если не сброшена внешняя
emptiness_info.second.insert(get_cell());
}
l.second.insert({get_cell(), {true, depends_on}});
return l;
} else {
return {false, {}};
Expand Down Expand Up @@ -730,38 +736,42 @@ vector<CellSet> merge_to_reset_maps(const vector<ToResetMap>& maps) {
ToResetMap merged;
CellSet to_reset, maybe_to_reset;
for (const auto& map : maps) {
for (const auto& [cell, info] : map) {
if (info.first)
for (const auto& [cell, emptiness_info] : map) {
if (emptiness_info.first)
to_reset.insert(cell);
else
maybe_to_reset.insert(cell);

auto it = merged.find(cell);
if (it != merged.end()) {
it->second.first &= info.first;
it->second.second = get_intersection(it->second.second, info.second);
it->second.first &= emptiness_info.first;
it->second.second = get_intersection(it->second.second, emptiness_info.second);
} else {
merged[cell] = info;
merged[cell] = emptiness_info;
}
}
}

unordered_map<Cell, CellSet, Cell::Hasher> must_be_equal_to;
for (const auto& [cell, info] : merged)
for (const auto& depends_on_cell : info.second) {
must_be_equal_to[cell].insert(depends_on_cell);
must_be_equal_to[depends_on_cell].insert(cell);
unordered_map<Cell, CellSet, Cell::Hasher> must_have_same_actions;
for (const auto& [cell, emptiness_info] : merged)
for (const auto& depends_on_cell : emptiness_info.second) {
must_have_same_actions[cell].insert(depends_on_cell);
}

vector<CellSet> res({to_reset});
auto t = get_all_combinations(maybe_to_reset);
for (const auto& i : get_all_combinations(maybe_to_reset)) {
// пропускаем комбинации, которые не содержат всех зависимых друг от друга ячеек
bool skip = std::any_of(i.begin(), i.end(), [&](const auto& cell) {
return must_be_equal_to.count(cell) &&
std::any_of(
must_be_equal_to.at(cell).begin(),
must_be_equal_to.at(cell).end(),
[&](const auto& must_be_equal_to) { return !i.count(must_be_equal_to); });
return must_have_same_actions.count(cell) &&
std::any_of(must_have_same_actions.at(cell).begin(),
must_have_same_actions.at(cell).end(),
[&](const auto& must_have_same_actions_with) {
// если в комбинации или в to_reset нет ячейки,
// от которой зависит cell
return !i.count(must_have_same_actions_with) &&
!to_reset.count(must_have_same_actions_with);
});
});
if (skip)
continue;
Expand Down Expand Up @@ -931,7 +941,8 @@ MemoryFiniteAutomaton BackRefRegex::to_mfa_additional(iLogTemplate* log) const {

for (const auto& [to, iteration_over_cells, to_reset] :
following_states[symb.last_linearization_number()]) {
transitions[delinearized_symbols[to]].insert(MFATransition(to + 1,
transitions[delinearized_symbols[to]].insert(
MFATransition(to + 1,
MFATransition::TransitionConfig{&first_in_cells[to],
&in_lin_cells[i],
&iteration_over_cells,
Expand Down
6 changes: 3 additions & 3 deletions libs/Objects/src/FiniteAutomaton.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2713,8 +2713,8 @@ FiniteAutomaton FiniteAutomaton::get_subautomaton(const CaptureGroup& cg) {
Alphabet alphabet;

unordered_set<int> terminal_states;
for (const auto& trace : cg.traces)
terminal_states.insert(trace[trace.size() - 1]);
for (const auto& path : cg.paths)
terminal_states.insert(path[path.size() - 1]);

unordered_map<int, int> indexes;
int idx = 0;
Expand All @@ -2732,5 +2732,5 @@ FiniteAutomaton FiniteAutomaton::get_subautomaton(const CaptureGroup& cg) {
sub_states[indexes.at(st.index)].add_transition(indexes.at(to), symbol);
}

return {indexes.at((*cg.traces.begin())[0]), sub_states, alphabet};
return {indexes.at((*cg.paths.begin())[0]), sub_states, alphabet};
}
22 changes: 11 additions & 11 deletions libs/Objects/src/MemoryCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ bool CaptureGroup::State::operator==(const State& other) const {
return index == other.index && class_num == other.class_num;
}

CaptureGroup::CaptureGroup(int cell, const std::vector<std::vector<int>>& _traces,
const std::vector<int>& _state_classes)
CaptureGroup::CaptureGroup(int cell, const std::vector<std::vector<int>>& _paths,
const std::vector<int>& _state_classes, bool reset)
: cell(cell) {
for (const auto& trace : _traces) {
traces.insert(trace);
for (auto st : trace) {
int class_num = (trace.size() > 1) ? _state_classes[st] : State::reset_class;
for (const auto& path : _paths) {
paths.insert(path);
for (auto st : path) {
int class_num = (reset) ? State::reset_class : _state_classes[st];
states.insert({st, class_num});
state_classes.insert(class_num);
}
Expand All @@ -60,16 +60,16 @@ std::unordered_set<int> CaptureGroup::get_states_diff(
if (st.class_num != State::reset_class && !other_state_classes.count(st.class_num))
res.insert(st.index);

for (const auto& trace : traces)
for (int i = trace.size() - 1; i > 0; i--)
if (res.count(trace[i - 1]))
res.insert(trace[i]);
for (const auto& path : paths)
for (int i = path.size() - 1; i > 0; i--)
if (res.count(path[i - 1]))
res.insert(path[i]);
return res;
}

std::ostream& operator<<(std::ostream& os, const CaptureGroup& cg) {
os << "{\n";
for (const auto& i : cg.traces)
for (const auto& i : cg.paths)
os << i;
os << "}\n";
for (const auto& i : cg.states)
Expand Down
Loading

0 comments on commit bea7880

Please sign in to comment.