From eca9a78041566314d2c589fe70f84f3b03e7b9fc Mon Sep 17 00:00:00 2001 From: Julien Romero Date: Mon, 18 Mar 2024 16:40:54 +0100 Subject: [PATCH] :bug: Closing brackets and escaping problem --- pyformlang/regular_expression/python_regex.py | 11 +++-------- .../regular_expression/tests/test_python_regex.py | 4 +++- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index 8277535..b9d7bf6 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -132,14 +132,11 @@ def _preprocess_brackets(self): in_brackets = 0 in_brackets_temp = [] for symbol in self._python_regex: - if symbol == "[" and (not regex_temp or regex_temp[-1] != "\\") and \ - (in_brackets == 0 or not in_brackets_temp[-1] or in_brackets_temp[-1][-1] != "\\"): + if symbol == "[" and not self._should_escape_next_symbol(regex_temp) and \ + (in_brackets == 0 or not self._should_escape_next_symbol(in_brackets_temp[-1])): in_brackets += 1 in_brackets_temp.append([]) - elif symbol == "]" and (not regex_temp or regex_temp[-1] != "\\") and \ - (in_brackets == 0 or not in_brackets_temp[-1] or - (in_brackets_temp[-1][-1] != "\\" or - (len(in_brackets_temp[-1]) > 1 and in_brackets_temp[-1][-2] == "\\"))): + elif symbol == "]" and in_brackets >= 1 and not self._should_escape_next_symbol(in_brackets_temp[-1]): if len(in_brackets_temp) == 1: regex_temp.append("(") regex_temp += self._preprocess_brackets_content( @@ -358,7 +355,6 @@ def _add_repetition(self, regex_list): def _preprocess_optional(self): regex_temp = [] - print(self._python_regex) for symbol in self._python_regex: if symbol == "?": if regex_temp[-1] == ")": @@ -372,7 +368,6 @@ def _preprocess_optional(self): regex_temp[-1] += symbol else: regex_temp.append(symbol) - print(regex_temp) self._python_regex = "".join(regex_temp) @staticmethod diff --git a/pyformlang/regular_expression/tests/test_python_regex.py b/pyformlang/regular_expression/tests/test_python_regex.py index 8d0a379..948273e 100644 --- a/pyformlang/regular_expression/tests/test_python_regex.py +++ b/pyformlang/regular_expression/tests/test_python_regex.py @@ -310,4 +310,6 @@ def test_range_repetition(self): self._test_compare(r"[a-z]{1,3}", "dpoz") def test_error_backslash(self): - self._test_compare(r"\"([^\"\\\\]|\\\\.)*\"", '"ddd"') + self._test_compare(r"[a\\\\\\]]", "\\]") + self._test_compare(r"\"([d\"\\\\]|\\\\.)*\"", '"d\\"') + self._test_compare(r"[a\\\\]", "a")