From 51d61efe11f37b3dfac5661880da94f6d323c6d8 Mon Sep 17 00:00:00 2001 From: Julien Romero Date: Mon, 18 Mar 2024 19:10:17 +0100 Subject: [PATCH] :sparkles: Add the negation [^...] in PythonRegex --- pyformlang/regular_expression/python_regex.py | 20 ++++++++++++++----- .../tests/test_python_regex.py | 15 ++++++++++++++ setup.py | 2 +- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index b9d7bf6..00728e8 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -23,7 +23,8 @@ "$": "\\$", "\n": "", " ": "\\ ", - '\\': '\\\\' + '\\': '\\\\', + "?": "\\?" } RECOMBINE = { @@ -218,13 +219,14 @@ def _preprocess_brackets_content(self, bracket_content): bracket_content_temp = [] previous_is_valid_for_range = False for i, symbol in enumerate(bracket_content): - if (symbol == "-" and not self._should_escape_next_symbol( - bracket_content_temp)): - if (not previous_is_valid_for_range - or i == len(bracket_content) - 1): + # We have a range + if symbol == "-" and not self._should_escape_next_symbol(bracket_content_temp): + if not previous_is_valid_for_range or i == len(bracket_content) - 1: + # False alarm, no range bracket_content_temp.append("-") previous_is_valid_for_range = True else: + # We insert all the characters in the range bracket_content[i - 1] = self._recombine(bracket_content[i - 1]) for j in range(ord(bracket_content[i - 1][-1]) + 1, ord(bracket_content[i + 1][-1])): @@ -244,10 +246,18 @@ def _preprocess_brackets_content(self, bracket_content): previous_is_valid_for_range = False else: previous_is_valid_for_range = True + bracket_content_temp = self._preprocess_negation(bracket_content_temp) bracket_content_temp = self._insert_or(bracket_content_temp) bracket_content_temp = self._recombine(bracket_content_temp) return bracket_content_temp + @staticmethod + def _preprocess_negation(bracket_content): 
+ if not bracket_content or bracket_content[0] != "^": + return bracket_content + # We invert the set: keep every ESCAPED_PRINTABLES symbol not listed in the brackets + return [x for x in ESCAPED_PRINTABLES if x not in bracket_content] + @staticmethod def _insert_or(l_to_modify): res = [] diff --git a/pyformlang/regular_expression/tests/test_python_regex.py b/pyformlang/regular_expression/tests/test_python_regex.py index 948273e..358d09d 100644 --- a/pyformlang/regular_expression/tests/test_python_regex.py +++ b/pyformlang/regular_expression/tests/test_python_regex.py @@ -313,3 +313,18 @@ def test_error_backslash(self): self._test_compare(r"[a\\\\\\]]", "\\]") self._test_compare(r"\"([d\"\\\\]|\\\\.)*\"", '"d\\"') self._test_compare(r"[a\\\\]", "a") + + def test_negation_brackets(self): + self._test_compare(r"[^abc]*", "") + self._test_compare(r"[^abc]*", "a") + self._test_compare(r"[^abc]*", "b") + self._test_compare(r"[^abc]*", "c") + self._test_compare(r"[^abc]*", "d") + self._test_compare(r"[^abc]*", "dga") + self._test_compare(r"[^abc]*", "dgh") + self._test_compare(r"[^?]*", "dgh") + + def test_question_mark(self): + self._test_compare(r".", "?") + self._test_compare(r"a(a|b)?", "a") + self._test_compare(r"a(a|b)\?", "ab?") diff --git a/setup.py b/setup.py index ada1743..06f5af1 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setuptools.setup( name='pyformlang', - version='1.0.8', + version='1.0.9', #scripts=['pyformlang'] , author="Julien Romero", author_email="romerojulien34@gmail.com",