From 831e05f52eb2450017d12dab66fb4f2336ed0a0e Mon Sep 17 00:00:00 2001 From: Kaushlendra Pratap Date: Thu, 23 Jan 2025 12:08:36 +0530 Subject: [PATCH] feat(workflows): Add Required Workflows for Build, Code Quality and Compatibility Signed-off-by: Kaushlendra Pratap --- .github/workflows/build-test.yml | 39 +++++++++++++++++++ .github/workflows/code-quality.yml | 36 +++++++++++++++++ .github/workflows/reuse-lint.yml | 4 +- .github/workflows/version-compatibility.yml | 39 +++++++++++++++++++ Safaa/src/safaa/Safaa.py | 39 ++++++++++++++----- ...e_positive_detection_model_sgd.pkl.license | 4 ++ 6 files changed, 150 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/build-test.yml create mode 100644 .github/workflows/code-quality.yml create mode 100644 .github/workflows/version-compatibility.yml create mode 100644 Safaa/src/safaa/models/false_positive_detection_model_sgd.pkl.license diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml new file mode 100644 index 0000000..2eca230 --- /dev/null +++ b/.github/workflows/build-test.yml @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: © 2025 Siemens AG +# SPDX-FileCopyrightText: © Kaushlendra Pratap Singh +# +# SPDX-License-Identifier: LGPL-2.1-only + +name: Build Tests + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + build: + name: Build Test + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install build tools + run: | + python3 -m pip install --upgrade pip + python3 -m pip install setuptools wheel + + - name: Build package + run: | + python3 setup.py sdist bdist_wheel + working-directory: ./Safaa + diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml new file mode 100644 index 0000000..74c5608 --- /dev/null +++ b/.github/workflows/code-quality.yml @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: © 2021 Siemens AG +# SPDX-FileCopyrightText: © Kaushlendra Pratap Singh +# +# SPDX-License-Identifier: LGPL-2.1-only +name: Code Quality + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + lint: + name: Run flake8 + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.8' + architecture: 'x64' + + - name: Install flake8 + run: | + python3 -m pip install --upgrade pip + python3 -m pip install flake8 + + - name: Run flake8 + run: | + flake8 --max-line-length=120 . + working-directory: ./Safaa \ No newline at end of file diff --git a/.github/workflows/reuse-lint.yml b/.github/workflows/reuse-lint.yml index db0ad2d..3d29cd5 100644 --- a/.github/workflows/reuse-lint.yml +++ b/.github/workflows/reuse-lint.yml @@ -11,9 +11,9 @@ concurrency: on: push: - branches: [master] + branches: [main] pull_request: - branches: [master] + branches: [main] workflow_dispatch: jobs: diff --git a/.github/workflows/version-compatibility.yml b/.github/workflows/version-compatibility.yml new file mode 100644 index 0000000..f8d79e2 --- /dev/null +++ b/.github/workflows/version-compatibility.yml @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: © 2025 Siemens AG +# SPDX-FileCopyrightText: © Kaushlendra Pratap Singh +# +# SPDX-License-Identifier: LGPL-2.1-only + +name: Compatibility Tests + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + name: Test on Python ${{ matrix.python-version }} + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + ## Allowed Python versions https://github.com/actions/runner-images/blob/main/images/ubuntu/Ubuntu2404-Readme.md#python + python-version: ["3.10", "3.11", "3.12"] + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python3 -m ensurepip --upgrade + python3 -m pip install --upgrade pip setuptools wheel + python3 setup.py install + working-directory: ./Safaa diff --git a/Safaa/src/safaa/Safaa.py b/Safaa/src/safaa/Safaa.py index 98f9e29..777ba50 100644 --- a/Safaa/src/safaa/Safaa.py +++ b/Safaa/src/safaa/Safaa.py @@ -47,7 +47,9 @@ def __init__(self, use_local_model=True, model_dir=None): self.vectorizer_path = os.path.join( model_dir, "false_positive_detection_vectorizer.pkl" ) - self.entity_recognizer_path = os.path.join(model_dir, "entity_recognizer") + self.entity_recognizer_path = os.path.join( + model_dir, "entity_recognizer" + ) self.declutter_model_path = os.path.join(model_dir, "declutter_model") # Load the models from the constructed file paths @@ -172,14 +174,24 @@ def _perform_text_substitutions(self, data): (r"\(c\)", " COPYRIGHTSYMBOL "), (r"\(C\)", " COPYRIGHTSYMBOL "), ( - r"""(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])""", + r"""(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|" + (?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]| + \\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@ + (?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9] + (?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])| + 1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])| + 1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:( + [\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]| + \\[\x01-\x09\x0b\x0c\x0e-\x7f])+)])""", " EMAIL ", ), (r"[^a-zA-Z0-9]", " "), ] # Perform the substitutions for each pattern in the list for pattern, replacement in subs: - data = [re.sub(pattern, replacement, sentence) for sentence in data] + data = [ + re.sub(pattern, replacement, sentence) for sentence in data + ] # Convert text to lowercase and strip extra whitespace return [sentence.lower().strip() for sentence in data] @@ -209,7 +221,8 @@ def predict(self, data, threshold=0.5): # Classify based on the given threshold. If the threshhold is not # met, automatically sets the prediction to true return [ - "f" if prediction[1] >= threshold else "t" for prediction in predictions + "f" if prediction[1] >= threshold else "t" + for prediction in predictions ] # Get binary predictions from the model if probability prediction is not @@ -239,7 +252,9 @@ def declutter(self, data, predictions): ( "" if prediction == "f" - else " ".join([ent.text for ent in self.declutter_model(sentence).ents]) + else " ".join( + [ent.text for ent in self.declutter_model(sentence).ents] + ) ) for sentence, prediction in zip(data, predictions) ] @@ -297,7 +312,9 @@ def train_ner_model( # Determine the model directory paths tmp_model_path = os.path.join(LOCAL_MODEL_DIR, "tmp") - new_model_dir = "declutter_model" if declutter_model else "entity_recognizer" + new_model_dir = ( + "declutter_model" if declutter_model else "entity_recognizer" + ) new_model_path = os.path.join(LOCAL_MODEL_DIR, new_model_dir) # Create the new model directory if it doesn't exist @@ -305,12 +322,15 @@ def train_ner_model( # Construct the training command and execute it train_command = ( - f"python -m spacy train '{tmp_cfg_path}' " f"--output '{tmp_model_path}'" + f"python -m spacy train '{tmp_cfg_path}' " + f"--output '{tmp_model_path}'" ) os.system(train_command) # Move the trained model files to the new model directory - self._move_files(os.path.join(tmp_model_path, "model-best"), new_model_path) + self._move_files( + os.path.join(tmp_model_path, "model-best"), new_model_path + ) # Clean up the temporary files and directories os.remove(tmp_cfg_path) @@ -355,7 +375,8 @@ def save(self, path=None): # Check directory permissions if not os.access(path, os.W_OK): print( - "Write permissions are not granted for the directory: " f"{save_path}" + "Write permissions are not granted for the directory: " + f"{save_path}" ) return diff --git a/Safaa/src/safaa/models/false_positive_detection_model_sgd.pkl.license b/Safaa/src/safaa/models/false_positive_detection_model_sgd.pkl.license new file mode 100644 index 0000000..a121cae --- /dev/null +++ b/Safaa/src/safaa/models/false_positive_detection_model_sgd.pkl.license @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: © 2025 Siemens AG +# SPDX-FileCopyrightText: © Kaushlendra Pratap Singh +# +# SPDX-License-Identifier: LGPL-2.1-only \ No newline at end of file