From f747f5e4a5a9e1fd65f00cee13083a2516c18375 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Wed, 4 Dec 2024 00:04:54 -0500 Subject: [PATCH] Vocab refresh - Now errors when completes successfully so that it will notify that a restart of the server is necessary to refresh networkx graph. Backend tests action - Set cronjob to 1x/day, as this will test database integrity, including for vocab refreshes, which is helpful since due to the way I'm having it error and notify, it isn't running the test. --- .github/workflows/refresh_voc.yml | 5 ++++- .../workflows/test_backend_e2e_and_unit_and_qc.yml | 2 ++ backend/db/refresh_dataset_group_tables.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/workflows/refresh_voc.yml b/.github/workflows/refresh_voc.yml index da1b96c22..2d16c80ef 100644 --- a/.github/workflows/refresh_voc.yml +++ b/.github/workflows/refresh_voc.yml @@ -3,7 +3,7 @@ name: Refresh vocabulary tables on: schedule: ## - cron: '0 0 * * 0' # weekly, sunday 12am - - cron: '0 8 * * *' # every day 6am GMT (3/4am EST/EDT) + - cron: '0 8 * * *' # every day 8am GMT (3/4am EST/EDT) workflow_dispatch: jobs: refresh-vocab-tables: @@ -50,5 +50,8 @@ jobs: run: make refresh-vocab # Test + # todo: This won't actually run, because the Python script will throw an error at the end (only if successful + # refresh happened), which will notify that it completed successfully. So the GitHub action will only get this far + # if there was never any data. However, this test does run daily in a separate action.
- name: Test run: python -m unittest test.test_backend.db.test_refresh_dataset_group_tables.TestCurrentDatasetGroupSetup.test_current_vocab diff --git a/.github/workflows/test_backend_e2e_and_unit_and_qc.yml b/.github/workflows/test_backend_e2e_and_unit_and_qc.yml index b3fa58d21..bb9ec9d8d 100644 --- a/.github/workflows/test_backend_e2e_and_unit_and_qc.yml +++ b/.github/workflows/test_backend_e2e_and_unit_and_qc.yml @@ -10,6 +10,8 @@ on: branches: [ develop, main ] pull_request: branches: [ develop, main ] + schedule: + - cron: '0 14 * * *' # every day 2pm GMT (9/10am EST/EDT) jobs: test: diff --git a/backend/db/refresh_dataset_group_tables.py b/backend/db/refresh_dataset_group_tables.py index 6da95bc28..b9e525505 100644 --- a/backend/db/refresh_dataset_group_tables.py +++ b/backend/db/refresh_dataset_group_tables.py @@ -31,6 +31,11 @@ from enclave_wrangler.datasets import download_datasets, get_datetime_dataset_last_updated +class NotAnError(Exception): + """Used to throw an error that causes a GitHub action notification.""" + pass + + def load_dataset_group(dataset_group_name: str, schema: str = SCHEMA, alternate_dataset_dir: Union[Path, str] = None): """Load data @@ -109,6 +114,14 @@ def refresh_dataset_group_tables( # DB Counts print('Updating database counts. This could take a while...') counts_update(f'DB refresh: {",".join(dataset_group)}', schema) + + # Vocab refresh only + # todo: ideally this notification would happen differently. More comments near bottom of refresh_voc.yml + if group_name == 'vocab' and len(dataset_group) == 1: # Will be the case for GH actions + raise NotAnError('Notification: Restart deployment backend.\nThis is not an error. It is only being raised ' + 'as an easy way to trigger a GitHub action notification. Vocabulary refresh has completed ' + 'successfully. Please restart backend to refresh networkx graph.') + print('Done')