From d9149ab9d4ecfc9d96a0f6198c83f3dd574140eb Mon Sep 17 00:00:00 2001
From: "K. Allagbe"
Date: Tue, 26 Mar 2024 12:06:26 -0400
Subject: [PATCH] Issue #4: temporary lint and test workflow

---
 .env.template                                |  6 +++
 .github/workflows/github-workflows.yaml      | 51 +++++++++++++++++++-
 ailab-llama-search/requirements.txt          |  1 +
 ailab-llama-search/tests/test_integration.py | 15 +++---
 4 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/.env.template b/.env.template
index a69d12b..cb5eb01 100644
--- a/.env.template
+++ b/.env.template
@@ -42,3 +42,9 @@ DB_NAME=
 # wish to query. This will depend on your database schema
 # TABLE_NAME=
 # COLUMN=
+
+# ailab-llama-search parameters. Adapt to your needs.
+# Ask your cloud admin for the secrets.
+LLAMAINDEX_DB_EMBED_MODEL_PARAMS='{"model": "text-embedding-ada-002", "deployment_name": "ada", "api_key": "", "api_version": "2023-07-01-preview", "azure_endpoint": ""}'
+LLAMAINDEX_DB_VECTOR_STORE_PARAMS='{"database": "llamaindex_db_legacy", "host": "", "password": "", "port": "5432", "user": "", "embed_dim": 1536}'
+LLAMAINDEX_DB_TRANS_PATHS='{"id": "node/metadata/id", "chunk_id": "node/metadata/chunk_id", "url": "node/metadata/url", "title": "node/metadata/title", "subtitle": "node/metadata/subtitle", "tokens_count": "node/metadata/tokens_count", "last_updated": "node/metadata/last_updated", "score": "node/metadata/score", "llama_id": "node/id_", "llama_score": "score", "content": "node/text"}'
diff --git a/.github/workflows/github-workflows.yaml b/.github/workflows/github-workflows.yaml
index 4cd829d..f2142b5 100644
--- a/.github/workflows/github-workflows.yaml
+++ b/.github/workflows/github-workflows.yaml
@@ -8,7 +8,56 @@ on:
       - synchronize
 
 jobs:
-  # lint-test: to be determined
+  # temporary
+  list-packages:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v3
+      - id: set-matrix
+        run: |
+          echo "Finding package directories..."
+          PACKAGES=$(find . -maxdepth 1 -type d ! -path . -exec test -e '{}/requirements.txt' \; -print | sed 's|^\./||' | jq -R . | jq -cs .)
+          echo "matrix=${PACKAGES}" >> $GITHUB_OUTPUT
+  # temporary
+  lint-test:
+    needs: list-packages
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        package: ${{ fromJson(needs.list-packages.outputs.matrix) }}
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11.x"
+          cache: "pip"
+
+      - name: Install dependencies for ${{ matrix.package }}
+        run: |
+          if [ -f ${{ matrix.package }}/requirements.txt ]; then
+            python -m pip install --upgrade pip
+            pip install -r ${{ matrix.package }}/requirements.txt
+          fi
+          pip install ruff
+
+      - name: Lint with ruff (selected rules) in ${{ matrix.package }}
+        run: |
+          ruff check --output-format=github --select=E9,F63,F7,F82 --target-version=py311 ${{ matrix.package }}
+          ruff check --output-format=github --target-version=py311 ${{ matrix.package }}
+
+      - name: Test with unittest in ${{ matrix.package }}
+        run: |
+          cd ${{ matrix.package }}
+          python -m unittest discover -s tests
+        env:
+          LLAMAINDEX_DB_EMBED_MODEL_PARAMS: ${{ secrets.LLAMAINDEX_DB_EMBED_MODEL_PARAMS }}
+          LLAMAINDEX_DB_VECTOR_STORE_PARAMS: ${{ secrets.LLAMAINDEX_DB_VECTOR_STORE_PARAMS }}
+          LLAMAINDEX_DB_TRANS_PATHS: ${{ secrets.LLAMAINDEX_DB_TRANS_PATHS }}
 
   markdown-check:
     uses: ai-cfia/github-workflows/.github/workflows/workflow-markdown-check.yml@main
diff --git a/ailab-llama-search/requirements.txt b/ailab-llama-search/requirements.txt
index e9a14d6..9059a02 100644
--- a/ailab-llama-search/requirements.txt
+++ b/ailab-llama-search/requirements.txt
@@ -3,3 +3,4 @@ llama-index-vector-stores-postgres
 llama-index-embeddings-azure-openai
 llama-index-storage-kvstore-postgres
 python-dotenv
+dpath
diff --git a/ailab-llama-search/tests/test_integration.py b/ailab-llama-search/tests/test_integration.py
index 95808aa..e04e683 100644
--- a/ailab-llama-search/tests/test_integration.py
+++ b/ailab-llama-search/tests/test_integration.py
@@ -1,6 +1,5 @@
 import json
 import os
-import time
 import unittest
 
 from ailab_llama_search import create_index_object, search
@@ -11,9 +10,13 @@ class AilabLlamaSearchIntegrationTests(unittest.TestCase):
 
     def setUp(self):
         load_dotenv()
-        self.embed_model_params = json.loads(os.getenv("EMBED_MODEL_PARAMS"))
-        self.vector_store_params = json.loads(os.getenv("VECTOR_STORE_PARAMS"))
-        self.trans_paths = json.loads(os.getenv("TRANS_PATHS"))
+        self.embed_model_params = json.loads(
+            os.getenv("LLAMAINDEX_DB_EMBED_MODEL_PARAMS")
+        )
+        self.vector_store_params = json.loads(
+            os.getenv("LLAMAINDEX_DB_VECTOR_STORE_PARAMS")
+        )
+        self.trans_paths = json.loads(os.getenv("LLAMAINDEX_DB_TRANS_PATHS"))
         self.search_params = {"similarity_top_k": 5}
         self.index = create_index_object(
             self.embed_model_params, self.vector_store_params
@@ -21,12 +24,8 @@ def setUp(self):
 
     def test_search(self):
         query = "steps and considerations of the sampling procedures for food safety"
-        start_time = time.time()
         results = search(query, self.index, self.search_params, self.trans_paths)
-        end_time = time.time()
-        duration = (end_time - start_time) * 1000
         n = self.search_params["similarity_top_k"]
-        self.assertLess(duration, 2000)
         self.assertEqual(len(results), n)
         for result in results:
             for key in self.trans_paths.keys():
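
The lint-test job takes the three LLAMAINDEX_DB_* values from GitHub secrets, while the integration test feeds them straight into json.loads() in setUp, so each value must be valid JSON whether it comes from a secret or from a local .env. A minimal local sanity check, assuming python-dotenv is installed and a .env built from .env.template:

# Local sanity check (not part of the patch): confirms the three
# LLAMAINDEX_DB_* values parse as JSON before the integration tests use them.
import json
import os

from dotenv import load_dotenv

REQUIRED_VARS = [
    "LLAMAINDEX_DB_EMBED_MODEL_PARAMS",
    "LLAMAINDEX_DB_VECTOR_STORE_PARAMS",
    "LLAMAINDEX_DB_TRANS_PATHS",
]


def check_env() -> None:
    load_dotenv()
    for name in REQUIRED_VARS:
        raw = os.getenv(name)
        if raw is None:
            raise SystemExit(f"{name} is not set")
        json.loads(raw)  # raises json.JSONDecodeError if the value is malformed
        print(f"{name}: OK")


if __name__ == "__main__":
    check_env()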
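
The patch also adds dpath to ailab-llama-search/requirements.txt, and the LLAMAINDEX_DB_TRANS_PATHS values are slash-delimited paths into a search hit. The diff does not show how ailab_llama_search actually applies these paths, so the following is only a sketch of the usual dpath lookup pattern; the raw_hit shape and the dpath.get calls are assumptions, not the library's code (dpath>=2 exposes dpath.get; older releases used dpath.util.get):

# Hypothetical illustration: flatten one search hit using a subset of the
# paths defined in LLAMAINDEX_DB_TRANS_PATHS.
import dpath

trans_paths = {
    "id": "node/metadata/id",
    "llama_id": "node/id_",
    "llama_score": "score",
    "content": "node/text",
}

# Assumed shape of a raw hit: a node payload plus a retrieval score.
raw_hit = {
    "node": {"metadata": {"id": "doc-1"}, "id_": "llama-abc", "text": "chunk text"},
    "score": 0.87,
}

# Each output key is filled by looking up its slash-delimited path in the raw hit.
flattened = {key: dpath.get(raw_hit, path) for key, path in trans_paths.items()}
print(flattened)  # {'id': 'doc-1', 'llama_id': 'llama-abc', 'llama_score': 0.87, 'content': 'chunk text'}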