diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 50f9dfa..17eacdf 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,7 +1,7 @@
 {
     "name": "Python 3 with Jupyter",
    "image": "mcr.microsoft.com/devcontainers/python:3.11",
-    "postCreateCommand": "pip3 install --user -r requirements.txt && pip3 install --user -r document-search/requirements.txt",
+    "postCreateCommand": "pip3 install --user -r requirements.txt",
     "customizations": {
         "vscode": {
             "extensions": [
diff --git a/.env.template b/.env.template
index 6f302bc..a69d12b 100644
--- a/.env.template
+++ b/.env.template
@@ -1,25 +1,36 @@
-# API Key for Azure OpenAI
-API_KEY=
+# Storage Account Connection String. Obtain this from your Azure Storage account
+# in the Azure Portal under "Access keys".
+STORAGE_ACCOUNT_CONN_STRING=
 
-# Endpoint URL for Azure OpenAI
+# API Key for Azure OpenAI. Use the same value for both variables. Get it from
+# the Azure portal under your Azure OpenAI Service resource.
+OPENAI_API_KEY=
+AZURE_OPENAI_API_KEY=
+
+# Endpoint URL for Azure OpenAI. Find this in the Overview section of your Azure
+# OpenAI Service resource in the Azure portal.
 AZURE_ENDPOINT=
 
-# API Version for Azure OpenAI
-API_VERSION=
+# API Version for Azure OpenAI. Check the Azure OpenAI documentation for the
+# current API version.
+# OPENAI_API_VERSION=2023-07-01-preview
 
-# Language Model for Azure OpenAI.
-# LLM_MODEL=
+# Language Model for Azure OpenAI. Specify the model you wish to use.
+# LLM_MODEL=gpt-4
 
-# Language Model Deployment Name for Azure OpenAI
-# LLM_DEPLOYMENT_NAME=
+# Language Model Deployment Name for Azure OpenAI. If you have a custom
+# deployment, specify its name here. Otherwise, leave this commented out.
+# LLM_DEPLOYMENT_NAME=ailab-llm
 
-# Embedding Model for Azure OpenAI
-# EMBED_MODEL=
+# Embedding Model for Azure OpenAI. Specify the embedding model you wish to use.
+# Check the Azure OpenAI documentation for available models.
+# EMBED_MODEL=text-embedding-ada-002
 
-# Embedding Model Deployment Name for Azure OpenAI.
-# EMBED_DEPLOYMENT_NAME=
+# Embedding Model Deployment Name for Azure OpenAI.
+# EMBED_DEPLOYMENT_NAME=ada
 
-# Database Connection Configuration
+# Database Connection Configuration. These details will be specific to your
+# database. Obtain them from your database administrator or your database setup.
 DB_SCHEME=
 DB_HOST=
 DB_PORT=
@@ -27,6 +38,7 @@ DB_USER=
 DB_PASSWORD=
 DB_NAME=
 
-# Table and Column for Querying the Database
+# Table and Column for Querying the Database. Specify the table and column you
+# wish to query. This will depend on your database schema.
 # TABLE_NAME=
 # COLUMN=
diff --git a/.gitignore b/.gitignore
index ab33422..73e0586 100644
--- a/.gitignore
+++ b/.gitignore
@@ -56,3 +56,12 @@ venv/
 ENV/
 env.bak/
 venv.bak/
+
+# Index files
+index/
+
+# Pickle files
+*.pkl
+
+# OS specific
+.DS_Store
diff --git a/docs/deployment_design/components.plantuml b/docs/deployment_design/components.plantuml
new file mode 100644
index 0000000..35aaf51
--- /dev/null
+++ b/docs/deployment_design/components.plantuml
@@ -0,0 +1,24 @@
+@startuml components
+left to right direction
+interface REST
+
+component "LlamaIndex API" as api {
+    package app as "flask app"
+    package LlamaIndex as "llamaIndex_db" {
+        package llama_index
+    }
+}
+app --> LlamaIndex: uses
+REST -- api
+
+folder Container as "Index folder" {
+    file vector as "default__vector_store.json"
+    file doc as "docstore.json"
+    file graph as "graph_store.json"
+    file image as "image__vector_store.json"
+    file idx as "index_store.json"
+}
+
+llama_index --> Container : reads
+
+@enduml
diff --git a/docs/deployment_design/components.png b/docs/deployment_design/components.png
new file mode 100644
index 0000000..0939b02
Binary files /dev/null and b/docs/deployment_design/components.png differ
diff --git a/docs/deployment_design/deployment.plantuml b/docs/deployment_design/deployment.plantuml
new file mode 100644
index 0000000..59527e4
--- /dev/null
+++ b/docs/deployment_design/deployment.plantuml
@@ -0,0 +1,16 @@
+@startuml deployment
+left to right direction
+interface REST
+
+cloud Azure {
+    node docker as "Docker container" {
+        component "LlamaIndex API" as api
+    }
+    database volume as "Docker Volume" {
+        folder Container as "Index folder"
+    }
+}
+
+REST -- api
+api --> Container : reads
+@enduml
diff --git a/docs/deployment_design/deployment.png b/docs/deployment_design/deployment.png
new file mode 100644
index 0000000..e62dd3f
Binary files /dev/null and b/docs/deployment_design/deployment.png differ
diff --git a/docs/deployment_design/design.ipynb b/docs/deployment_design/design.ipynb
new file mode 100644
index 0000000..51a80e1
--- /dev/null
+++ b/docs/deployment_design/design.ipynb
@@ -0,0 +1,334 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install -r ../../requirements.txt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "import sys\n",
+    "from dotenv import load_dotenv\n",
+    "from llama_index.core import StorageContext, get_response_synthesizer, load_index_from_storage\n",
+    "from llama_index.core.postprocessor import SimilarityPostprocessor\n",
+    "from llama_index.core.query_engine import RetrieverQueryEngine\n",
+    "from llama_index.core.retrievers import VectorIndexRetriever\n",
+    "from llama_index.llms.azure_openai import AzureOpenAI\n",
+    "from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding\n",
+    "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
+    "from llama_index.core import Settings\n",
+    "import pickle\n",
+    "import os\n",
+    "from pprint import pprint\n",
+    "import time\n",
+    "\n",
+    "load_dotenv()\n",
+    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
+    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# LlamaIndex deployment design"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
"## Component diagram\n", + "\n", + "![components](components.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Deployment diagram\n", + "\n", + "![deployment](deployment.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Downloading the `index` folder files (3GB+)\n", + "\n", + "Execute the cell below to download the index files to be able to run the test codes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download query engine from Azure Blob Storage Container\n", + "from azure.storage.blob import BlobServiceClient\n", + "\n", + "# Your storage account connection string\n", + "connection_string = os.getenv(\"STORAGE_ACCOUNT_CONN_STRING\")\n", + "\n", + "# The name of your container\n", + "container_name = \"llamaindex-v1\"\n", + "\n", + "# Initialize the BlobServiceClient\n", + "blob_service_client = BlobServiceClient.from_connection_string(connection_string)\n", + "\n", + "# Get the container client\n", + "container_client = blob_service_client.get_container_client(container_name)\n", + "\n", + "# Folder name in the blob container and local directory name\n", + "index_folder = \"index\"\n", + "remote_folder = f\"{index_folder}/\"\n", + "\n", + "# Create local directory if it does not exist\n", + "os.makedirs(index_folder, exist_ok=True)\n", + "\n", + "# List and download files in the \"index\" folder\n", + "for blob in container_client.list_blobs(name_starts_with=remote_folder):\n", + " file_name = blob.name.split('/')[-1] # Get the file name without folder path\n", + " if file_name:\n", + " local_file_path = os.path.join(index_folder, file_name)\n", + " blob_client = container_client.get_blob_client(blob)\n", + " with open(local_file_path, \"wb\") as file:\n", + " file.write(blob_client.download_blob().readall())\n", + "\n", + "print(\"Download completed.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## LlamaIndex API startup sequence diagram\n", + "\n", + "Our LlamaIndex API's startup will take longer than a regular Flask app startup\n", + "due to the initialization of index objects (`StorageContext` mainly). Here's a\n", + "sequence diagram showing the process.\n", + "\n", + "![startup sequence](startup_sequence.png)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example of creating the `VectorIndexRetriever` " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Configure LlamaIndex to use our embed model\n", + "embed_model = AzureOpenAIEmbedding(\n", + " model=\"text-embedding-ada-002\",\n", + " deployment_name=\"ada\",\n", + " api_key=os.getenv(\"AZURE_OPENAI_API_KEY\"),\n", + " azure_endpoint=os.getenv(\"AZURE_ENDPOINT\"),\n", + " api_version=os.getenv(\"API_VERSION\"),\n", + ")\n", + "Settings.embed_model = embed_model\n", + "\n", + "# Create the StorageContext (long running task, expect 5min+)\n", + "index_folder = \"index\"\n", + "storage_context = StorageContext.from_defaults(persist_dir=index_folder)\n", + "\n", + "# Optional. Save the StorageContext using pickle. Generates a 1.5GB+ file.\n", + "# Loading a pickle object is faster than recreating the StorageContext.\n", + "with open('storage_context.pkl', 'wb') as file:\n", + " pickle.dump(storage_context, file)\n", + "\n", + "# Optional. 
+    "# with open('storage_context.pkl', 'rb') as file:\n",
+    "#     storage_context = pickle.load(file)\n",
+    "\n",
+    "# Create the BaseIndex:\n",
+    "index = load_index_from_storage(storage_context)\n",
+    "\n",
+    "# Create the VectorIndexRetriever\n",
+    "similarity_top_k = 15  # arbitrary choice\n",
+    "retriever = VectorIndexRetriever(\n",
+    "    index=index,\n",
+    "    similarity_top_k=similarity_top_k,\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## LlamaIndex API search sequence diagram\n",
+    "\n",
+    "![search sequence](search_sequence.png)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Example of querying the `VectorIndexRetriever`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "HTTP Request: POST https://azure-open-ai-common-infra.openai.azure.com//openai/deployments/ada/embeddings?api-version=2023-07-01-preview \"HTTP/1.1 200 OK\"\n",
+      "TYPE(SELF._VECTOR_STORE) <class 'llama_index.core.vector_stores.simple.SimpleVectorStore'>\n",
+      "Elapsed time: 7.649816036224365 seconds\n",
+      "\n",
+      "Elapsed time: 7.867833852767944 seconds\n",
+      "{'class_name': 'NodeWithScore',\n",
+      " 'node': {'class_name': 'TextNode',\n",
+      "          'embedding': None,\n",
+      "          'end_char_idx': 337,\n",
+      "          'excluded_embed_metadata_keys': [],\n",
+      "          'excluded_llm_metadata_keys': [],\n",
+      "          'id_': 'bb398ec8-a128-460e-b821-515798985b87',\n",
+      "          'metadata': {'id_': '3980fd2c-1ba7-4128-a913-a6f238131347'},\n",
+      "          'metadata_seperator': '\\n',\n",
+      "          'metadata_template': '{key}: {value}',\n",
+      "          'relationships': {<NodeRelationship.SOURCE: '1'>: {'class_name': 'RelatedNodeInfo',\n",
+      "                                'hash': 'b69c1c52e150bdb73c709b7c3ef03025ce76e1848c0d55d23fef5205197da5f2',\n",
+      "                                'metadata': {'id_': '3980fd2c-1ba7-4128-a913-a6f238131347'},\n",
+      "                                'node_id': '06cffdb0-c000-4202-9a91-814d6d2cbc0d',\n",
+      "                                'node_type': <ObjectType.DOCUMENT: '4'>},\n",
+      "                            <NodeRelationship.PREVIOUS: '2'>: {'class_name': 'RelatedNodeInfo',\n",
+      "                                'hash': '5fc99a35b36960e6d0db3aec4b51f3c73365acfbaae5592a6f6865140225aa5d',\n",
+      "                                'metadata': {'id_': '9d41840d-6621-4670-a388-496ed95275f4'},\n",
+      "                                'node_id': 'c24289d0-ce12-4083-b2e3-3f72d48b37fd',\n",
+      "                                'node_type': <ObjectType.TEXT: '1'>},\n",
+      "                            <NodeRelationship.NEXT: '3'>: {'class_name': 'RelatedNodeInfo',\n",
+      "                                'hash': 'ebbe3fc592eb375b2108f615ec2889fcf8151be96f8228eb8df9c9cac0ac7f6a',\n",
+      "                                'metadata': {},\n",
+      "                                'node_id': '0d91095a-f917-4ce4-a653-61cb1e9b5659',\n",
+      "                                'node_type': <ObjectType.TEXT: '1'>}},\n",
+      "          'start_char_idx': 0,\n",
+      "          'text': 'Demandes de renseignements du public et des médias Demandes '\n",
+      "                  'de renseignements du publicSans frais : 1-800-442-2342 '\n",
+      "                  '(Canada et États-Unis) Téléphone : 1-613-773-2342 (local ou '\n",
+      "                  'international) Courriel : information@inspection.gc.ca '\n",
+      "                  'Renseignements généraux pour les médiasTéléphone : '\n",
+      "                  '613-773-6600 Courriel : cfia.media.acia@inspection.gc.ca',\n",
+      "          'text_template': '{metadata_str}\\n\\n{content}'},\n",
+      " 'score': 0.8702674996969137}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Query the VectorIndexRetriever\n",
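+    "# Sketch (commented out; not part of the timing below): the so-far unused\n",
+    "# imports above (RetrieverQueryEngine, get_response_synthesizer,\n",
+    "# SimilarityPostprocessor) could wrap the retriever in a full query engine.\n",
+    "# This assumes Settings.llm is configured with an AzureOpenAI LLM; the\n",
+    "# similarity_cutoff value is an arbitrary assumption.\n",
+    "#\n",
+    "# query_engine = RetrieverQueryEngine(\n",
+    "#     retriever=retriever,\n",
+    "#     response_synthesizer=get_response_synthesizer(),\n",
+    "#     node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],\n",
+    "# )\n",
+    "# response = query_engine.query(query_string)\n",
+    "\n",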
+ "query_string = \"Quels sont les numéros de téléphone pour les demandes de renseignements du public?\"\n", + "start_time = time.time()\n", + "nodes = retriever.retrieve(query_string)\n", + "end_time = time.time()\n", + "elapsed_time = end_time - start_time\n", + "print(\"\\nElapsed time:\", elapsed_time, \"seconds\")\n", + "\n", + "# Print the top most result\n", + "pprint(nodes[0].dict())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As can be seen from the execution of the cell above, the `retrieve` query\n", + "potentially takes some time (more than 5 seconds in some instances). This could\n", + "be problematic as the user expects near instant results. We might have to\n", + "optimize this process.\n", + "\n", + "By default, LlamaIdex uses a `SimpleVectorIndex` as a vector store which is [\"a\n", + "simple in-memory vector store that’s great for quick\n", + "experimentation\"](https://docs.llamaindex.ai/en/stable/module_guides/storing/vector_stores.html#simple-vector-store).\n", + "This explains the slow responses. There are no sources that indicate that\n", + "`SimpleVectorIndex` queries are GPU accelerated. `SimpleVectorIndex` doesn't\n", + "seem to be suitable for production. Alternatives we should consider are vector\n", + "databases. Here are few open-source candidates supported by LlamaIndex:\n", + "\n", + "- [Milvus](https://github.com/milvus-io/milvus)\n", + "- [Qdrant](https://github.com/qdrant/qdrant)\n", + "- [Chroma](https://github.com/chroma-core/chroma)\n", + "- [Weaviate](https://github.com/weaviate/weaviate)\n", + "- [Pgvector](https://github.com/pgvector/pgvector)\n", + "\n", + "Of those, `Milvus` seems to have [higher\n", + "performance](https://benchmark.vectorview.ai/vectordbs.html) and is the most\n", + "mature (first release: Oct 21, 2019). `Pgvector` on the other hand is built on\n", + "`Postgres` which is a more familiar tool. Both should be tried in solving our\n", + "latency problem.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Resources\n", + "\n", + "- [Loading data in LlamaIndex from\n", + " filesystem](https://docs.llamaindex.ai/en/stable/module_guides/storing/save_load.html#loading-data)\n", + "- [Creating a flask API with\n", + " LlamaIndex](https://docs.llamaindex.ai/en/stable/understanding/putting_it_all_together/apps/fullstack_app_guide.html#flask-backend)\n", + "- [A comparison of vector\n", + " databases](https://benchmark.vectorview.ai/vectordbs.html)\n", + "- [Vector databases supported by\n", + " LlammaIndex](https://docs.llamaindex.ai/en/stable/module_guides/storing/vector_stores.html#vector-store-options-feature-support)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Notes\n", + "\n", + "- A semantic reranker might need to be added to the pipeline once we solve the\n", + " latency problem." 
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llamaindex-db",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/deployment_design/search_sequence.plantuml b/docs/deployment_design/search_sequence.plantuml
new file mode 100644
index 0000000..53bdd2e
--- /dev/null
+++ b/docs/deployment_design/search_sequence.plantuml
@@ -0,0 +1,19 @@
+@startuml search sequence
+
+actor user
+participant "app:Flask" as app
+participant "config:Config" as config
+participant ":VectorIndexRetriever" as retriever
+
+user -> app: POST /search query_string
+activate app
+app -> config: get retriever
+app -> retriever: retrieve with query_string
+activate retriever
+retriever --> app: nodes
+deactivate retriever
+app -> app: generate results from nodes
+app --> user: results
+deactivate app
+
+@enduml
diff --git a/docs/deployment_design/search_sequence.png b/docs/deployment_design/search_sequence.png
new file mode 100644
index 0000000..576a81a
Binary files /dev/null and b/docs/deployment_design/search_sequence.png differ
diff --git a/docs/deployment_design/startup_sequence.plantuml b/docs/deployment_design/startup_sequence.plantuml
new file mode 100644
index 0000000..9784cd9
--- /dev/null
+++ b/docs/deployment_design/startup_sequence.plantuml
@@ -0,0 +1,20 @@
+@startuml startup sequence
+
+participant "App Creator" as creator
+
+create ":StorageContext" as sc
+creator -> sc: create with index_folder
+
+create ":BaseIndex" as index
+creator -> index: create with storage_context
+
+create ":VectorIndexRetriever" as retriever
+creator -> retriever: create with index
+
+create ":Config" as conf
+creator -> conf: create with retriever
+
+create ":Flask" as app
+creator -> app: create with config
+
+@enduml
diff --git a/docs/deployment_design/startup_sequence.png b/docs/deployment_design/startup_sequence.png
new file mode 100644
index 0000000..07931de
Binary files /dev/null and b/docs/deployment_design/startup_sequence.png differ
diff --git a/requirements.txt b/requirements.txt
index 45e89c1..4e0e298 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,11 @@
-jupyterlab
-notebook
-asyncpg
-llama-index
-psycopg3
-SQLAlchemy
-python-dotenv
+jupyterlab==4.1.3
+notebook==7.1.1
+asyncpg==0.29.0
+llama-index==0.10.17
+psycopg==3.1.18
+SQLAlchemy==2.0.28
+python-dotenv==1.0.1
+openai==1.13.3
+azure-storage-blob==12.19.1
+llama-index-vector-stores-postgres==0.1.2
+llama-index-embeddings-azure-openai==0.1.6