From 406ae6a940939de4b311d09b1abe8270512f55b2 Mon Sep 17 00:00:00 2001 From: Chaoyu Date: Thu, 10 Aug 2023 02:49:46 -0700 Subject: [PATCH 1/7] feat: support self-hosted embedding service via BentoML --- .env.example | 8 ++++ README.md | 22 +++++++++- cli.py | 7 ++++ realtime_ai_character/database/chroma.py | 21 ++++++++-- requirements.txt | 1 + sample_cloud_deployment/deployment.yaml | 2 + .../embedding_service.yaml | 41 +++++++++++++++++++ 7 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 sample_cloud_deployment/embedding_service.yaml diff --git a/.env.example b/.env.example index 5b35b4e04..be0cf89b1 100644 --- a/.env.example +++ b/.env.example @@ -69,3 +69,11 @@ SERPAPI_API_KEY= # instructions: https://stackoverflow.com/a/62037708 GOOGLE_API_KEY= GOOGLE_CSE_ID= + +# Enable SentenceEmbedding model served via BentoML +# For local embedding service, use: +# docker run --rm -p 3000:3000 ghcr.io/bentoml/sentence-embedding-bento:0.1.0 +# Then set the following env var: +# BENTOML_EMBEDDING_ENDPOINT=http://localhost:3000 +# Instructions for customizing your embedding model server: https://github.com/bentoml/sentence-embedding-bento +BENTOML_EMBEDDING_ENDPOINT= diff --git a/README.md b/README.md index 67b1b1328..a973f4712 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ __Demo settings: Web, GPT4, ElevenLabs with voice clone, Chroma, Google Speech t - ✅**Web**: [React JS](https://react.dev/), [Vanilla JS](http://vanilla-js.com/), [WebSockets](https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API) - ✅**Mobile**: [Swift](https://developer.apple.com/swift/), [WebSockets](https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API) -- ✅**Backend**: [FastAPI](https://fastapi.tiangolo.com/), [SQLite](https://www.sqlite.org/index.html), [Docker](https://www.docker.com/) +- ✅**Backend**: [FastAPI](https://fastapi.tiangolo.com/), [SQLite](https://www.sqlite.org/index.html), [Docker](https://www.docker.com/), 
[BentoML](https://bentoml.com/) - ✅**Data Ingestion**: [LlamaIndex](https://www.llamaindex.ai/), [Chroma](https://www.trychroma.com/) - ✅**LLM Orchestration**: [LangChain](https://langchain.com/), [Chroma](https://www.trychroma.com/) - ✅**LLM**: [OpenAI GPT3.5/4](https://platform.openai.com/docs/api-reference/chat), [Anthropic Claude 2](https://docs.anthropic.com/claude/docs/getting-started-with-claude) @@ -157,6 +157,26 @@ ELEVEN_LABS_API_KEY= ``` +### 4. (Optional) Prepare self-hosted embedding service - BentoML Deployment Endpoint +
👇click me + +1. Install [Docker](https://docs.docker.com/engine/install/) + +2. Run the text embedding service docker image generated with BentoML: + + ```bash + docker run --rm -p 3000:3000 ghcr.io/bentoml/sentence-embedding-bento:0.1.0 + ``` + +3. Set the Text Embedding Endpoint in your .env file: + + ``` + BENTOML_EMBEDDING_ENDPOINT=http://localhost:3000 + ``` + +For cloud deployment options and customizing your own embedding model, check out the source repo [here](https://github.com/bentoml/sentence-embedding-bento) +
+ ## 💿 Installation via Python - **Step 1**. Clone the repo ```sh diff --git a/cli.py b/cli.py index aaba40bd7..cff896a34 100755 --- a/cli.py +++ b/cli.py @@ -69,10 +69,17 @@ def image_exists(name): return result.returncode == 0 +@click.command(help="Run BentoML text embedding service locally via Docker at localhost:3000") +def run_embedding_service(): + click.secho("Launching BentoML SentenceEmbedding Service...", fg='green') + subprocess.run(["docker", "run", "--rm", "-p", "3000:3000", "ghcr.io/bentoml/sentence-embedding-bento:0.1.0"]) + + cli.add_command(docker_build) cli.add_command(docker_run) cli.add_command(docker_delete) cli.add_command(run_uvicorn) +cli.add_command(run_embedding_service) if __name__ == '__main__': diff --git a/realtime_ai_character/database/chroma.py b/realtime_ai_character/database/chroma.py index e8365a2dd..54b841ec3 100644 --- a/realtime_ai_character/database/chroma.py +++ b/realtime_ai_character/database/chroma.py @@ -7,10 +7,23 @@ load_dotenv() logger = get_logger(__name__) -embedding = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY")) -if os.getenv('OPENAI_API_TYPE') == 'azure': - embedding = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"), deployment=os.getenv( - "OPENAI_API_EMBEDDING_DEPLOYMENT_NAME", "text-embedding-ada-002"), chunk_size=1) + +embedding_endpoint = os.getenv("BENTOML_EMBEDDING_ENDPOINT") + +if embedding_endpoint: + # Use self-hosted embedding model via BentoML API endpoint + from bentoml.client import Client + client = Client.from_url(embedding_endpoint) + embedding = client.encode +else: + embedding = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY")) + if os.getenv('OPENAI_API_TYPE') == 'azure': + embedding = OpenAIEmbeddings( + openai_api_key=os.getenv("OPENAI_API_KEY"), + deployment=os.getenv( + "OPENAI_API_EMBEDDING_DEPLOYMENT_NAME", "text-embedding-ada-002" + ), + chunk_size=1) def get_chroma(): diff --git a/requirements.txt b/requirements.txt index 2802037d9..1e3adbf67 100644 
--- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ aioconsole aiofiles alembic anthropic +bentoml>=1.1 chromadb>=0.4.2 click EbookLib diff --git a/sample_cloud_deployment/deployment.yaml b/sample_cloud_deployment/deployment.yaml index 9975021a2..514f00e20 100644 --- a/sample_cloud_deployment/deployment.yaml +++ b/sample_cloud_deployment/deployment.yaml @@ -50,6 +50,8 @@ spec: value: - name: BRUCE_VOICE value: + - name: BENTOML_EMBEDDING_ENDPOINT + value: bentoml-embedding-service..svc.cluster.local --- apiVersion: v1 kind: Service diff --git a/sample_cloud_deployment/embedding_service.yaml b/sample_cloud_deployment/embedding_service.yaml new file mode 100644 index 000000000..47a9cda5f --- /dev/null +++ b/sample_cloud_deployment/embedding_service.yaml @@ -0,0 +1,41 @@ +# For advanced BentoML deployment on kubernetes, see: +# https://www.kubeflow.org/docs/external-add-ons/serving/bentoml/ +# https://github.com/bentoml/yatai +apiVersion: apps/v1 +kind: Deployment +metadata: + name: bentoml-embedding-deployment + labels: + app: bentoml-text-embedding +spec: + replicas: 1 + selector: + matchLabels: + app: bentoml-text-embedding + template: + metadata: + labels: + app: bentoml-text-embedding + spec: + containers: + - name: bentoml-text-embedding + image: ghcr.io/bentoml/sentence-embedding-bento:0.1.0 + ports: + - containerPort: 3000 + env: + - name: BENTOML_CONFIG_OPTIONS + value: "api_server.metrics.namespace=realchar,api_server.traffic.timeout=10" +--- +apiVersion: v1 +kind: Service +metadata: + name: bentoml-embedding-service +spec: + type: ClusterIP + selector: + app: bentoml-text-embedding + ports: + - protocol: TCP + port: 80 + targetPort: 3000 + From 4ca94cf4ea1862b13f8caa15a8a504d1ac408f94 Mon Sep 17 00:00:00 2001 From: Chaoyu Date: Thu, 10 Aug 2023 15:18:17 -0700 Subject: [PATCH 2/7] fix: chroma embedding func expects list[float] --- realtime_ai_character/database/chroma.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git 
a/realtime_ai_character/database/chroma.py b/realtime_ai_character/database/chroma.py index 54b841ec3..90d8fe0e1 100644 --- a/realtime_ai_character/database/chroma.py +++ b/realtime_ai_character/database/chroma.py @@ -1,3 +1,5 @@ +from typing import Callable, List + import os from dotenv import load_dotenv from langchain.vectorstores import Chroma @@ -14,11 +16,11 @@ # Use self-hosted embedding model via BentoML API endpoint from bentoml.client import Client client = Client.from_url(embedding_endpoint) - embedding = client.encode + embedding_func: Callable[[List[str]], List[float]] = lambda docs: client.encode(docs).tolist() else: - embedding = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY")) + embedding_func = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY")) if os.getenv('OPENAI_API_TYPE') == 'azure': - embedding = OpenAIEmbeddings( + embedding_func = OpenAIEmbeddings( openai_api_key=os.getenv("OPENAI_API_KEY"), deployment=os.getenv( "OPENAI_API_EMBEDDING_DEPLOYMENT_NAME", "text-embedding-ada-002" @@ -29,7 +31,7 @@ def get_chroma(): chroma = Chroma( collection_name='llm', - embedding_function=embedding, + embedding_function=embedding_func, persist_directory='./chroma.db' ) return chroma From cb637d42f40357d0425cf2d0935ae6b9ba973508 Mon Sep 17 00:00:00 2001 From: Chaoyu Date: Thu, 10 Aug 2023 15:19:24 -0700 Subject: [PATCH 3/7] chore: avoid using 3000 port --- .env.example | 7 ++++--- README.md | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.env.example b/.env.example index e65468be9..546ddae6d 100644 --- a/.env.example +++ b/.env.example @@ -75,8 +75,9 @@ OVERWRITE_CHROMA=true # Enable SentenceEmbedding model served via BentoML # For local embedding service, use: -# docker run --rm -p 3000:3000 ghcr.io/bentoml/sentence-embedding-bento:0.1.0 +# docker run --rm -p 3001:3001 ghcr.io/bentoml/sentence-embedding-bento:latest --port 3001 # Then set the following env var: -# BENTOML_EMBEDDING_ENDPOINT=http://localhost:3000 
+# BENTOML_EMBEDDING_ENDPOINT=http://localhost:3001 # Instructions for customizing your embedding model server: https://github.com/bentoml/sentence-embedding-bento -BENTOML_EMBEDDING_ENDPOINT= \ No newline at end of file +BENTOML_EMBEDDING_ENDPOINT= + diff --git a/README.md b/README.md index a973f4712..832103b4e 100644 --- a/README.md +++ b/README.md @@ -165,13 +165,13 @@ ELEVEN_LABS_API_KEY= 2. Run the text embedding service docker image generated with BentoML: ```bash - docker run --rm -p 3000:3000 ghcr.io/bentoml/sentence-embedding-bento:0.1.0 + docker run --rm -p 3001:3001 ghcr.io/bentoml/sentence-embedding-bento:latest --port 3001 ``` 3. Set the Text Embedding Endpoint in your .env file: ``` - BENTOML_EMBEDDING_ENDPOINT=http://localhost:3000 + BENTOML_EMBEDDING_ENDPOINT=http://localhost:3001 ``` For cloud deployment options and customizing your own embedding model, check out the source repo [here](https://github.com/bentoml/sentence-embedding-bento) From 23fcb357dd4f2423245005ebf8f7ed8939a457e8 Mon Sep 17 00:00:00 2001 From: Chaoyu Date: Thu, 10 Aug 2023 15:23:17 -0700 Subject: [PATCH 4/7] chore: fix ruff warning --- realtime_ai_character/database/chroma.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/realtime_ai_character/database/chroma.py b/realtime_ai_character/database/chroma.py index 90d8fe0e1..675e736ba 100644 --- a/realtime_ai_character/database/chroma.py +++ b/realtime_ai_character/database/chroma.py @@ -1,5 +1,3 @@ -from typing import Callable, List - import os from dotenv import load_dotenv from langchain.vectorstores import Chroma @@ -16,7 +14,8 @@ # Use self-hosted embedding model via BentoML API endpoint from bentoml.client import Client client = Client.from_url(embedding_endpoint) - embedding_func: Callable[[List[str]], List[float]] = lambda docs: client.encode(docs).tolist() + def embedding_func(docs: list[str]) -> list[float]: + return client.encode(docs).tolist() else: embedding_func = 
OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY")) if os.getenv('OPENAI_API_TYPE') == 'azure': From af9b5560d0958df923c5d1f4ac3a3bd7ff44145d Mon Sep 17 00:00:00 2001 From: Chaoyu Date: Fri, 11 Aug 2023 14:47:11 -0700 Subject: [PATCH 5/7] Update embedding service port in CLI Co-authored-by: shaun --- cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli.py b/cli.py index cff896a34..547ced651 100755 --- a/cli.py +++ b/cli.py @@ -72,7 +72,7 @@ def image_exists(name): @click.command(help="Run BentoML text embedding service locally via Docker at localhost:3000") def run_embedding_service(): click.secho("Launching BentoML SentenceEmbedding Service...", fg='green') - subprocess.run(["docker", "run", "--rm", "-p", "3000:3000", "ghcr.io/bentoml/sentence-embedding-bento:0.1.0"]) + subprocess.run(["docker", "run", "--rm", "-p", "3001:3001", "ghcr.io/bentoml/sentence-embedding-bento:latest"]) cli.add_command(docker_build) From e164bc2e439e4d04609f31a623bdd129df82139d Mon Sep 17 00:00:00 2001 From: Chaoyu Date: Fri, 11 Aug 2023 14:47:29 -0700 Subject: [PATCH 6/7] Update embed_func protocol to work with Chroma interface Co-authored-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- realtime_ai_character/database/chroma.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/realtime_ai_character/database/chroma.py b/realtime_ai_character/database/chroma.py index 675e736ba..cd92f82ee 100644 --- a/realtime_ai_character/database/chroma.py +++ b/realtime_ai_character/database/chroma.py @@ -14,8 +14,10 @@ # Use self-hosted embedding model via BentoML API endpoint from bentoml.client import Client client = Client.from_url(embedding_endpoint) - def embedding_func(docs: list[str]) -> list[float]: - return client.encode(docs).tolist() + class BentoEmbeddings: + def embed_documents(self, texts: list[str], chunk_size: int | None = 0) -> list[list[float]]: + return client.encode(docs).tolist() + embedding_func = 
BentoEmbeddings() else: embedding_func = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY")) if os.getenv('OPENAI_API_TYPE') == 'azure': From 20285e73cdf98e565fd2263079f993c7c822fa00 Mon Sep 17 00:00:00 2001 From: Chaoyu Date: Fri, 11 Aug 2023 14:58:39 -0700 Subject: [PATCH 7/7] Fix embeddings protocol --- realtime_ai_character/database/chroma.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/realtime_ai_character/database/chroma.py b/realtime_ai_character/database/chroma.py index cd92f82ee..b705c7ae7 100644 --- a/realtime_ai_character/database/chroma.py +++ b/realtime_ai_character/database/chroma.py @@ -1,23 +1,32 @@ import os from dotenv import load_dotenv +from bentoml.client import Client from langchain.vectorstores import Chroma from langchain.embeddings import OpenAIEmbeddings +from langchain.embeddings.base import Embeddings from realtime_ai_character.logger import get_logger load_dotenv() logger = get_logger(__name__) +class BentoEmbeddings(Embeddings): + def __init__(self, embedding_svc_client: Client): + self.client = embedding_svc_client + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + return self.client.encode(texts).tolist() + + def embed_query(self, text: str) -> list[float]: + return self.client.encode([text]).tolist()[0] + + embedding_endpoint = os.getenv("BENTOML_EMBEDDING_ENDPOINT") if embedding_endpoint: # Use self-hosted embedding model via BentoML API endpoint - from bentoml.client import Client client = Client.from_url(embedding_endpoint) - class BentoEmbeddings: - def embed_documents(self, texts: list[str], chunk_size: int | None = 0) -> list[list[float]]: - return client.encode(docs).tolist() - embedding_func = BentoEmbeddings() + embedding_func = BentoEmbeddings(client) else: embedding_func = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY")) if os.getenv('OPENAI_API_TYPE') == 'azure':