Skip to content

Commit

Permalink
chore: move vector stores to their own packages (#1605)
Browse files Browse the repository at this point in the history
  • Loading branch information
thucpn authored Jan 24, 2025
1 parent b24ffc6 commit 34faf48
Show file tree
Hide file tree
Showing 49 changed files with 3,214 additions and 2,510 deletions.
15 changes: 15 additions & 0 deletions .changeset/long-insects-collect.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
"@llamaindex/astra": patch
"@llamaindex/chroma": patch
"@llamaindex/milvus": patch
"@llamaindex/mongodb": patch
"@llamaindex/pinecone": patch
"@llamaindex/qdrant": patch
"@llamaindex/upstash": patch
"@llamaindex/weaviate": patch
"@llamaindex/e2e": patch
"@llamaindex/llamaindex-test": patch
"llamaindex": patch
---

chore: move vector stores to their own packages
2 changes: 1 addition & 1 deletion examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"dotenv": "^16.4.5",
"js-tiktoken": "^1.0.14",
"llamaindex": "^0.8.31",
"mongodb": "^6.7.0",
"mongodb": "6.7.0",
"pathe": "^1.1.2",
"postgres": "^3.4.4"
},
Expand Down
18 changes: 8 additions & 10 deletions packages/llamaindex/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,9 @@
"@anthropic-ai/sdk": "0.32.1",
"@aws-crypto/sha256-js": "^5.2.0",
"@aws-sdk/client-sso-oidc": "^3.693.0",
"@datastax/astra-db-ts": "^1.4.1",
"@discoveryjs/json-ext": "^0.6.1",
"@google-cloud/vertexai": "1.9.0",
"@google/generative-ai": "0.21.0",
"@grpc/grpc-js": "^1.12.2",
"@llamaindex/anthropic": "workspace:*",
"@llamaindex/clip": "workspace:*",
"@llamaindex/cloud": "workspace:*",
Expand All @@ -45,29 +43,29 @@
"@llamaindex/vllm": "workspace:*",
"@llamaindex/postgres": "workspace:*",
"@llamaindex/azure": "workspace:*",
"@llamaindex/astra": "workspace:*",
"@llamaindex/milvus": "workspace:*",
"@llamaindex/chroma": "workspace:*",
"@llamaindex/mongodb": "workspace:*",
"@llamaindex/pinecone": "workspace:*",
"@llamaindex/qdrant": "workspace:*",
"@llamaindex/upstash": "workspace:*",
"@llamaindex/weaviate": "workspace:*",
"@mistralai/mistralai": "^1.3.4",
"@mixedbread-ai/sdk": "^2.2.11",
"@pinecone-database/pinecone": "^4.0.0",
"@qdrant/js-client-rest": "^1.11.0",
"@types/lodash": "^4.17.7",
"@types/node": "^22.9.0",
"@upstash/vector": "^1.1.5",
"@zilliz/milvus2-sdk-node": "^2.4.6",
"ajv": "^8.17.1",
"assemblyai": "^4.8.0",
"chromadb": "1.9.2",
"chromadb-default-embed": "^2.13.2",
"cohere-ai": "7.14.0",
"gpt-tokenizer": "^2.6.2",
"groq-sdk": "^0.8.0",
"js-tiktoken": "^1.0.14",
"lodash": "^4.17.21",
"magic-bytes.js": "^1.10.0",
"mongodb": "^6.7.0",
"openai": "^4.73.1",
"pathe": "^1.1.2",
"rake-modified": "^1.0.8",
"weaviate-client": "^3.2.3",
"wikipedia": "^2.1.2",
"wink-nlp": "^2.3.0",
"zod": "^3.23.8"
Expand Down
269 changes: 1 addition & 268 deletions packages/llamaindex/src/vector-store/AstraDBVectorStore.ts
Original file line number Diff line number Diff line change
@@ -1,268 +1 @@
import {
Collection,
DataAPIClient,
Db,
type Filter,
type FindOptions,
type SomeDoc,
} from "@datastax/astra-db-ts";
import type { BaseNode } from "@llamaindex/core/schema";
import { MetadataMode } from "@llamaindex/core/schema";
import {
BaseVectorStore,
FilterCondition,
FilterOperator,
metadataDictToNode,
nodeToMetadata,
parseArrayValue,
type MetadataFilter,
type MetadataFilters,
type VectorStoreBaseParams,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "@llamaindex/core/vector-store";
import { getEnv } from "@llamaindex/env";

/**
 * Vector store backed by a collection in an Astra DB database.
 *
 * The constructor only builds the client and database handles; a collection
 * must be selected with {@link AstraDBVectorStore.connect} or
 * {@link AstraDBVectorStore.createAndConnect} before calling `add`, `delete`
 * or `query`, otherwise those methods throw.
 */
export class AstraDBVectorStore extends BaseVectorStore {
  storesText: boolean = true;
  flatMetadata: boolean = true;

  /** Document field that holds the node id (defaults to `"_id"`). */
  idKey: string;
  /** Document field that holds the node's text content (defaults to `"content"`). */
  contentKey: string;

  private astraClient: DataAPIClient;
  private astraDB: Db;
  // Stays undefined until `connect` / `createAndConnect` has been called.
  private collection: Collection | undefined;

  /**
   * Build the Astra DB client and database handle.
   *
   * @param init - Optional settings. `params.token`, `params.endpoint` and
   *   `params.namespace` take precedence over the `ASTRA_DB_APPLICATION_TOKEN`,
   *   `ASTRA_DB_API_ENDPOINT` and `ASTRA_DB_NAMESPACE` environment variables.
   *   The namespace falls back to `"default_keyspace"` when neither is set.
   *   `idKey` and `contentKey` override the document field names.
   * @throws Error when no token or no endpoint can be resolved.
   */
  constructor(
    init?: Partial<AstraDBVectorStore> & {
      params?: {
        token: string;
        endpoint: string;
        namespace?: string;
      };
    } & VectorStoreBaseParams,
  ) {
    super(init);
    const token = init?.params?.token ?? getEnv("ASTRA_DB_APPLICATION_TOKEN");
    const endpoint = init?.params?.endpoint ?? getEnv("ASTRA_DB_API_ENDPOINT");

    if (!token) {
      throw new Error(
        "Must specify ASTRA_DB_APPLICATION_TOKEN via env variable.",
      );
    }
    if (!endpoint) {
      throw new Error("Must specify ASTRA_DB_API_ENDPOINT via env variable.");
    }
    const namespace =
      init?.params?.namespace ??
      getEnv("ASTRA_DB_NAMESPACE") ??
      "default_keyspace";
    this.astraClient = new DataAPIClient(token, {
      caller: ["LlamaIndexTS"],
    });
    this.astraDB = this.astraClient.db(endpoint, { namespace });

    this.idKey = init?.idKey ?? "_id";
    this.contentKey = init?.contentKey ?? "content";
  }

  /**
   * Create a new collection in your Astra DB vector database and connects to it.
   * You must call this method or `connect` before adding, deleting, or querying.
   *
   * @param collection - Your new collection's name
   * @param options - CreateCollectionOptions used to set the number of vector dimensions and similarity metric
   * @returns Promise that resolves if the creation did not throw an error.
   */
  async createAndConnect(
    collection: string,
    options?: Parameters<Db["createCollection"]>[1],
  ): Promise<void> {
    this.collection = await this.astraDB.createCollection(collection, options);
    console.debug("Created Astra DB collection");
  }

  /**
   * Connect to an existing collection in your Astra DB vector database.
   * You must call this method or `createAndConnect` before adding, deleting, or querying.
   *
   * @param collection - Your existing collection's name
   * @returns Promise that resolves if the connection did not throw an error.
   */
  async connect(collection: string): Promise<void> {
    this.collection = await this.astraDB.collection(collection);
    console.debug("Connected to Astra DB collection");
  }

  /**
   * Get an instance of your Astra DB client.
   * @returns the AstraDB client
   */
  client(): DataAPIClient {
    return this.astraClient;
  }

  /**
   * Add your document(s) to your Astra DB collection.
   *
   * Each node is stored as one document holding its embedding under `$vector`,
   * its id under `idKey`, its content under `contentKey`, and the entries
   * produced by `nodeToMetadata` spread on top.
   *
   * @param nodes - Nodes to insert; an empty or missing list is a no-op.
   * @returns an array of node ids which were added
   * @throws Error when no collection has been connected yet.
   */
  async add(nodes: BaseNode[]): Promise<string[]> {
    if (!this.collection) {
      throw new Error("Must connect to collection before adding.");
    }
    const collection = this.collection;

    if (!nodes || nodes.length === 0) {
      return [];
    }

    const dataToInsert = nodes.map((node) => {
      const metadata = nodeToMetadata(
        node,
        true,
        this.contentKey,
        this.flatMetadata,
      );

      // `metadata` is spread last, so any key it shares with the explicit
      // fields above wins.
      return {
        $vector: node.getEmbedding(),
        [this.idKey]: node.id_,
        [this.contentKey]: node.getContent(MetadataMode.NONE),
        ...metadata,
      };
    });

    console.debug(`Adding ${dataToInsert.length} rows to table`);

    const insertResult = await collection.insertMany(dataToInsert);

    return insertResult.insertedIds as string[];
  }

  /**
   * Delete a document from your Astra DB collection.
   *
   * NOTE(review): the filter matches on the literal `_id` field, not on
   * `this.idKey`; confirm this is intended when a custom `idKey` is used.
   *
   * @param refDocId - The id of the document to delete
   * @param deleteOptions - DeleteOneOptions to pass to the delete query
   * @returns Promise that resolves if the delete query did not throw an error.
   * @throws Error when no collection has been connected yet.
   */
  async delete(
    refDocId: string,
    deleteOptions?: Parameters<Collection["deleteOne"]>[1],
  ): Promise<void> {
    if (!this.collection) {
      throw new Error("Must connect to collection before deleting.");
    }
    const collection = this.collection;

    console.debug(`Deleting row with id ${refDocId}`);

    await collection.deleteOne(
      {
        _id: refDocId,
      },
      deleteOptions,
    );
  }

  /**
   * Query documents from your Astra DB collection to get the closest match to your embedding.
   *
   * When `query.queryEmbedding` is set it is used as the `$vector` sort and
   * replaces any `sort` passed in `options`; `limit` and `includeSimilarity`
   * are always set by this method.
   *
   * @param query - VectorStoreQuery
   * @param options - FindOptions
   * @returns The matching nodes with their ids and similarity scores, in cursor order.
   * @throws Error when no collection has been connected yet.
   */
  async query(
    query: VectorStoreQuery,
    options?: Parameters<Collection["find"]>[1],
  ): Promise<VectorStoreQueryResult> {
    if (!this.collection) {
      throw new Error("Must connect to collection before querying.");
    }
    const collection = this.collection;

    const astraFilter = this.toAstraFilter(query.filters);
    const cursor = await collection.find(astraFilter, {
      ...options,
      sort: query.queryEmbedding
        ? { $vector: query.queryEmbedding }
        : options?.sort,
      limit: query.similarityTopK,
      includeSimilarity: true,
    } as FindOptions);

    const nodes: BaseNode[] = [];
    const ids: string[] = [];
    const similarities: number[] = [];

    for await (const row of cursor) {
      // Peel off the fields this store manages; whatever remains is metadata.
      const {
        $vector: embedding,
        $similarity: similarity,
        [this.idKey]: id,
        [this.contentKey]: content,
        ...metadata
      } = row;

      const node = metadataDictToNode(metadata, {
        fallback: {
          id,
          text: content,
          ...metadata,
        },
      });
      node.setContent(content);

      ids.push(id);
      similarities.push(similarity);
      nodes.push(node);
    }

    return {
      similarities,
      ids,
      nodes,
    };
  }

  /**
   * Translate metadata filters into an Astra DB filter document.
   *
   * @param filters - Filters to translate; the condition defaults to `AND`.
   * @returns `{}` when there is nothing to filter on, otherwise a single
   *   `$and` / `$or` clause wrapping the translated items.
   * @throws Error when the filter condition is neither `AND` nor `OR`.
   */
  private toAstraFilter(filters?: MetadataFilters): Filter<SomeDoc> {
    // A filters object without a `filters` array is treated like an empty one
    // instead of failing on `.map` of undefined.
    const items = filters?.filters;
    if (!filters || !items || items.length === 0) return {};
    const condition = filters.condition ?? FilterCondition.AND;
    const listFilter = items.map((f) => this.buildFilterItem(f));
    if (condition === FilterCondition.OR) return { $or: listFilter };
    if (condition === FilterCondition.AND) return { $and: listFilter };
    throw new Error(`Not supported filter condition: ${condition}`);
  }

  /**
   * Translate a single metadata filter into an Astra DB filter clause.
   *
   * @param filter - The key / operator / value triple to translate.
   * @returns A one-key filter document for `filter.key`.
   * @throws Error when the operator has no mapping here.
   */
  private buildFilterItem(filter: MetadataFilter): Filter<SomeDoc> {
    const { key, operator, value } = filter;
    switch (operator) {
      case FilterOperator.EQ:
        return { [key]: value };
      case FilterOperator.NE:
        return { [key]: { $ne: value } };
      case FilterOperator.GT:
        return { [key]: { $gt: value } };
      case FilterOperator.LT:
        return { [key]: { $lt: value } };
      case FilterOperator.GTE:
        return { [key]: { $gte: value } };
      case FilterOperator.LTE:
        return { [key]: { $lte: value } };
      case FilterOperator.IN:
        return { [key]: { $in: parseArrayValue(value) } };
      case FilterOperator.NIN:
        return { [key]: { $nin: parseArrayValue(value) } };
      case FilterOperator.IS_EMPTY:
        // "Empty" is expressed as an array field of length zero.
        return { [key]: { $size: 0 } };
      default:
        throw new Error(`Not supported filter operator: ${operator}`);
    }
  }
}
// Re-export everything exported by the `@llamaindex/astra` package.
export * from "@llamaindex/astra";
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Re-export everything exported by the `@llamaindex/azure` package.
export * from "@llamaindex/azure";
Loading

0 comments on commit 34faf48

Please sign in to comment.