From da638df86f34e617b7f4e101f3567f0e17ce5d79 Mon Sep 17 00:00:00 2001 From: Daniel Bos Date: Thu, 18 Jan 2024 17:26:49 +0800 Subject: [PATCH 1/2] embeddings: add adapter for EmbedderClient interface An adapter for the EmbedderClient interface has been added. This allows the use of ordinary functions as Embedder Clients. This change provides flexibility in implementing the EmbedderClient interface, as it allows any function with the appropriate signature to be used as an EmbedderClient. --- embeddings/embedding.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/embeddings/embedding.go b/embeddings/embedding.go index 96f8c171f..c3d7198d7 100644 --- a/embeddings/embedding.go +++ b/embeddings/embedding.go @@ -35,6 +35,15 @@ type EmbedderClient interface { CreateEmbedding(ctx context.Context, texts []string) ([][]float32, error) } +// EmbedderClientFunc is an adapter to allow the use of ordinary functions as Embedder Clients. If +// `f` is a function with the appropriate signature, `EmbedderClientFunc(f)` is an `EmbedderClient` +// that calls `f`. +type EmbedderClientFunc func(ctx context.Context, texts []string) ([][]float32, error) + +func (e EmbedderClientFunc) CreateEmbedding(ctx context.Context, texts []string) ([][]float32, error) { + return e(ctx, texts) +} + type EmbedderImpl struct { client EmbedderClient From a7cd70c11d0c1cc0c5990cff690f660eec363e87 Mon Sep 17 00:00:00 2001 From: Daniel Bos Date: Thu, 18 Jan 2024 17:29:01 +0800 Subject: [PATCH 2/2] weaviate: allow embedder override in vectorstores options The changes allow the embedder to be overridden in the vectorstores options. This is done by changing the embedder calls in `AddDocuments` and `SimilaritySearch` functions to use the embedder from the options instead of the store. The default embedder is still the one from the store, but it can be overwritten by passing a `vectorstores.WithEmbedder` option. 
A new test case 'TestWeaviateWithOptionEmbedder' is added to ensure that the embedder provided as an option to either `AddDocuments` or `SimilaritySearch` takes precedence over the one provided when creating the Store. This change provides more flexibility in choosing the embedder at runtime, which can be useful in scenarios where the same store is used with different embedders. --- vectorstores/weaviate/weaviate.go | 10 +++-- vectorstores/weaviate/weaviate_test.go | 55 ++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/vectorstores/weaviate/weaviate.go b/vectorstores/weaviate/weaviate.go index 5a22beea9..70122ee50 100644 --- a/vectorstores/weaviate/weaviate.go +++ b/vectorstores/weaviate/weaviate.go @@ -101,7 +101,7 @@ func (s Store) AddDocuments(ctx context.Context, texts = append(texts, doc.PageContent) } - vectors, err := s.embedder.EmbedDocuments(ctx, texts) + vectors, err := opts.Embedder.EmbedDocuments(ctx, texts) if err != nil { return nil, err } @@ -158,7 +158,7 @@ func (s Store) SimilaritySearch( return nil, err } - vector, err := s.embedder.EmbedQuery(ctx, query) + vector, err := opts.Embedder.EmbedQuery(ctx, query) if err != nil { return nil, err } @@ -245,7 +245,11 @@ func (s Store) getFilters(opts vectorstores.Options) any { } func (s Store) getOptions(options ...vectorstores.Option) vectorstores.Options { - opts := vectorstores.Options{} + // use the embedder from the store by default; this can be overridden by passing + // a `vectorstores.WithEmbedder` option.
+ opts := vectorstores.Options{ + Embedder: s.embedder, + } for _, opt := range options { opt(&opts) } diff --git a/vectorstores/weaviate/weaviate_test.go b/vectorstores/weaviate/weaviate_test.go index 174c92357..e3aa5050b 100644 --- a/vectorstores/weaviate/weaviate_test.go +++ b/vectorstores/weaviate/weaviate_test.go @@ -630,3 +630,58 @@ func TestWeaviateStoreAdditionalFieldsAdded(t *testing.T) { require.NotEmpty(t, additional["certainty"], "expected the certainty to be present") require.NotEmpty(t, additional["distance"], "expected the distance to be present") } + +// TestWeaviateWithOptionEmbedder ensures that the embedder provided as an option to either +// `AddDocuments` or `SimilaritySearch` takes precedence over the one provided when creating +// the `Store`. +func TestWeaviateWithOptionEmbedder(t *testing.T) { + t.Parallel() + + scheme, host := getValues(t) + + llm, err := openai.New() + require.NoError(t, err) + + notme, err := embeddings.NewEmbedder( + embeddings.EmbedderClientFunc(func(context.Context, []string) ([][]float32, error) { + require.FailNow(t, "wrong embedder was called") + return nil, nil + }), + ) + require.NoError(t, err) + + butme, err := embeddings.NewEmbedder( + embeddings.EmbedderClientFunc(func(ctx context.Context, texts []string) ([][]float32, error) { + return llm.CreateEmbedding(ctx, texts) + }), + ) + require.NoError(t, err) + + store, err := New( + WithScheme(scheme), + WithHost(host), + WithEmbedder(notme), + WithNameSpace(uuid.New().String()), + WithIndexName(randomizedCamelCaseClass()), + WithQueryAttrs([]string{"location"}), + ) + require.NoError(t, err) + + err = createTestClass(context.Background(), store) + require.NoError(t, err) + + _, err = store.AddDocuments(context.Background(), []schema.Document{ + {PageContent: "tokyo", Metadata: map[string]any{ + "country": "japan", + }}, + {PageContent: "potato"}, + }, vectorstores.WithEmbedder(butme)) + require.NoError(t, err) + + docs, err := 
store.SimilaritySearch(context.Background(), "japan", 1, + vectorstores.WithEmbedder(butme)) + require.NoError(t, err) + require.Len(t, docs, 1) + require.Equal(t, "tokyo", docs[0].PageContent) + require.Equal(t, "japan", docs[0].Metadata["country"]) +}