From 4331ce1aa83791b1d182e2d0e2f7fc83ba187594 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Wed, 15 Jan 2025 13:32:55 -0500 Subject: [PATCH 01/32] Add vector database section Signed-off-by: Fanit Kolchina --- _config.yml | 6 + _search-plugins/knn/index.md | 44 --- _search-plugins/knn/jni-libraries.md | 22 -- _search-plugins/vector-search.md | 283 --------------- .../knn => _vector-database}/api.md | 9 +- .../creating-vector-index/index.md | 10 + .../creating-vector-index}/knn-vector.md | 10 +- .../creating-vector-index/vector-index.md | 12 +- .../auto-generated-embeddings.md | 330 ++++++++++++++++++ _vector-database/getting-started/index.md | 114 ++++++ .../neural-search-tutorial.md | 28 +- .../pre-generated-embeddings.md | 155 ++++++++ _vector-database/index.md | 24 ++ .../conversational-search.md | 2 + .../ml-powered-search}/hybrid-search.md | 1 + .../ml-powered-search/index.md | 10 +- .../ml-powered-search}/multimodal-search.md | 2 + .../neural-sparse-search.md | 2 + .../neural-sparse-with-pipelines.md | 3 + .../neural-sparse-with-raw-vectors.md | 3 + .../ml-powered-search}/semantic-search.md | 2 + .../ml-powered-search}/text-chunking.md | 3 + .../disk-based-vector-search.md | 6 +- _vector-database/optimizing-storage/index.md | 11 + .../knn-vector-quantization.md | 10 +- .../performance-tuning.md | 5 +- .../knn => _vector-database}/settings.md | 5 +- .../vector-search}/approximate-knn.md | 8 +- .../vector-search}/filter-search-knn.md | 8 +- _vector-database/vector-search/index.md | 40 +++ .../vector-search}/knn-score-script.md | 12 +- .../vector-search}/nested-search-knn.md | 10 +- .../vector-search}/painless-functions.md | 9 +- .../vector-search}/radial-search-knn.md | 6 +- 34 files changed, 788 insertions(+), 417 deletions(-) delete mode 100644 _search-plugins/knn/index.md delete mode 100644 _search-plugins/knn/jni-libraries.md delete mode 100644 _search-plugins/vector-search.md rename {_search-plugins/knn => _vector-database}/api.md (99%) create mode 100644 _vector-database/creating-vector-index/index.md rename {_field-types/supported-field-types => _vector-database/creating-vector-index}/knn-vector.md (99%) rename _search-plugins/knn/knn-index.md => _vector-database/creating-vector-index/vector-index.md (99%) create mode 100644 _vector-database/getting-started/auto-generated-embeddings.md create mode 100644 _vector-database/getting-started/index.md rename {_search-plugins => _vector-database/getting-started}/neural-search-tutorial.md (95%) create mode 100644 _vector-database/getting-started/pre-generated-embeddings.md create mode 100644 _vector-database/index.md rename {_search-plugins => _vector-database/ml-powered-search}/conversational-search.md (99%) rename {_search-plugins => _vector-database/ml-powered-search}/hybrid-search.md (99%) rename _search-plugins/neural-search.md => _vector-database/ml-powered-search/index.md (95%) rename {_search-plugins => _vector-database/ml-powered-search}/multimodal-search.md (98%) rename {_search-plugins => _vector-database/ml-powered-search}/neural-sparse-search.md (98%) rename {_search-plugins => _vector-database/ml-powered-search}/neural-sparse-with-pipelines.md (99%) rename {_search-plugins => _vector-database/ml-powered-search}/neural-sparse-with-raw-vectors.md (96%) rename {_search-plugins => _vector-database/ml-powered-search}/semantic-search.md (99%) rename {_search-plugins => _vector-database/ml-powered-search}/text-chunking.md (97%) rename {_search-plugins/knn => _vector-database/optimizing-storage}/disk-based-vector-search.md 
(98%) create mode 100644 _vector-database/optimizing-storage/index.md rename {_search-plugins/knn => _vector-database/optimizing-storage}/knn-vector-quantization.md (99%) rename {_search-plugins/knn => _vector-database}/performance-tuning.md (99%) rename {_search-plugins/knn => _vector-database}/settings.md (98%) rename {_search-plugins/knn => _vector-database/vector-search}/approximate-knn.md (99%) rename {_search-plugins/knn => _vector-database/vector-search}/filter-search-knn.md (99%) create mode 100644 _vector-database/vector-search/index.md rename {_search-plugins/knn => _vector-database/vector-search}/knn-score-script.md (98%) rename {_search-plugins/knn => _vector-database/vector-search}/nested-search-knn.md (98%) rename {_search-plugins/knn => _vector-database/vector-search}/painless-functions.md (96%) rename {_search-plugins/knn => _vector-database/vector-search}/radial-search-knn.md (99%) diff --git a/_config.yml b/_config.yml index 3c6f737cc8..eb875c1b60 100644 --- a/_config.yml +++ b/_config.yml @@ -124,6 +124,9 @@ collections: workspace: permalink: /:collection/:path/ output: true + vector-database: + permalink: /:collection/:path/ + output: true opensearch_collection: # Define the collections used in the theme @@ -173,6 +176,9 @@ opensearch_collection: search-plugins: name: Search nav_fold: true + vector-database: + name: Vector database + nav_fold: true ml-commons-plugin: name: Machine learning nav_fold: true diff --git a/_search-plugins/knn/index.md b/_search-plugins/knn/index.md deleted file mode 100644 index f8c28bcc4e..0000000000 --- a/_search-plugins/knn/index.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -layout: default -title: k-NN search -nav_order: 20 -has_children: true -has_toc: false -redirect_from: - - /search-plugins/knn/ ---- - -# k-NN search - -Short for *k-nearest neighbors*, the k-NN plugin enables users to search for the k-nearest neighbors to a query point across an index of vectors. To determine the neighbors, you can specify the space (the distance function) you want to use to measure the distance between points. - -Use cases include recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about k-NN search, see [Wikipedia](https://en.wikipedia.org/wiki/Nearest_neighbor_search). - -This plugin supports three different methods for obtaining the k-nearest neighbors from an index of vectors: - -1. **Approximate k-NN** - - The first method takes an approximate nearest neighbor approach---it uses one of several algorithms to return the approximate k-nearest neighbors to a query vector. Usually, these algorithms sacrifice indexing speed and search accuracy in return for performance benefits such as lower latency, smaller memory footprints and more scalable search. To learn more about the algorithms, refer to [*nmslib*](https://github.com/nmslib/nmslib/blob/master/manual/README.md)'s and [*faiss*](https://github.com/facebookresearch/faiss/wiki)'s documentation. - - Approximate k-NN is the best choice for searches over large indexes (that is, hundreds of thousands of vectors or more) that require low latency. You should not use approximate k-NN if you want to apply a filter on the index before the k-NN search, which greatly reduces the number of vectors to be searched. In this case, you should use either the script scoring method or Painless extensions. 
- - For more details about this method, including recommendations for which engine to use, see [Approximate k-NN search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). - -2. **Script Score k-NN** - - The second method extends OpenSearch's script scoring functionality to execute a brute force, exact k-NN search over "knn_vector" fields or fields that can represent binary objects. With this approach, you can run k-NN search on a subset of vectors in your index (sometimes referred to as a pre-filter search). - - Use this approach for searches over smaller bodies of documents or when a pre-filter is needed. Using this approach on large indexes may lead to high latencies. - - For more details about this method, see [Exact k-NN with scoring script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/). - -3. **Painless extensions** - - The third method adds the distance functions as painless extensions that you can use in more complex combinations. Similar to the k-NN Script Score, you can use this method to perform a brute force, exact k-NN search across an index, which also supports pre-filtering. - - This approach has slightly slower query performance compared to the k-NN Script Score. If your use case requires more customization over the final score, you should use this approach over Script Score k-NN. - - For more details about this method, see [Painless scripting functions]({{site.url}}{{site.baseurl}}/search-plugins/knn/painless-functions/). - - -Overall, for larger data sets, you should generally choose the approximate nearest neighbor method because it scales significantly better. For smaller data sets, where you may want to apply a filter, you should choose the custom scoring approach. If you have a more complex use case where you need to use a distance function as part of their scoring method, you should use the painless scripting approach. diff --git a/_search-plugins/knn/jni-libraries.md b/_search-plugins/knn/jni-libraries.md deleted file mode 100644 index 4dbdb2da56..0000000000 --- a/_search-plugins/knn/jni-libraries.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -layout: default -title: JNI libraries -nav_order: 35 -parent: k-NN search -has_children: false -redirect_from: - - /search-plugins/knn/jni-library/ ---- - -# JNI libraries - -To integrate [nmslib](https://github.com/nmslib/nmslib/) and [faiss](https://github.com/facebookresearch/faiss/) approximate k-NN functionality (implemented in C++) into the k-NN plugin (implemented in Java), we created a Java Native Interface, which lets the k-NN plugin make calls to the native libraries. The interface includes three libraries: `libopensearchknn_nmslib`, the JNI library that interfaces with nmslib, `libopensearchknn_faiss`, the JNI library that interfaces with faiss, and `libopensearchknn_common`, a library containing common shared functionality between native libraries. - -The Lucene library is not implemented using a native library. -{: .note} - -The libraries `libopensearchknn_faiss` and `libopensearchknn_nmslib` are lazily loaded when they are first called in the plugin. This means that if you are only planning on using one of the libraries, the plugin never loads the other library. - -To build the libraries from source, refer to the [DEVELOPER_GUIDE](https://github.com/opensearch-project/k-NN/blob/main/DEVELOPER_GUIDE.md). - -For more information about JNI, see [Java Native Interface](https://en.wikipedia.org/wiki/Java_Native_Interface) on Wikipedia. 
diff --git a/_search-plugins/vector-search.md b/_search-plugins/vector-search.md deleted file mode 100644 index f19030bf90..0000000000 --- a/_search-plugins/vector-search.md +++ /dev/null @@ -1,283 +0,0 @@ ---- -layout: default -title: Vector search -nav_order: 22 -has_children: false -has_toc: false ---- - -# Vector search - -OpenSearch is a comprehensive search platform that supports a variety of data types, including vectors. OpenSearch vector database functionality is seamlessly integrated with its generic database function. - -In OpenSearch, you can generate vector embeddings, store those embeddings in an index, and use them for vector search. Choose one of the following options: - -- Generate embeddings using a library of your choice before ingesting them into OpenSearch. Once you ingest vectors into an index, you can perform a vector similarity search on the vector space. For more information, see [Working with embeddings generated outside of OpenSearch](#working-with-embeddings-generated-outside-of-opensearch). -- Automatically generate embeddings within OpenSearch. To use embeddings for semantic search, the ingested text (the corpus) and the query need to be embedded using the same model. [Neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/) packages this functionality, eliminating the need to manage the internal details. For more information, see [Generating vector embeddings within OpenSearch](#generating-vector-embeddings-in-opensearch). - -## Working with embeddings generated outside of OpenSearch - -After you generate vector embeddings, upload them to an OpenSearch index and search the index using vector search. For a complete example, see [Example](#example). - -### k-NN index - -To build a vector database and use vector search, you must specify your index as a [k-NN index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/) when creating it by setting `index.knn` to `true`: - -```json -PUT test-index -{ - "settings": { - "index": { - "knn": true, - "knn.algo_param.ef_search": 100 - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 1024, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "nmslib", - "parameters": { - "ef_construction": 128, - "m": 24 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -### k-NN vector - -You must designate the field that will store vectors as a [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) field type. OpenSearch supports vectors of up to 16,000 dimensions, each of which is represented as a 32-bit or 16-bit float. - -To save storage space, you can use `byte` or `binary` vectors. For more information, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors) and [Binary vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). - -### k-NN vector search - -Vector search finds the vectors in your database that are most similar to the query vector. OpenSearch supports the following search methods: - -- [Approximate search](#approximate-search) (approximate k-NN, or ANN): Returns approximate nearest neighbors to the query vector. Usually, approximate search algorithms sacrifice indexing speed and search accuracy in exchange for performance benefits such as lower latency, smaller memory footprints, and more scalable search. For most use cases, approximate search is the best option. 
- -- Exact search (exact k-NN): A brute-force, exact k-NN search of vector fields. OpenSearch supports the following types of exact search: - - [Exact k-NN with scoring script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/): Using the k-NN scoring script, you can apply a filter to an index before executing the nearest neighbor search. - - [Painless extensions]({{site.url}}{{site.baseurl}}/search-plugins/knn/painless-functions/): Adds the distance functions as Painless extensions that you can use in more complex combinations. You can use this method to perform a brute-force, exact k-NN search of an index, which also supports pre-filtering. - -### Approximate search - -OpenSearch supports several algorithms for approximate vector search, each with its own advantages. For complete documentation, see [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). For more information about the search methods and engines, see [Method definitions]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#method-definitions). For method recommendations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#choosing-the-right-method). - -To use approximate vector search, specify one of the following search methods (algorithms) in the `method` parameter: - -- Hierarchical Navigable Small World (HNSW) -- Inverted File System (IVF) - -Additionally, specify the engine (library) that implements this method in the `engine` parameter: - -- [Non-Metric Space Library (NMSLIB)](https://github.com/nmslib/nmslib) -- [Facebook AI Similarity Search (Faiss)](https://github.com/facebookresearch/faiss) -- Lucene - -The following table lists the combinations of search methods and libraries supported by the k-NN engine for approximate vector search. - -Method | Engine -:--- | :--- -HNSW | NMSLIB, Faiss, Lucene -IVF | Faiss - -### Engine recommendations - -In general, select NMSLIB or Faiss for large-scale use cases. Lucene is a good option for smaller deployments and offers benefits like smart filtering, where the optimal filtering strategy—pre-filtering, post-filtering, or exact k-NN—is automatically applied depending on the situation. The following table summarizes the differences between each option. - -| | NMSLIB/HNSW | Faiss/HNSW | Faiss/IVF | Lucene/HNSW | -|:---|:---|:---|:---|:---| -| Max dimensions | 16,000 | 16,000 | 16,000 | 16,000 | -| Filter | Post-filter | Post-filter | Post-filter | Filter during search | -| Training required | No | No | Yes | No | -| Similarity metrics | `l2`, `innerproduct`, `cosinesimil`, `l1`, `linf` | `l2`, `innerproduct` | `l2`, `innerproduct` | `l2`, `cosinesimil` | -| Number of vectors | Tens of billions | Tens of billions | Tens of billions | Less than 10 million | -| Indexing latency | Low | Low | Lowest | Low | -| Query latency and quality | Low latency and high quality | Low latency and high quality | Low latency and low quality | High latency and high quality | -| Vector compression | Flat | Flat
<br>Product quantization | Flat<br>Product quantization | Flat |
-| Memory consumption | High | High<br>Low with PQ | Medium<br>
Low with PQ | High | - -### Example - -In this example, you'll create a k-NN index, add data to the index, and search the data. - -#### Step 1: Create a k-NN index - -First, create an index that will store sample hotel data. Set `index.knn` to `true` and specify the `location` field as a `knn_vector`: - -```json -PUT /hotels-index -{ - "settings": { - "index": { - "knn": true, - "knn.algo_param.ef_search": 100, - "number_of_shards": 1, - "number_of_replicas": 0 - } - }, - "mappings": { - "properties": { - "location": { - "type": "knn_vector", - "dimension": 2, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "lucene", - "parameters": { - "ef_construction": 100, - "m": 16 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -#### Step 2: Add data to your index - -Next, add data to your index. Each document represents a hotel. The `location` field in each document contains a vector specifying the hotel's location: - -```json -POST /_bulk -{ "index": { "_index": "hotels-index", "_id": "1" } } -{ "location": [5.2, 4.4] } -{ "index": { "_index": "hotels-index", "_id": "2" } } -{ "location": [5.2, 3.9] } -{ "index": { "_index": "hotels-index", "_id": "3" } } -{ "location": [4.9, 3.4] } -{ "index": { "_index": "hotels-index", "_id": "4" } } -{ "location": [4.2, 4.6] } -{ "index": { "_index": "hotels-index", "_id": "5" } } -{ "location": [3.3, 4.5] } -``` -{% include copy-curl.html %} - -#### Step 3: Search your data - -Now search for hotels closest to the pin location `[5, 4]`. This location is labeled `Pin` in the following image. Each hotel is labeled with its document number. - -![Hotels on a coordinate plane]({{site.url}}{{site.baseurl}}/images/k-nn-search-hotels.png/) - -To search for the top three closest hotels, set `k` to `3`: - -```json -POST /hotels-index/_search -{ - "size": 3, - "query": { - "knn": { - "location": { - "vector": [ - 5, - 4 - ], - "k": 3 - } - } - } -} -``` -{% include copy-curl.html %} - -The response contains the hotels closest to the specified pin location: - -```json -{ - "took": 1093, - "timed_out": false, - "_shards": { - "total": 1, - "successful": 1, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total": { - "value": 3, - "relation": "eq" - }, - "max_score": 0.952381, - "hits": [ - { - "_index": "hotels-index", - "_id": "2", - "_score": 0.952381, - "_source": { - "location": [ - 5.2, - 3.9 - ] - } - }, - { - "_index": "hotels-index", - "_id": "1", - "_score": 0.8333333, - "_source": { - "location": [ - 5.2, - 4.4 - ] - } - }, - { - "_index": "hotels-index", - "_id": "3", - "_score": 0.72992706, - "_source": { - "location": [ - 4.9, - 3.4 - ] - } - } - ] - } -} -``` - -### Vector search with filtering - -For information about vector search with filtering, see [k-NN search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). - -## Generating vector embeddings in OpenSearch - -[Neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/) encapsulates the infrastructure needed to perform semantic vector searches. After you integrate an inference (embedding) service, neural search functions like lexical search, accepting a textual query and returning relevant documents. - -When you index your data, neural search transforms text into vector embeddings and indexes both the text and its vector embeddings in a vector index. When you use a neural query during search, neural search converts the query text into vector embeddings and uses vector search to return the results. 
- -### Choosing a model - -The first step in setting up neural search is choosing a model. You can upload a model to your OpenSearch cluster, use one of the pretrained models provided by OpenSearch, or connect to an externally hosted model. For more information, see [Integrating ML models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/integrating-ml-models/). - -### Neural search tutorial - -For a step-by-step tutorial, see [Neural search tutorial]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/). - -### Search methods - -Choose one of the following search methods to use your model for neural search: - -- [Semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/): Uses dense retrieval based on text embedding models to search text data. - -- [Hybrid search]({{site.url}}{{site.baseurl}}/search-plugins/hybrid-search/): Combines lexical and neural search to improve search relevance. - -- [Multimodal search]({{site.url}}{{site.baseurl}}/search-plugins/multimodal-search/): Uses neural search with multimodal embedding models to search text and image data. - -- [Neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/): Uses neural search with sparse retrieval based on sparse embedding models to search text data. - -- [Conversational search]({{site.url}}{{site.baseurl}}/search-plugins/conversational-search/): With conversational search, you can ask questions in natural language, receive a text response, and ask additional clarifying questions. diff --git a/_search-plugins/knn/api.md b/_vector-database/api.md similarity index 99% rename from _search-plugins/knn/api.md rename to _vector-database/api.md index d927bf1c35..3151b70e00 100644 --- a/_search-plugins/knn/api.md +++ b/_vector-database/api.md @@ -1,12 +1,13 @@ --- layout: default -title: k-NN plugin API -nav_order: 30 -parent: k-NN search +title: Vector API +nav_order: 90 has_children: false +redirect_from: + - /search-plugins/knn/api/ --- -# k-NN plugin API +# Vector API The k-NN plugin adds several APIs for managing, monitoring, and optimizing your k-NN workload. 
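For example, you can check cluster-wide plugin statistics with the stats endpoint. This is a minimal illustrative call; the APIs documented on this page describe the full set of endpoints and parameters:

```json
GET /_plugins/_knn/stats
```
{% include copy-curl.html %}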
diff --git a/_vector-database/creating-vector-index/index.md b/_vector-database/creating-vector-index/index.md new file mode 100644 index 0000000000..532ba2bcd4 --- /dev/null +++ b/_vector-database/creating-vector-index/index.md @@ -0,0 +1,10 @@ +--- +layout: default +title: Creating a vector index +nav_order: 20 +has_children: true +redirect_from: + - /vector-database/creating-a-vector-db/ +--- + +# Creating a vector index \ No newline at end of file diff --git a/_field-types/supported-field-types/knn-vector.md b/_vector-database/creating-vector-index/knn-vector.md similarity index 99% rename from _field-types/supported-field-types/knn-vector.md rename to _vector-database/creating-vector-index/knn-vector.md index da784aeefe..1c8abb4617 100644 --- a/_field-types/supported-field-types/knn-vector.md +++ b/_vector-database/creating-vector-index/knn-vector.md @@ -1,13 +1,15 @@ --- layout: default -title: k-NN vector -nav_order: 58 +title: Vector field type +nav_order: 20 has_children: false -parent: Supported field types +parent: Creating a vector index has_math: true +redirect_from: + - /field-types/supported-field-types/knn-vector/ --- -# k-NN vector field type +# Vector field type **Introduced 1.0** {: .label .label-purple } diff --git a/_search-plugins/knn/knn-index.md b/_vector-database/creating-vector-index/vector-index.md similarity index 99% rename from _search-plugins/knn/knn-index.md rename to _vector-database/creating-vector-index/vector-index.md index b53fa997d8..7241a28871 100644 --- a/_search-plugins/knn/knn-index.md +++ b/_vector-database/creating-vector-index/vector-index.md @@ -1,12 +1,14 @@ --- layout: default -title: k-NN index -nav_order: 5 -parent: k-NN search -has_children: false +title: Vector index +parent: Creating a vector index +nav_order: 10 +has_children: true +redirect_from: + - /search-plugins/knn/knn-index/ --- -# k-NN index +# Vector index The k-NN plugin introduces a custom data type, the `knn_vector`, that allows users to ingest their k-NN vectors into an OpenSearch index and perform different kinds of k-NN search. The `knn_vector` field is highly configurable and can serve many different k-NN workloads. For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/). diff --git a/_vector-database/getting-started/auto-generated-embeddings.md b/_vector-database/getting-started/auto-generated-embeddings.md new file mode 100644 index 0000000000..db0871726f --- /dev/null +++ b/_vector-database/getting-started/auto-generated-embeddings.md @@ -0,0 +1,330 @@ +--- +layout: default +title: Auto-generated embeddings +parent: Getting started +nav_order: 20 +--- + +# Getting started with auto-generated embeddings + +With this approach, embeddings are generated dynamically within OpenSearch. This method provides a simplified workflow by offering automatic text-to-vector conversion. + +## Prerequisites + +For this simple setup, you'll use an OpenSearch-provided machine learning (ML) model and a cluster with no dedicated ML nodes. 
To ensure that this basic local setup works, send the following request to update ML-related cluster settings: + +```json +PUT _cluster/settings +{ + "persistent": { + "plugins.ml_commons.only_run_on_ml_node": "false", + "plugins.ml_commons.model_access_control_enabled": "true", + "plugins.ml_commons.native_memory_threshold": "99" + } +} +``` +{% include copy-curl.html %} + +## Step 1: Choose a language model for embedding generation + +Auto-generating embeddings requires configuring a language model that will convert text to embeddings both at ingestion time and query time. + +When selecting a model, you have the following options: + +- Use a pretrained model provided by OpenSearch. For more information, see [OpenSearch-provided pretrained models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/). + +- Upload your own model to OpenSearch. For more information, see [Custom local models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/custom-local-models/). + +- Connect to a foundation model hosted on an external platform. For more information, see [Connecting to remote models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/index/). + +In this example, you'll use the [DistilBERT](https://huggingface.co/docs/transformers/model_doc/distilbert) model from Hugging Face, which is one of the [pretrained models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/#sentence-transformers) available in OpenSearch. For more information, see [Integrating ML models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/integrating-ml-models/). + +Take note of the dimensionality of the model because you'll need it when you set up a vector index. +{: .important} + +## Step 2: Register and deploy the model + +To register and deploy the model, send the following request: + +```json +POST /_plugins/_ml/models/_register?deploy=true +{ + "name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b", + "version": "1.0.1", + "model_format": "TORCH_SCRIPT" +} +``` +{% include copy-curl.html %} + +Registering a model is an asynchronous task. OpenSearch sends back a task ID for this task: + +```json +{ + "task_id": "aFeif4oB5Vm0Tdw8yoN7", + "status": "CREATED" +} +``` + +You can check the status of the task by using the Tasks API: + +```json +GET /_plugins/_ml/tasks/aFeif4oB5Vm0Tdw8yoN7 +``` +{% include copy-curl.html %} + +Once the task is complete, the task state will change to `COMPLETED` and the Tasks API response will contain a model ID for the registered model: + +```json +{ + "model_id": "aVeif4oB5Vm0Tdw8zYO2", + "task_type": "REGISTER_MODEL", + "function_name": "TEXT_EMBEDDING", + "state": "COMPLETED", + "worker_node": [ + "4p6FVOmJRtu3wehDD74hzQ" + ], + "create_time": 1694358489722, + "last_update_time": 1694358499139, + "is_async": true +} +``` + +You'll need the model ID in order to use this model for several of the following steps. + +## Step 3: Ingest text data + +Use the following steps to ingest text data into OpenSearch and automatically generate vector embeddings from text. + +### Step 3(a): Create an ingest pipeline for neural search + +First, you need to create an [ingest pipeline]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) that contains one processor: a task that transforms document fields before documents are ingested into an index. You'll set up a `text_embedding` processor that creates vector embeddings from text. 
You'll need the `model_id` of the model you set up in the previous section and a `field_map`, which specifies the name of the field from which to take the text (`text`) and the name of the field in which to record embeddings (`passage_embedding`):
+
+```json
+PUT /_ingest/pipeline/nlp-ingest-pipeline
+{
+  "description": "An NLP ingest pipeline",
+  "processors": [
+    {
+      "text_embedding": {
+        "model_id": "aVeif4oB5Vm0Tdw8zYO2",
+        "field_map": {
+          "text": "passage_embedding"
+        }
+      }
+    }
+  ]
+}
+```
+{% include copy-curl.html %}
+
+### Step 3(b): Create a vector index
+
+Now you'll create a vector index by setting `index.knn` to `true`. In the index, the field named `text` will contain an image description, and a [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) field named `passage_embedding` will contain the vector embedding of the text. Additionally, set the default ingest pipeline to the `nlp-ingest-pipeline` you created in the previous step:
+
+```json
+PUT /my-nlp-index
+{
+  "settings": {
+    "index.knn": true,
+    "default_pipeline": "nlp-ingest-pipeline"
+  },
+  "mappings": {
+    "properties": {
+      "id": {
+        "type": "text"
+      },
+      "passage_embedding": {
+        "type": "knn_vector",
+        "dimension": 768,
+        "method": {
+          "engine": "lucene",
+          "space_type": "l2",
+          "name": "hnsw",
+          "parameters": {}
+        }
+      },
+      "text": {
+        "type": "text"
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+Setting up a vector index allows you to later perform a vector search on the `passage_embedding` field.
+
+### Step 3(c): Ingest documents into the index
+
+In this step, you'll ingest several sample documents into the index. The sample data is taken from the [Flickr image dataset](https://www.kaggle.com/datasets/hsankesara/flickr-image-dataset). Each document contains a `text` field corresponding to the image description and an `id` field corresponding to the image ID:
+
+```json
+PUT /my-nlp-index/_doc/1
+{
+  "text": "A West Virginia university women 's basketball team , officials , and a small gathering of fans are in a West Virginia arena .",
+  "id": "4319130149.jpg"
+}
+```
+{% include copy-curl.html %}
+
+```json
+PUT /my-nlp-index/_doc/2
+{
+  "text": "A wild animal races across an uncut field with a minimal amount of trees .",
+  "id": "1775029934.jpg"
+}
+```
+{% include copy-curl.html %}
+
+```json
+PUT /my-nlp-index/_doc/3
+{
+  "text": "People line the stands which advertise Freemont 's orthopedics , a cowboy rides a light brown bucking bronco .",
+  "id": "2664027527.jpg"
+}
+```
+{% include copy-curl.html %}
+
+```json
+PUT /my-nlp-index/_doc/4
+{
+  "text": "A man who is riding a wild horse in the rodeo is very near to falling off .",
+  "id": "4427058951.jpg"
+}
+```
+{% include copy-curl.html %}
+
+```json
+PUT /my-nlp-index/_doc/5
+{
+  "text": "A rodeo cowboy , wearing a cowboy hat , is being thrown off of a wild white horse .",
+  "id": "2691147709.jpg"
+}
+```
+{% include copy-curl.html %}
+
+When the documents are ingested into the index, the `text_embedding` processor creates an additional field that contains vector embeddings and adds that field to the document.
To see an example document that is indexed, search for document 1:
+
+```json
+GET /my-nlp-index/_doc/1
+```
+{% include copy-curl.html %}
+
+The response includes the document `_source` containing the original `text` and `id` fields and the added `passage_embedding` field:
+
+```json
+{
+  "_index": "my-nlp-index",
+  "_id": "1",
+  "_version": 1,
+  "_seq_no": 0,
+  "_primary_term": 1,
+  "found": true,
+  "_source": {
+    "passage_embedding": [
+      0.04491629,
+      -0.34105563,
+      0.036822468,
+      -0.14139028,
+      ...
+    ],
+    "text": "A West Virginia university women 's basketball team , officials , and a small gathering of fans are in a West Virginia arena .",
+    "id": "4319130149.jpg"
+  }
+}
+```
+
+## Step 4: Search the data
+
+Now you'll search the index using semantic search. To automatically generate vector embeddings from query text, use a `neural` query and provide the model ID of the model you set up earlier so that vector embeddings for the query text are generated with the model used at ingestion time:
+
+```json
+GET /my-nlp-index/_search
+{
+  "_source": {
+    "excludes": [
+      "passage_embedding"
+    ]
+  },
+  "query": {
+    "neural": {
+      "passage_embedding": {
+        "query_text": "wild west",
+        "model_id": "aVeif4oB5Vm0Tdw8zYO2",
+        "k": 5
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+The response contains all five documents, and the document order reflects semantic meaning:
+
+```json
+{
+  "took": 25,
+  "timed_out": false,
+  "_shards": {
+    "total": 1,
+    "successful": 1,
+    "skipped": 0,
+    "failed": 0
+  },
+  "hits": {
+    "total": {
+      "value": 5,
+      "relation": "eq"
+    },
+    "max_score": 0.01585195,
+    "hits": [
+      {
+        "_index": "my-nlp-index",
+        "_id": "4",
+        "_score": 0.01585195,
+        "_source": {
+          "text": "A man who is riding a wild horse in the rodeo is very near to falling off .",
+          "id": "4427058951.jpg"
+        }
+      },
+      {
+        "_index": "my-nlp-index",
+        "_id": "2",
+        "_score": 0.015748845,
+        "_source": {
+          "text": "A wild animal races across an uncut field with a minimal amount of trees.",
+          "id": "1775029934.jpg"
+        }
+      },
+      {
+        "_index": "my-nlp-index",
+        "_id": "5",
+        "_score": 0.015177963,
+        "_source": {
+          "text": "A rodeo cowboy , wearing a cowboy hat , is being thrown off of a wild white horse .",
+          "id": "2691147709.jpg"
+        }
+      },
+      {
+        "_index": "my-nlp-index",
+        "_id": "1",
+        "_score": 0.013272902,
+        "_source": {
+          "text": "A West Virginia university women 's basketball team , officials , and a small gathering of fans are in a West Virginia arena .",
+          "id": "4319130149.jpg"
+        }
+      },
+      {
+        "_index": "my-nlp-index",
+        "_id": "3",
+        "_score": 0.011347735,
+        "_source": {
+          "text": "People line the stands which advertise Freemont 's orthopedics , a cowboy rides a light brown bucking bronco .",
+          "id": "2664027527.jpg"
+        }
+      }
+    ]
+  }
+}
+```
diff --git a/_vector-database/getting-started/index.md b/_vector-database/getting-started/index.md
new file mode 100644
index 0000000000..9a6a0e2038
--- /dev/null
+++ b/_vector-database/getting-started/index.md
@@ -0,0 +1,114 @@
+---
+layout: default
+title: Getting started
+nav_order: 10
+has_children: true
+redirect_from:
+  - /vector-database/getting-started/
+---
+
+# Getting started with OpenSearch as a vector database
+
+To get started using OpenSearch as a vector database, choose one of the following approaches.
+
+## Pre-generated embeddings
+
+With this approach, you generate embeddings externally and then index them into OpenSearch. This method offers greater flexibility in how embeddings are created. The workflow for this approach is as follows:
+
+1.
Generate embeddings using external tools:
+   - Custom machine learning models
+   - Embedding services (OpenAI, Cohere)
+   - Domain-specific embedding techniques
+2. Ingest pre-computed vector embeddings into OpenSearch
+3. Perform vector similarity search
+
+For a complete example, see [Getting started with pre-generated embeddings]({{site.url}}{{site.baseurl}}/vector-database/getting-started/pre-generated-embeddings/).
+
+This approach is suitable for the following use cases:
+  - Scientific research
+  - Domain-specific applications
+  - Custom embedding requirements
+
+## Auto-generated embeddings
+
+With this approach, embeddings are generated dynamically within OpenSearch. This method provides a simplified workflow by offering automatic text-to-vector conversion. The workflow for this approach is as follows:
+
+1. Choose an embedding model:
+   - Pretrained models
+   - Custom uploaded models
+   - Externally hosted model connections
+2. Index text data
+3. OpenSearch automatically generates embeddings
+4. Perform semantic search
+
+For a complete example, see [Getting started with auto-generated embeddings]({{site.url}}{{site.baseurl}}/vector-database/getting-started/auto-generated-embeddings/).
+
+For a comprehensive tutorial, see the [Semantic search tutorial]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/).
+
+This approach is suitable for the following use cases:
+  - General-purpose search
+  - Rapid prototyping
+  - Standard text corpus
+  - Quick implementation
+
+### k-NN vector search
+
+Vector search finds the vectors in your database that are most similar to the query vector. OpenSearch supports the following search methods:
+
+- [Approximate search](#approximate-search) (approximate k-NN, or ANN): Returns approximate nearest neighbors to the query vector. For most use cases, approximate search is the best option.
+
+- Exact search (exact k-NN): A brute-force, exact k-NN search of vector fields using the [k-NN scoring script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/) or [Painless extensions]({{site.url}}{{site.baseurl}}/search-plugins/knn/painless-functions/).
+
+### Approximate search
+
+OpenSearch supports several algorithms for approximate vector search, each with its own advantages. For complete documentation, see [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). For more information about the search methods and engines, see [Method definitions]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#method-definitions). For method recommendations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#choosing-the-right-method).
+
+To use approximate vector search, specify one of the following search methods (algorithms) in the `method` parameter:
+
+- Hierarchical Navigable Small World (HNSW)
+- Inverted File System (IVF)
+
+Additionally, specify the engine (library) that implements this method in the `engine` parameter:
+
+- [Non-Metric Space Library (NMSLIB)](https://github.com/nmslib/nmslib)
+- [Facebook AI Similarity Search (Faiss)](https://github.com/facebookresearch/faiss)
+- Lucene
+
+The following table lists the combinations of search methods and libraries supported by the k-NN engine for approximate vector search.
+
+Method | Engine
+:--- | :---
+HNSW | NMSLIB, Faiss, Lucene
+IVF | Faiss
+
+### Engine recommendations
+
+In general, select NMSLIB or Faiss for large-scale use cases. Lucene is a good option for smaller deployments and offers benefits like smart filtering, where the optimal filtering strategy—pre-filtering, post-filtering, or exact k-NN—is automatically applied depending on the situation. The following table summarizes the differences between each option.
+ +| | NMSLIB/HNSW | Faiss/HNSW | Faiss/IVF | Lucene/HNSW | +|:---|:---|:---|:---|:---| +| Max dimensions | 16,000 | 16,000 | 16,000 | 16,000 | +| Filter | Post-filter | Post-filter | Post-filter | Filter during search | +| Training required | No | No | Yes | No | +| Similarity metrics | `l2`, `innerproduct`, `cosinesimil`, `l1`, `linf` | `l2`, `innerproduct` | `l2`, `innerproduct` | `l2`, `cosinesimil` | +| Number of vectors | Tens of billions | Tens of billions | Tens of billions | Less than 10 million | +| Indexing latency | Low | Low | Lowest | Low | +| Query latency and quality | Low latency and high quality | Low latency and high quality | Low latency and low quality | High latency and high quality | +| Vector compression | Flat | Flat
<br>Product quantization | Flat<br>Product quantization | Flat |
+| Memory consumption | High | High<br>Low with PQ | Medium<br>
Low with PQ | High | + + + + + +### Search methods + +Choose one of the following search methods to use your model for neural search: + +- [Semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/): Uses dense retrieval based on text embedding models to search text data. + +- [Hybrid search]({{site.url}}{{site.baseurl}}/search-plugins/hybrid-search/): Combines lexical and neural search to improve search relevance. + +- [Multimodal search]({{site.url}}{{site.baseurl}}/search-plugins/multimodal-search/): Uses neural search with multimodal embedding models to search text and image data. + +- [Neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/): Uses neural search with sparse retrieval based on sparse embedding models to search text data. + +- [Conversational search]({{site.url}}{{site.baseurl}}/search-plugins/conversational-search/): With conversational search, you can ask questions in natural language, receive a text response, and ask additional clarifying questions. diff --git a/_search-plugins/neural-search-tutorial.md b/_vector-database/getting-started/neural-search-tutorial.md similarity index 95% rename from _search-plugins/neural-search-tutorial.md rename to _vector-database/getting-started/neural-search-tutorial.md index 9c1b224cb8..a0d93c24c6 100644 --- a/_search-plugins/neural-search-tutorial.md +++ b/_vector-database/getting-started/neural-search-tutorial.md @@ -1,36 +1,22 @@ --- layout: default -title: Neural search tutorial +title: Semantic search tutorial has_children: false +parent: Getting started nav_order: 30 redirect_from: - /ml-commons-plugin/semantic-search/ + - /search-plugins/neural-search-tutorial/ --- -# Neural search tutorial +# Semantic search tutorial By default, OpenSearch calculates document scores using the [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) algorithm. BM25 is a keyword-based algorithm that performs well on queries containing keywords but fails to capture the semantic meaning of the query terms. Semantic search, unlike keyword-based search, takes into account the meaning of the query in the search context. Thus, semantic search performs well when a query requires natural language understanding. -In this tutorial, you'll learn how to use neural search to: +In this tutorial, you'll learn how to implement the following types of search: -- Implement semantic search in OpenSearch. -- Implement hybrid search by combining semantic and keyword search to improve search relevance. - -## Terminology - -It's helpful to understand the following terms before starting this tutorial: - -- _Neural search_: Facilitates vector search at ingestion time and at search time: - - At ingestion time, neural search uses language models to generate vector embeddings from the text fields in the document. The documents containing both the original text field and the vector embedding of the field are then indexed in a k-NN index, as shown in the following diagram. - - ![Neural search at ingestion time diagram]({{site.url}}{{site.baseurl}}/images/neural-search-ingestion.png) - - At search time, when you then use a _neural query_, the query text is passed through a language model, and the resulting vector embeddings are compared with the document text vector embeddings to find the most relevant results, as shown in the following diagram. 
- - ![Neural search at search time diagram]({{site.url}}{{site.baseurl}}/images/neural-search-query.png) - -- _Semantic search_: Employs neural search in order to determine the intention of the user's query in the search context, thereby improving search relevance. - -- _Hybrid search_: Combines semantic and keyword search to improve search relevance. +- **Semantic search**: Considers semantic meaning in order to determine the intention of the user's query in the search context, thereby improving search relevance. +- **Hybrid search**: Combines semantic and keyword search to improve search relevance. ## OpenSearch components for semantic search diff --git a/_vector-database/getting-started/pre-generated-embeddings.md b/_vector-database/getting-started/pre-generated-embeddings.md new file mode 100644 index 0000000000..de483c549b --- /dev/null +++ b/_vector-database/getting-started/pre-generated-embeddings.md @@ -0,0 +1,155 @@ +--- +layout: default +title: Pre-generated embeddings +parent: Getting started +nav_order: 10 +--- + +# Getting started with pre-generated embeddings + +With this approach, you generate embeddings externally and then index them into OpenSearch. This method offers greater flexibility in how embeddings are created. + +In this example, you'll create a k-NN index, ingest vector embedding data into the index, and search the data. + +## Prerequisite + +Before you start, you must generate embeddings using a library of your choice. + +## Step 1: Create a k-NN index + +First, create an index that will store sample hotel data. To use vector search, set `index.knn` to `true` and specify the `location` field as a `knn_vector`: + +```json +PUT /hotels-index +{ + "settings": { + "index": { + "knn": true, + "knn.algo_param.ef_search": 100, + "number_of_shards": 1, + "number_of_replicas": 0 + } + }, + "mappings": { + "properties": { + "location": { + "type": "knn_vector", + "dimension": 2, + "space_type": "l2", + "method": { + "name": "hnsw", + "engine": "lucene", + "parameters": { + "ef_construction": 100, + "m": 16 + } + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Step 2: Add data to your index + +Next, add data to your index. Each document represents a hotel. The `location` field in each document contains a vector specifying the hotel's location: + +```json +POST /_bulk +{ "index": { "_index": "hotels-index", "_id": "1" } } +{ "location": [5.2, 4.4] } +{ "index": { "_index": "hotels-index", "_id": "2" } } +{ "location": [5.2, 3.9] } +{ "index": { "_index": "hotels-index", "_id": "3" } } +{ "location": [4.9, 3.4] } +{ "index": { "_index": "hotels-index", "_id": "4" } } +{ "location": [4.2, 4.6] } +{ "index": { "_index": "hotels-index", "_id": "5" } } +{ "location": [3.3, 4.5] } +``` +{% include copy-curl.html %} + +## Step 3: Search your data + +Now search for hotels closest to the pin location `[5, 4]`. This location is labeled `Pin` in the following image. Each hotel is labeled with its document number. 
![Hotels on a coordinate plane]({{site.url}}{{site.baseurl}}/images/k-nn-search-hotels.png/)
+
+To search for the top three closest hotels, set `k` to `3`:
+
+```json
+POST /hotels-index/_search
+{
+  "size": 3,
+  "query": {
+    "knn": {
+      "location": {
+        "vector": [
+          5,
+          4
+        ],
+        "k": 3
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+The response contains the hotels closest to the specified pin location:
+
+```json
+{
+  "took": 1093,
+  "timed_out": false,
+  "_shards": {
+    "total": 1,
+    "successful": 1,
+    "skipped": 0,
+    "failed": 0
+  },
+  "hits": {
+    "total": {
+      "value": 3,
+      "relation": "eq"
+    },
+    "max_score": 0.952381,
+    "hits": [
+      {
+        "_index": "hotels-index",
+        "_id": "2",
+        "_score": 0.952381,
+        "_source": {
+          "location": [
+            5.2,
+            3.9
+          ]
+        }
+      },
+      {
+        "_index": "hotels-index",
+        "_id": "1",
+        "_score": 0.8333333,
+        "_source": {
+          "location": [
+            5.2,
+            4.4
+          ]
+        }
+      },
+      {
+        "_index": "hotels-index",
+        "_id": "3",
+        "_score": 0.72992706,
+        "_source": {
+          "location": [
+            4.9,
+            3.4
+          ]
+        }
+      }
+    ]
+  }
+}
+```
\ No newline at end of file
diff --git a/_vector-database/index.md b/_vector-database/index.md
new file mode 100644
index 0000000000..3f3aa609b6
--- /dev/null
+++ b/_vector-database/index.md
@@ -0,0 +1,24 @@
+---
+layout: default
+title: Vector database
+nav_order: 1
+has_children: false
+has_toc: false
+nav_exclude: true
+permalink: /vector-database/
+redirect_from:
+  - /vector-database/index/
+  - /search-plugins/vector-search/
+---
+
+# Vector database
+
+OpenSearch is a comprehensive search platform that supports a variety of data types, including vectors. OpenSearch vector database functionality is seamlessly integrated with its generic database function.
+
+In OpenSearch, you can generate vector embeddings, store those embeddings in an index, and use them for vector search in the following ways:
+
+- **Pre-generated embeddings**: Generate embeddings using a library of your choice before ingesting them into OpenSearch. Once you ingest vectors into an index, you can perform a vector similarity search on the vector space. For more information, see [Getting started with pre-generated embeddings]({{site.url}}{{site.baseurl}}/vector-database/getting-started/pre-generated-embeddings/).
+- **Auto-generated embeddings**: Automatically generate embeddings within OpenSearch. To use embeddings for semantic search, the ingested text (the corpus) and the query need to be embedded using the same model. [Neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/) packages this functionality, eliminating the need to manage the internal details. For more information, see [Getting started with auto-generated embeddings]({{site.url}}{{site.baseurl}}/vector-database/getting-started/auto-generated-embeddings/).
+
+To get started, see [Getting started]({{site.url}}{{site.baseurl}}/vector-database/getting-started/).
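+As a quick preview, both approaches build on the same foundation: an index mapping that declares a `knn_vector` field. The following sketch is illustrative only; the index name, field name, and dimension are placeholders that you would replace with your own values:
+
+```json
+PUT /my-vector-index
+{
+  "settings": {
+    "index": {
+      "knn": true
+    }
+  },
+  "mappings": {
+    "properties": {
+      "my_vector": {
+        "type": "knn_vector",
+        "dimension": 3
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}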
diff --git a/_search-plugins/conversational-search.md b/_vector-database/ml-powered-search/conversational-search.md similarity index 99% rename from _search-plugins/conversational-search.md rename to _vector-database/ml-powered-search/conversational-search.md index be4c97b425..ddf0e921ad 100644 --- a/_search-plugins/conversational-search.md +++ b/_vector-database/ml-powered-search/conversational-search.md @@ -1,10 +1,12 @@ --- layout: default title: Conversational search +parent: ML-powered search has_children: false nav_order: 70 redirect_from: - /ml-commons-plugin/conversational-search/ + - /search-plugins/conversational-search/ --- # Conversational search diff --git a/_search-plugins/hybrid-search.md b/_vector-database/ml-powered-search/hybrid-search.md similarity index 99% rename from _search-plugins/hybrid-search.md rename to _vector-database/ml-powered-search/hybrid-search.md index 6d68645421..e2eedd8038 100644 --- a/_search-plugins/hybrid-search.md +++ b/_vector-database/ml-powered-search/hybrid-search.md @@ -1,6 +1,7 @@ --- layout: default title: Hybrid search +parent: ML-powered search has_children: false nav_order: 60 --- diff --git a/_search-plugins/neural-search.md b/_vector-database/ml-powered-search/index.md similarity index 95% rename from _search-plugins/neural-search.md rename to _vector-database/ml-powered-search/index.md index 931c9ce593..9768f3257b 100644 --- a/_search-plugins/neural-search.md +++ b/_vector-database/ml-powered-search/index.md @@ -1,14 +1,16 @@ --- layout: default -title: Neural search -nav_order: 25 -has_children: false +title: ML-powered search +nav_order: 50 +has_children: true has_toc: false redirect_from: - /neural-search-plugin/index/ + - /search-plugins/neural-search/ + - /vector-database/ml-powered-search/ --- -# Neural search +# ML-powered search Neural search transforms text into vectors and facilitates vector search both at ingestion time and at search time. During ingestion, neural search transforms document text into vector embeddings and indexes both the text and its vector embeddings in a vector index. When you use a neural query during search, neural search converts the query text into vector embeddings, uses vector search to compare the query and document embeddings, and returns the closest results. 
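For example, a neural query at search time generally takes the following shape. This is a sketch rather than a complete walkthrough: the index name, vector field, query text, and `model_id` are placeholders, and the model must already be registered and deployed:

```json
GET /my-nlp-index/_search
{
  "query": {
    "neural": {
      "passage_embedding": {
        "query_text": "wild west",
        "model_id": "<model ID>",
        "k": 5
      }
    }
  }
}
```
{% include copy-curl.html %}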
diff --git a/_search-plugins/multimodal-search.md b/_vector-database/ml-powered-search/multimodal-search.md
similarity index 98%
rename from _search-plugins/multimodal-search.md
rename to _vector-database/ml-powered-search/multimodal-search.md
index 6c7ddeed5b..dc49ba40c3 100644
--- a/_search-plugins/multimodal-search.md
+++ b/_vector-database/ml-powered-search/multimodal-search.md
@@ -1,10 +1,12 @@
 ---
 layout: default
 title: Multimodal search
+parent: ML-powered search
 nav_order: 40
 has_children: false
 redirect_from:
   - /search-plugins/neural-multimodal-search/
+  - /search-plugins/multimodal-search/
 ---
 
 # Multimodal search
diff --git a/_search-plugins/neural-sparse-search.md b/_vector-database/ml-powered-search/neural-sparse-search.md
similarity index 98%
rename from _search-plugins/neural-sparse-search.md
rename to _vector-database/ml-powered-search/neural-sparse-search.md
index 0beee26ef0..c7109a9443 100644
--- a/_search-plugins/neural-sparse-search.md
+++ b/_vector-database/ml-powered-search/neural-sparse-search.md
@@ -1,11 +1,12 @@
 ---
 layout: default
 title: Neural sparse search
+parent: ML-powered search
 nav_order: 50
 has_children: true
 redirect_from:
   - /search-plugins/neural-sparse-search/
   - /search-plugins/sparse-search/
 ---
 
 # Neural sparse search
diff --git a/_search-plugins/neural-sparse-with-pipelines.md b/_vector-database/ml-powered-search/neural-sparse-with-pipelines.md
similarity index 99%
rename from _search-plugins/neural-sparse-with-pipelines.md
rename to _vector-database/ml-powered-search/neural-sparse-with-pipelines.md
index 2e8f01a446..e1047015ba 100644
--- a/_search-plugins/neural-sparse-with-pipelines.md
+++ b/_vector-database/ml-powered-search/neural-sparse-with-pipelines.md
@@ -2,8 +2,11 @@
 layout: default
 title: Configuring ingest pipelines
 parent: Neural sparse search
+grand_parent: ML-powered search
 nav_order: 10
 has_children: false
+redirect_from:
+  - /search-plugins/neural-sparse-with-pipelines/
 ---
 
 # Configuring ingest pipelines for neural sparse search
diff --git a/_search-plugins/neural-sparse-with-raw-vectors.md b/_vector-database/ml-powered-search/neural-sparse-with-raw-vectors.md
similarity index 96%
rename from _search-plugins/neural-sparse-with-raw-vectors.md
rename to _vector-database/ml-powered-search/neural-sparse-with-raw-vectors.md
index d69a789a1d..4c4d604464 100644
--- a/_search-plugins/neural-sparse-with-raw-vectors.md
+++ b/_vector-database/ml-powered-search/neural-sparse-with-raw-vectors.md
@@ -2,8 +2,11 @@
 layout: default
 title: Using raw vectors
 parent: Neural sparse search
+grand_parent: ML-powered search
 nav_order: 20
 has_children: false
+redirect_from:
+  - /search-plugins/neural-sparse-with-raw-vectors/
 ---
 
 # Using raw vectors for neural sparse search
diff --git a/_search-plugins/semantic-search.md b/_vector-database/ml-powered-search/semantic-search.md
similarity index 99%
rename from _search-plugins/semantic-search.md
rename to _vector-database/ml-powered-search/semantic-search.md
index 259685fe3d..fae615cc3c 100644
--- a/_search-plugins/semantic-search.md
+++ b/_vector-database/ml-powered-search/semantic-search.md
@@ -1,10 +1,12 @@
 ---
 layout: default
 title: Semantic search
+parent: ML-powered search
 nav_order: 35
 has_children: false
 redirect_from:
   - /search-plugins/neural-text-search/
+  - /search-plugins/semantic-search/
 ---
 
 # Semantic search
diff --git a/_search-plugins/text-chunking.md b/_vector-database/ml-powered-search/text-chunking.md
similarity index 97%
rename from
_search-plugins/text-chunking.md rename to _vector-database/ml-powered-search/text-chunking.md index b66cfeda61..77781efa7e 100644 --- a/_search-plugins/text-chunking.md +++ b/_vector-database/ml-powered-search/text-chunking.md @@ -1,7 +1,10 @@ --- layout: default title: Text chunking +parent: ML-powered search nav_order: 65 +redirect_from: + - /search-plugins/text-chunking/ --- # Text chunking diff --git a/_search-plugins/knn/disk-based-vector-search.md b/_vector-database/optimizing-storage/disk-based-vector-search.md similarity index 98% rename from _search-plugins/knn/disk-based-vector-search.md rename to _vector-database/optimizing-storage/disk-based-vector-search.md index 8fe794f44c..3e6a6b2308 100644 --- a/_search-plugins/knn/disk-based-vector-search.md +++ b/_vector-database/optimizing-storage/disk-based-vector-search.md @@ -1,9 +1,11 @@ --- layout: default title: Disk-based vector search -nav_order: 16 -parent: k-NN search +nav_order: 30 +parent: Optimizing vector storage has_children: false +redirect_from: + - /search-plugins/knn/disk-based-vector-search/ --- # Disk-based vector search diff --git a/_vector-database/optimizing-storage/index.md b/_vector-database/optimizing-storage/index.md new file mode 100644 index 0000000000..00d927ad37 --- /dev/null +++ b/_vector-database/optimizing-storage/index.md @@ -0,0 +1,11 @@ +--- +layout: default +title: Optimizing vector storage +nav_order: 60 +has_children: true +redirect_from: + - /vector-database/optimizing-storage/ +--- + +# Optimizing vector storage + diff --git a/_search-plugins/knn/knn-vector-quantization.md b/_vector-database/optimizing-storage/knn-vector-quantization.md similarity index 99% rename from _search-plugins/knn/knn-vector-quantization.md rename to _vector-database/optimizing-storage/knn-vector-quantization.md index 2e516b0b8d..4ed2ad8e22 100644 --- a/_search-plugins/knn/knn-vector-quantization.md +++ b/_vector-database/optimizing-storage/knn-vector-quantization.md @@ -1,13 +1,15 @@ --- layout: default -title: k-NN vector quantization -nav_order: 27 -parent: k-NN search +title: Vector quantization +parent: Optimizing vector storage +nav_order: 50 has_children: false has_math: true +redirect_from: + - /search-plugins/knn/knn-vector-quantization/ --- -# k-NN vector quantization +# Vector quantization By default, the k-NN plugin supports the indexing and querying of vectors of type `float`, where each dimension of the vector occupies 4 bytes of memory. For use cases that require ingestion on a large scale, keeping `float` vectors can be expensive because OpenSearch needs to construct, load, save, and search graphs (for native `nmslib` and `faiss` engines). To reduce the memory footprint, you can use vector quantization. 
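For example, the following mapping sketch enables Faiss 16-bit scalar quantization by specifying the `sq` encoder with the `fp16` type. The index name, field name, and dimension are illustrative placeholders:

```json
PUT /my-quantized-index
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 768,
        "method": {
          "name": "hnsw",
          "engine": "faiss",
          "space_type": "l2",
          "parameters": {
            "encoder": {
              "name": "sq",
              "parameters": {
                "type": "fp16"
              }
            }
          }
        }
      }
    }
  }
}
```
{% include copy-curl.html %}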
diff --git a/_search-plugins/knn/performance-tuning.md b/_vector-database/performance-tuning.md similarity index 99% rename from _search-plugins/knn/performance-tuning.md rename to _vector-database/performance-tuning.md index ae2368b597..6a602ef262 100644 --- a/_search-plugins/knn/performance-tuning.md +++ b/_vector-database/performance-tuning.md @@ -1,8 +1,9 @@ --- layout: default title: Performance tuning -parent: k-NN search -nav_order: 45 +nav_order: 70 +redirect_from: + - /search-plugins/knn/performance-tuning/ --- # Performance tuning diff --git a/_search-plugins/knn/settings.md b/_vector-database/settings.md similarity index 98% rename from _search-plugins/knn/settings.md rename to _vector-database/settings.md index e4731ec94c..f2fa4788c4 100644 --- a/_search-plugins/knn/settings.md +++ b/_vector-database/settings.md @@ -1,8 +1,9 @@ --- layout: default title: Settings -parent: k-NN search -nav_order: 40 +nav_order: 80 +redirect_from: + - /search-plugins/knn/settings/ --- # k-NN settings diff --git a/_search-plugins/knn/approximate-knn.md b/_vector-database/vector-search/approximate-knn.md similarity index 99% rename from _search-plugins/knn/approximate-knn.md rename to _vector-database/vector-search/approximate-knn.md index f8921033e0..4363757b0a 100644 --- a/_search-plugins/knn/approximate-knn.md +++ b/_vector-database/vector-search/approximate-knn.md @@ -1,13 +1,15 @@ --- layout: default -title: Approximate k-NN search +title: Approximate vector search nav_order: 15 -parent: k-NN search +parent: Vector search has_children: false has_math: true +redirect_from: + - /search-plugins/knn/approximate-knn/ --- -# Approximate k-NN search +# Approximate vector search Standard k-NN search methods compute similarity using a brute-force approach that measures the nearest distance between a query and a number of points, which produces exact results. This works well in many applications. However, in the case of extremely large datasets with high dimensionality, this creates a scaling problem that reduces the efficiency of the search. Approximate k-NN search methods can overcome this by employing tools that restructure indexes more efficiently and reduce the dimensionality of searchable vectors. Using this approach requires a sacrifice in accuracy but increases search processing speeds appreciably. 
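
To give a sense of the query shape, the following is a minimal approximate k-NN query; the index name, field name, and vector values are hypothetical:

```json
GET /my-knn-index/_search
{
  "size": 2,
  "query": {
    "knn": {
      "my_vector": {
        "vector": [2.0, 3.0, 5.0],
        "k": 2
      }
    }
  }
}
```
{% include copy-curl.html %}

The `k` parameter controls how many approximate nearest neighbors are retrieved per shard before the top results are merged.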
diff --git a/_search-plugins/knn/filter-search-knn.md b/_vector-database/vector-search/filter-search-knn.md similarity index 99% rename from _search-plugins/knn/filter-search-knn.md rename to _vector-database/vector-search/filter-search-knn.md index 2f0c4aa072..909bfef415 100644 --- a/_search-plugins/knn/filter-search-knn.md +++ b/_vector-database/vector-search/filter-search-knn.md @@ -1,13 +1,15 @@ --- layout: default -title: k-NN search with filters +title: Vector search with filters nav_order: 20 -parent: k-NN search +parent: Vector search has_children: false has_math: true +redirect_from: + - /search-plugins/knn/filter-search-knn/ --- -# k-NN search with filters +# Vector search with filters To refine k-NN results, you can filter a k-NN search using one of the following methods: diff --git a/_vector-database/vector-search/index.md b/_vector-database/vector-search/index.md new file mode 100644 index 0000000000..17d0d98265 --- /dev/null +++ b/_vector-database/vector-search/index.md @@ -0,0 +1,40 @@ +--- +layout: default +title: Vector search +nav_order: 40 +has_children: true +has_toc: false +redirect_from: + - /search-plugins/knn/ + - /search-plugins/knn/index/ + - /vector-database/vector-search/ +--- + +# Vector search + +Short for *k-nearest neighbors*, the k-NN plugin enables users to search for the k-nearest neighbors to a query point across an index of vectors. To determine the neighbors, you can specify the space (the distance function) you want to use to measure the distance between points. + +Use cases include recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about vector search, see [Wikipedia](https://en.wikipedia.org/wiki/Nearest_neighbor_search). + +This plugin supports three different methods for obtaining the k-nearest neighbors from an index of vectors: + +- [Approximate search](#approximate-search) (approximate k-NN, or ANN): Returns approximate nearest neighbors to the query vector. Usually, approximate search algorithms sacrifice indexing speed and search accuracy in exchange for performance benefits such as lower latency, smaller memory footprints, and more scalable search. For most use cases, approximate search is the best option. + +- Exact search: A brute-force, exact k-NN search of vector fields. OpenSearch supports the following types of exact search: + - [Exact search with scoring script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/): Using a scoring script, you can apply a filter to an index before executing the nearest neighbor search. + - [Painless extensions]({{site.url}}{{site.baseurl}}/search-plugins/knn/painless-functions/): Adds the distance functions as Painless extensions that you can use in more complex combinations. You can use this method to perform a brute-force, exact vector search of an index, which also supports pre-filtering. + + +Overall, for larger data sets, you should generally choose the approximate nearest neighbor method because it scales significantly better. For smaller data sets, where you may want to apply a filter, you should choose the custom scoring approach. If you have a more complex use case where you need to use a distance function as part of their scoring method, you should use the Painless scripting approach. 
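
To make the scoring-script option concrete, the following is a minimal sketch of a `script_score` query that runs the `knn_score` script over a filtered document set; the index, field, and filter values are hypothetical:

```json
GET /my-knn-index/_search
{
  "size": 4,
  "query": {
    "script_score": {
      "query": {
        "term": {
          "color": "blue"
        }
      },
      "script": {
        "source": "knn_score",
        "lang": "knn",
        "params": {
          "field": "my_vector",
          "query_value": [2.0, 3.0],
          "space_type": "l2"
        }
      }
    }
  }
}
```
{% include copy-curl.html %}

Because the inner `term` query runs first, the exact distance computation is applied only to the pre-filtered documents.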
+ +### Vector search with filtering + +For information about vector search with filtering, see [Vector search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). + +## Nested field vector search + +For information about vector search with nested fields, see [Vector search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/nested-search-knn/). + +## Radial search + +With radial search, you can search all points within a vector space that reside within a specified maximum distance or minimum score threshold from a query point. For information about vector search with nested fields, see [Radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). \ No newline at end of file diff --git a/_search-plugins/knn/knn-score-script.md b/_vector-database/vector-search/knn-score-script.md similarity index 98% rename from _search-plugins/knn/knn-score-script.md rename to _vector-database/vector-search/knn-score-script.md index a184de2d3d..6fe9b93723 100644 --- a/_search-plugins/knn/knn-score-script.md +++ b/_vector-database/vector-search/knn-score-script.md @@ -1,13 +1,15 @@ --- layout: default -title: Exact k-NN with scoring script -nav_order: 10 -parent: k-NN search -has_children: false +title: Exact vector search with scoring script +nav_order: 20 +parent: Vector search +has_children: true has_math: true +redirect_from: + - /search-plugins/knn/knn-score-script/ --- -# Exact k-NN with scoring script +# Exact vector search with scoring script The k-NN plugin implements the OpenSearch score script plugin that you can use to find the exact k-nearest neighbors to a given query point. Using the k-NN score script, you can apply a filter on an index before executing the nearest neighbor search. This is useful for dynamic search cases where the index body may vary based on other conditions. diff --git a/_search-plugins/knn/nested-search-knn.md b/_vector-database/vector-search/nested-search-knn.md similarity index 98% rename from _search-plugins/knn/nested-search-knn.md rename to _vector-database/vector-search/nested-search-knn.md index ba3df48bdf..3132099260 100644 --- a/_search-plugins/knn/nested-search-knn.md +++ b/_vector-database/vector-search/nested-search-knn.md @@ -1,13 +1,15 @@ --- layout: default -title: k-NN search with nested fields -nav_order: 21 -parent: k-NN search +title: Vector search with nested fields +nav_order: 40 +parent: Vector search has_children: false has_math: true +redirect_from: + - /search-plugins/knn/nested-search-knn/ --- -# k-NN search with nested fields +# Vector search with nested fields Using [nested fields]({{site.url}}{{site.baseurl}}/field-types/nested/) in a k-nearest neighbors (k-NN) index, you can store multiple vectors in a single document. For example, if your document consists of various components, you can generate a vector value for each component and store each vector in a nested field. 
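
For example, a mapping along the following lines stores one vector per component as a nested field; the index and field names are illustrative:

```json
PUT /my-nested-index
{
  "settings": {
    "index.knn": true
  },
  "mappings": {
    "properties": {
      "components": {
        "type": "nested",
        "properties": {
          "component_vector": {
            "type": "knn_vector",
            "dimension": 3,
            "method": {
              "name": "hnsw",
              "engine": "faiss",
              "space_type": "l2"
            }
          }
        }
      }
    }
  }
}
```
{% include copy-curl.html %}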
diff --git a/_search-plugins/knn/painless-functions.md b/_vector-database/vector-search/painless-functions.md similarity index 96% rename from _search-plugins/knn/painless-functions.md rename to _vector-database/vector-search/painless-functions.md index 4b2311ad65..4cacc0ba06 100644 --- a/_search-plugins/knn/painless-functions.md +++ b/_vector-database/vector-search/painless-functions.md @@ -1,13 +1,16 @@ --- layout: default -title: k-NN Painless extensions +title: Painless extensions nav_order: 25 -parent: k-NN search +parent: Exact vector search with scoring script +grand_parent: Vector search has_children: false has_math: true +redirect_from: + - /search-plugins/knn/painless-functions/ --- -# k-NN Painless Scripting extensions +# Painless scripting extensions With the k-NN plugin's Painless Scripting extensions, you can use k-NN distance functions directly in your Painless scripts to perform operations on `knn_vector` fields. Painless has a strict list of allowed functions and classes per context to ensure its scripts are secure. The k-NN plugin adds Painless Scripting extensions to a few of the distance functions used in [k-NN score script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script), so you can use them to customize your k-NN workload. diff --git a/_search-plugins/knn/radial-search-knn.md b/_vector-database/vector-search/radial-search-knn.md similarity index 99% rename from _search-plugins/knn/radial-search-knn.md rename to _vector-database/vector-search/radial-search-knn.md index e5449a0993..432ae3e3a1 100644 --- a/_search-plugins/knn/radial-search-knn.md +++ b/_vector-database/vector-search/radial-search-knn.md @@ -1,10 +1,12 @@ --- layout: default title: Radial search -nav_order: 28 -parent: k-NN search +nav_order: 50 +parent: Vector search has_children: false has_math: true +redirect_from: + - /search-plugins/knn/radial-search-knn/ --- # Radial search From db9e95e79f399532ce911d67d881959a055ca2dd Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Thu, 16 Jan 2025 10:07:27 -0500 Subject: [PATCH 02/32] More restructuring Signed-off-by: Fanit Kolchina --- _config.yml | 8 +- .../supported-field-types}/knn-vector.md | 8 +- .../creating-vector-index/index.md | 10 -- _vector-database/getting-started/index.md | 114 -------------- _vector-database/optimizing-storage/index.md | 11 -- _vector-database/vector-search/index.md | 40 ----- {_vector-database => _vector-search}/api.md | 0 .../creating-vector-index/index-settings.md | 24 +++ _vector-search/creating-vector-index/index.md | 146 ++++++++++++++++++ .../creating-vector-index/method.md | 104 +------------ .../creating-vector-index/vector-field.md | 81 ++++++++++ .../auto-generated-embeddings.md | 0 _vector-search/getting-started/index.md | 53 +++++++ .../getting-started/neural-search-tutorial.md | 0 .../pre-generated-embeddings.md | 0 {_vector-database => _vector-search}/index.md | 8 +- _vector-search/ingesting-data.md | 108 +++++++++++++ .../conversational-search.md | 0 .../ml-powered-search/hybrid-search.md | 0 .../ml-powered-search/index.md | 0 .../ml-powered-search/multimodal-search.md | 0 .../ml-powered-search/neural-sparse-search.md | 0 .../neural-sparse-with-pipelines.md | 0 .../neural-sparse-with-raw-vectors.md | 0 .../ml-powered-search/semantic-search.md | 0 .../ml-powered-search/text-chunking.md | 0 .../disk-based-vector-search.md | 0 _vector-search/optimizing-storage/index.md | 53 +++++++ .../knn-vector-quantization.md | 0 .../performance-tuning.md | 0 _vector-search/querying-data.md | 72 +++++++++
.../settings.md | 0 .../filter-search-knn.md | 2 +- .../specialized-operations/index.md | 23 +++ .../nested-search-knn.md | 2 +- .../radial-search-knn.md | 2 +- .../approximate-knn.md | 2 +- .../vector-search-techniques/index.md | 70 +++++++++ .../knn-score-script.md | 2 +- .../painless-functions.md | 2 +- 40 files changed, 651 insertions(+), 294 deletions(-) rename {_vector-database/creating-vector-index => _field-types/supported-field-types}/knn-vector.md (99%) delete mode 100644 _vector-database/creating-vector-index/index.md delete mode 100644 _vector-database/getting-started/index.md delete mode 100644 _vector-database/optimizing-storage/index.md delete mode 100644 _vector-database/vector-search/index.md rename {_vector-database => _vector-search}/api.md (100%) create mode 100644 _vector-search/creating-vector-index/index-settings.md create mode 100644 _vector-search/creating-vector-index/index.md rename _vector-database/creating-vector-index/vector-index.md => _vector-search/creating-vector-index/method.md (69%) create mode 100644 _vector-search/creating-vector-index/vector-field.md rename {_vector-database => _vector-search}/getting-started/auto-generated-embeddings.md (100%) create mode 100644 _vector-search/getting-started/index.md rename {_vector-database => _vector-search}/getting-started/neural-search-tutorial.md (100%) rename {_vector-database => _vector-search}/getting-started/pre-generated-embeddings.md (100%) rename {_vector-database => _vector-search}/index.md (93%) create mode 100644 _vector-search/ingesting-data.md rename {_vector-database => _vector-search}/ml-powered-search/conversational-search.md (100%) rename {_vector-database => _vector-search}/ml-powered-search/hybrid-search.md (100%) rename {_vector-database => _vector-search}/ml-powered-search/index.md (100%) rename {_vector-database => _vector-search}/ml-powered-search/multimodal-search.md (100%) rename {_vector-database => _vector-search}/ml-powered-search/neural-sparse-search.md (100%) rename {_vector-database => _vector-search}/ml-powered-search/neural-sparse-with-pipelines.md (100%) rename {_vector-database => _vector-search}/ml-powered-search/neural-sparse-with-raw-vectors.md (100%) rename {_vector-database => _vector-search}/ml-powered-search/semantic-search.md (100%) rename {_vector-database => _vector-search}/ml-powered-search/text-chunking.md (100%) rename {_vector-database => _vector-search}/optimizing-storage/disk-based-vector-search.md (100%) create mode 100644 _vector-search/optimizing-storage/index.md rename {_vector-database => _vector-search}/optimizing-storage/knn-vector-quantization.md (100%) rename {_vector-database => _vector-search}/performance-tuning.md (100%) create mode 100644 _vector-search/querying-data.md rename {_vector-database => _vector-search}/settings.md (100%) rename {_vector-database/vector-search => _vector-search/specialized-operations}/filter-search-knn.md (99%) create mode 100644 _vector-search/specialized-operations/index.md rename {_vector-database/vector-search => _vector-search/specialized-operations}/nested-search-knn.md (99%) rename {_vector-database/vector-search => _vector-search/specialized-operations}/radial-search-knn.md (99%) rename {_vector-database/vector-search => _vector-search/vector-search-techniques}/approximate-knn.md (99%) create mode 100644 _vector-search/vector-search-techniques/index.md rename {_vector-database/vector-search => _vector-search/vector-search-techniques}/knn-score-script.md (99%) rename {_vector-database/vector-search => 
_vector-search/vector-search-techniques}/painless-functions.md (99%) diff --git a/_config.yml b/_config.yml index eb875c1b60..8ad9eb88cf 100644 --- a/_config.yml +++ b/_config.yml @@ -124,7 +124,7 @@ collections: workspace: permalink: /:collection/:path/ output: true - vector-database: + vector-search: permalink: /:collection/:path/ output: true @@ -174,10 +174,10 @@ opensearch_collection: name: Aggregations nav_fold: true search-plugins: - name: Search + name: Search features nav_fold: true - vector-database: - name: Vector database + vector-search: + name: Vector search nav_fold: true ml-commons-plugin: name: Machine learning diff --git a/_vector-database/creating-vector-index/knn-vector.md b/_field-types/supported-field-types/knn-vector.md similarity index 99% rename from _vector-database/creating-vector-index/knn-vector.md rename to _field-types/supported-field-types/knn-vector.md index 1c8abb4617..91b4457bf8 100644 --- a/_vector-database/creating-vector-index/knn-vector.md +++ b/_field-types/supported-field-types/knn-vector.md @@ -1,15 +1,13 @@ --- layout: default -title: Vector field type +title: k-NN vector nav_order: 20 has_children: false -parent: Creating a vector index +parent: Supported field types has_math: true -redirect_from: - - /field-types/supported-field-types/knn-vector/ --- -# Vector field type +# k-NN vector **Introduced 1.0** {: .label .label-purple } diff --git a/_vector-database/creating-vector-index/index.md b/_vector-database/creating-vector-index/index.md deleted file mode 100644 index 532ba2bcd4..0000000000 --- a/_vector-database/creating-vector-index/index.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -layout: default -title: Creating a vector index -nav_order: 20 -has_children: true -redirect_from: - - /vector-database/creating-a-vector-db/ ---- - -# Creating a vector index \ No newline at end of file diff --git a/_vector-database/getting-started/index.md b/_vector-database/getting-started/index.md deleted file mode 100644 index 9a6a0e2038..0000000000 --- a/_vector-database/getting-started/index.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -layout: default -title: Getting started -nav_order: 10 -has_children: true -redirect_from: - - /vector-database/getting-started/ ---- - -# Getting started with OpenSearch as a vector database - -To get started using OpenSearch as a vector database, choose one of the following approaches. - -## Pre-generated embeddings - -With this approach, you generate embeddings externally and then index them into OpenSearch. This method offers greater flexibility in how embeddings are created. The workflow for this approach is as follows: - -1. Generate embeddings using external tools - - Custom machine learning models - - Embedding services (OpenAI, Cohere) - - Domain-specific embedding techniques -2. Ingest pre-computed vector embeddings into OpenSearch -3. Perform vector similarity search - -For a complete example, see [Getting started with pre-generated embeddings]({{site.url}}{{site.baseurl}}/vector-database/getting-started/pre-generated-embeddings/). - -This approach is suitable for the following use cases: - - Scientific research - - Domain-specific applications - - Custom embedding requirements - -## Auto-generated embeddings - -With this approach, embeddings are generated dynamically within OpenSearch. This method provides a simplified workflow by offering automatic text-to-vector conversion. The workflow for this approach is as follows: - -1. 
Choose an embedding model: - Pretrained models - Custom uploaded models - Externally hosted model connections -2. Index text data -3. OpenSearch automatically generates embeddings -4. Perform semantic search - -For a complete example, see [Getting started with auto-generated embeddings]({{site.url}}{{site.baseurl}}/vector-database/getting-started/auto-generated-embeddings/). - -For a comprehensive tutorial, see [Neural search tutorial]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/). - -This approach is suitable for the following use cases: - General-purpose search - Rapid prototyping - Standard text corpus - Quick implementation - - -### k-NN vector search - -Vector search finds the vectors in your database that are most similar to the query vector. OpenSearch supports the following search methods: - - -### Approximate search - -OpenSearch supports several algorithms for approximate vector search, each with its own advantages. For complete documentation, see [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). For more information about the search methods and engines, see [Method definitions]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#method-definitions). For method recommendations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#choosing-the-right-method). - -To use approximate vector search, specify one of the following search methods (algorithms) in the `method` parameter: - -- Hierarchical Navigable Small World (HNSW) -- Inverted File System (IVF) - -Additionally, specify the engine (library) that implements this method in the `engine` parameter: - -- [Non-Metric Space Library (NMSLIB)](https://github.com/nmslib/nmslib) -- [Facebook AI Similarity Search (Faiss)](https://github.com/facebookresearch/faiss) -- Lucene - -The following table lists the combinations of search methods and libraries supported by the k-NN engine for approximate vector search. - -Method | Engine -:--- | :--- -HNSW | NMSLIB, Faiss, Lucene -IVF | Faiss - -### Engine recommendations - -In general, select NMSLIB or Faiss for large-scale use cases. Lucene is a good option for smaller deployments and offers benefits like smart filtering, where the optimal filtering strategy—pre-filtering, post-filtering, or exact k-NN—is automatically applied depending on the situation. The following table summarizes the differences between each option. - -| | NMSLIB/HNSW | Faiss/HNSW | Faiss/IVF | Lucene/HNSW | -|:---|:---|:---|:---|:---| -| Max dimensions | 16,000 | 16,000 | 16,000 | 16,000 | -| Filter | Post-filter | Post-filter | Post-filter | Filter during search | -| Training required | No | No | Yes | No | -| Similarity metrics | `l2`, `innerproduct`, `cosinesimil`, `l1`, `linf` | `l2`, `innerproduct` | `l2`, `innerproduct` | `l2`, `cosinesimil` | -| Number of vectors | Tens of billions | Tens of billions | Tens of billions | Less than 10 million | -| Indexing latency | Low | Low | Lowest | Low | -| Query latency and quality | Low latency and high quality | Low latency and high quality | Low latency and low quality | High latency and high quality | -| Vector compression | Flat | Flat<br>Product quantization | Flat<br>Product quantization | Flat | -| Memory consumption | High | High<br>Low with PQ | Medium<br>Low with PQ | High | - - - - -### Search methods - -Choose one of the following search methods to use your model for neural search: - -- [Semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/): Uses dense retrieval based on text embedding models to search text data. - -- [Hybrid search]({{site.url}}{{site.baseurl}}/search-plugins/hybrid-search/): Combines lexical and neural search to improve search relevance. - -- [Multimodal search]({{site.url}}{{site.baseurl}}/search-plugins/multimodal-search/): Uses neural search with multimodal embedding models to search text and image data. - -- [Neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/): Uses neural search with sparse retrieval based on sparse embedding models to search text data. - -- [Conversational search]({{site.url}}{{site.baseurl}}/search-plugins/conversational-search/): With conversational search, you can ask questions in natural language, receive a text response, and ask additional clarifying questions. diff --git a/_vector-database/optimizing-storage/index.md b/_vector-database/optimizing-storage/index.md deleted file mode 100644 index 00d927ad37..0000000000 --- a/_vector-database/optimizing-storage/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -layout: default -title: Optimizing vector storage -nav_order: 60 -has_children: true -redirect_from: - - /vector-database/optimizing-storage/ ---- - -# Optimizing vector storage - diff --git a/_vector-database/vector-search/index.md b/_vector-database/vector-search/index.md deleted file mode 100644 index 17d0d98265..0000000000 --- a/_vector-database/vector-search/index.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -layout: default -title: Vector search -nav_order: 40 -has_children: true -has_toc: false -redirect_from: - - /search-plugins/knn/ - - /search-plugins/knn/index/ - - /vector-database/vector-search/ ---- - -# Vector search - -Short for *k-nearest neighbors*, the k-NN plugin enables users to search for the k-nearest neighbors to a query point across an index of vectors. To determine the neighbors, you can specify the space (the distance function) you want to use to measure the distance between points. - -Use cases include recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about vector search, see [Wikipedia](https://en.wikipedia.org/wiki/Nearest_neighbor_search). - -This plugin supports three different methods for obtaining the k-nearest neighbors from an index of vectors: - -- [Approximate search](#approximate-search) (approximate k-NN, or ANN): Returns approximate nearest neighbors to the query vector. Usually, approximate search algorithms sacrifice indexing speed and search accuracy in exchange for performance benefits such as lower latency, smaller memory footprints, and more scalable search. For most use cases, approximate search is the best option. - -- Exact search: A brute-force, exact k-NN search of vector fields. OpenSearch supports the following types of exact search: - - [Exact search with scoring script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/): Using a scoring script, you can apply a filter to an index before executing the nearest neighbor search. - - [Painless extensions]({{site.url}}{{site.baseurl}}/search-plugins/knn/painless-functions/): Adds the distance functions as Painless extensions that you can use in more complex combinations.
You can use this method to perform a brute-force, exact vector search of an index, which also supports pre-filtering. - -Overall, for larger data sets, you should generally choose the approximate nearest neighbor method because it scales significantly better. For smaller data sets, where you may want to apply a filter, you should choose the custom scoring approach. If you have a more complex use case where you need to use a distance function as part of their scoring method, you should use the Painless scripting approach. - -### Vector search with filtering - -For information about vector search with filtering, see [Vector search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). - -## Nested field vector search - -For information about vector search with nested fields, see [Vector search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/nested-search-knn/). - -## Radial search - -With radial search, you can search all points within a vector space that reside within a specified maximum distance or minimum score threshold from a query point. For information about vector search with nested fields, see [Radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). \ No newline at end of file diff --git a/_vector-database/api.md b/_vector-search/api.md similarity index 100% rename from _vector-database/api.md rename to _vector-search/api.md diff --git a/_vector-search/creating-vector-index/index-settings.md b/_vector-search/creating-vector-index/index-settings.md new file mode 100644 index 0000000000..1f1c09e75e --- /dev/null +++ b/_vector-search/creating-vector-index/index-settings.md @@ -0,0 +1,24 @@ +--- +layout: default +title: Vector index settings +parent: Creating a vector index +nav_order: 30 +--- + +# Vector index settings + +The k-NN plugin introduces several index settings that you can use to configure k-NN structures. + +Several of the parameters defined in the index settings are deprecated; set these parameters in the mapping instead. Parameters set in the mapping override the corresponding index settings, and setting parameters in the mapping allows an index to contain multiple `knn_vector` fields with different parameters. + +Setting | Default | Updatable | Description +:--- | :--- | :--- | :--- +`index.knn` | false | false | Whether the index should build native library indexes for the `knn_vector` fields. If set to false, the `knn_vector` fields will be stored in doc values, but approximate k-NN search functionality will be disabled. +`index.knn.algo_param.ef_search` | 100 | true | The size of the dynamic list used during k-NN searches. Higher values result in more accurate but slower searches. Only available for NMSLIB. +`index.knn.advanced.approximate_threshold` | 15,000 | true | The number of vectors a segment must have before creating specialized data structures for approximate search. Set to `-1` to disable building vector data structures and `0` to always build them. +`index.knn.algo_param.ef_construction` | 100 | false | Deprecated in 1.0.0. Instead, use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value. +`index.knn.algo_param.m` | 16 | false | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead.
+`index.knn.space_type` | l2 | false | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead. + +An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` and `ef_search` values (`512`). +{: .note} diff --git a/_vector-search/creating-vector-index/index.md b/_vector-search/creating-vector-index/index.md new file mode 100644 index 0000000000..3d3d25d20c --- /dev/null +++ b/_vector-search/creating-vector-index/index.md @@ -0,0 +1,146 @@ +--- +layout: default +title: Creating a vector index +nav_order: 20 +has_children: true +redirect_from: + - /vector-database/creating-a-vector-db/ +--- + +# Creating a vector index + +Creating a vector index in OpenSearch involves a common core process with some variations depending on the type of vector search. This guide outlines the key elements shared across all vector indexes and the differences specific to supported use cases. + +To create a k-NN index, set the `settings.index.knn` parameter to `true`: + +```json +PUT /test-index +{ + "settings": { + "index": { + "knn": true + } + }, + "mappings": { + "properties": { + "my_vector1": { + "type": "knn_vector", + "dimension": 3, + "space_type": "l2", + "method": { + "name": "hnsw", + "engine": "lucene", + "parameters": { + "ef_construction": 128, + "m": 24 + } + } + } + } + } +} +``` +{% include copy-curl.html %} + + +Regardless of the type of vector search, the following elements are part of creating a vector index: + +1. **Enable k-NN search**: + Set `index.knn` to `true` in the index settings to enable k-NN search functionality. + +2. **Define a vector field**: + Specify the field that will store the vector data. + +3. **Specify dimension**: + Set the `dimension` property to match the size of the vectors used. + +4. **Choose a space type**: + Select a distance metric for similarity comparisons, such as `l2` (Euclidean distance) or `cosine`. + +5. **Select a method**: + Configure the indexing method, such as HNSW or IVF, to optimize vector search performance. + +To create a vector index, choose one of the following options: + +- [Pre-generated embeddings or raw vectors](#pre-generated-embeddings-or-raw-vectors): Ingest pre-generated embeddings or raw vectors into your index to perform raw vector search. +- [Auto-generated embeddings](#auto-generated-embeddings): Ingest text that will be converted into vector embeddings within OpenSearch in order to perform semantic search using ML models. + + +The following table summarizes key index configuration differences for the supported use cases. 
+ + +| Feature | Vector field type | Ingest pipeline | Transformation | Use case | +|--------------------------|-----------------------|---------------------|-------------------------|-------------------------| +| **Pre-generated embeddings or raw vectors** | `knn_vector` | Not required | Direct ingestion | Raw vector search | +| **Auto-generated embeddings** | `knn_vector` | Required | Auto-generated vectors | Semantic search | + +## Pre-generated embeddings or raw vectors + +To ingest raw vectors into an index, configure a vector field (in this request, `my_vector`) and specify its `dimension`: + +```json +PUT /my-raw-vector-index +{ + "settings": { + "index.knn": true + }, + "mappings": { + "properties": { + "my_vector": { + "type": "knn_vector", + "dimension": 128, + "method": { + "name": "hnsw", + "engine": "faiss", + "space_type": "l2" + } + } + } + } +} +``` + +**Key characteristics:** +- Uses the `knn_vector` type. +- Directly ingests vector data. +- No additional transformations are required. +- Supports custom configurations for indexing methods (for example, Faiss). + + + +## Auto-generated embeddings + +Auto-generated embeddings require an ingest pipeline that converts text into vector embeddings. Specify the pipeline at index creation time: + +```json +PUT /my-semantic-search-index +{ + "settings": { + "index.knn": true, + "default_pipeline": "nlp-ingest-pipeline" + }, + "mappings": { + "properties": { + "passage_text": { + "type": "text" + }, + "passage_embedding": { + "type": "knn_vector", + "dimension": 768, + "method": { + "name": "hnsw", + "engine": "lucene", + "space_type": "l2" + } + } + } + } +} +``` + +**Key characteristics:** +- Uses the `knn_vector` type. +- Includes an ingest pipeline for automatic embedding generation. +- Dimension matches the embedding model output. +- Includes a `text` field for the original content. + diff --git a/_vector-database/creating-vector-index/vector-index.md b/_vector-search/creating-vector-index/method.md similarity index 69% rename from _vector-database/creating-vector-index/vector-index.md rename to _vector-search/creating-vector-index/method.md index 7241a28871..d31e2f0d40 100644 --- a/_vector-database/creating-vector-index/vector-index.md +++ b/_vector-search/creating-vector-index/method.md @@ -1,91 +1,13 @@ --- layout: default -title: Vector index +title: Supported methods parent: Creating a vector index -nav_order: 10 -has_children: true -redirect_from: - - /search-plugins/knn/knn-index/ +nav_order: 20 --- -# Vector index +# Supported methods -The k-NN plugin introduces a custom data type, the `knn_vector`, that allows users to ingest their k-NN vectors into an OpenSearch index and perform different kinds of k-NN search. The `knn_vector` field is highly configurable and can serve many different k-NN workloads. For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/).
- -To create a k-NN index, set the `settings.index.knn` parameter to `true`: - -```json -PUT /test-index -{ - "settings": { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 3, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "lucene", - "parameters": { - "ef_construction": 128, - "m": 24 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -## Byte vectors - -Starting with k-NN plugin version 2.17, you can use `byte` vectors with the `faiss` and `lucene` engines to reduce the amount of required memory and storage space. For more information, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors). - -## Binary vectors - -Starting with k-NN plugin version 2.16, you can use `binary` vectors with the `faiss` engine to reduce the amount of required storage space. For more information, see [Binary vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). - -## SIMD optimization for the Faiss engine - -Starting with version 2.13, the k-NN plugin supports [Single Instruction Multiple Data (SIMD)](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) processing if the underlying hardware supports SIMD instructions (AVX2 on x64 architecture and Neon on ARM64 architecture). SIMD is supported by default on Linux machines only for the Faiss engine. SIMD architecture helps boost overall performance by improving indexing throughput and reducing search latency. Starting with version 2.18, the k-NN plugin supports AVX512 SIMD instructions on x64 architecture. - -SIMD optimization is applicable only if the vector dimension is a multiple of 8. -{: .note} - - -### x64 architecture - - -For x64 architecture, the following versions of the Faiss library are built and shipped with the artifact: - -- `libopensearchknn_faiss.so`: The non-optimized Faiss library without SIMD instructions. -- `libopensearchknn_faiss_avx512.so`: The Faiss library containing AVX512 SIMD instructions. -- `libopensearchknn_faiss_avx2.so`: The Faiss library containing AVX2 SIMD instructions. - -When using the Faiss library, the performance ranking is as follows: AVX512 > AVX2 > no optimization. -{: .note } - -If your hardware supports AVX512, the k-NN plugin loads the `libopensearchknn_faiss_avx512.so` library at runtime. - -If your hardware supports AVX2 but doesn't support AVX512, the k-NN plugin loads the `libopensearchknn_faiss_avx2.so` library at runtime. - -To disable the AVX512 and AVX2 SIMD instructions and load the non-optimized Faiss library (`libopensearchknn_faiss.so`), specify the `knn.faiss.avx512.disabled` and `knn.faiss.avx2.disabled` static settings as `true` in `opensearch.yml` (by default, both of these are `false`). - -Note that to update a static setting, you must stop the cluster, change the setting, and restart the cluster. For more information, see [Static settings]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#static-settings). - -### ARM64 architecture - -For the ARM64 architecture, only one performance-boosting Faiss library (`libopensearchknn_faiss.so`) is built and shipped. The library contains Neon SIMD instructions and cannot be disabled. - -## Method definitions - -A method definition refers to the underlying configuration of the approximate k-NN algorithm you want to use. 
Method definitions are used to either create a `knn_vector` field (when the method does not require training) or [create a model during training]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) that can then be used to [create a `knn_vector` field]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). +A _method_ definition refers to the underlying configuration of the approximate k-NN algorithm you want to use. Method definitions are used to either create a `knn_vector` field (when the method does not require training) or [create a model during training]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) that can then be used to [create a `knn_vector` field]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). A method definition will always contain the name of the method, the space_type the method is built for, the engine (the library) to use, and a map of parameters. @@ -359,21 +281,3 @@ As an example, assume you have a million vectors with a dimension of 256 and nli 1.1 * (((4 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 1.126 GB ``` - -## Index settings - -Additionally, the k-NN plugin introduces several index settings that can be used to configure the k-NN structure as well. - -At the moment, several parameters defined in the settings are in the deprecation process. Those parameters should be set in the mapping instead of the index settings. Parameters set in the mapping will override the parameters set in the index settings. Setting the parameters in the mapping allows an index to have multiple `knn_vector` fields with different parameters. - -Setting | Default | Updatable | Description -:--- | :--- | :--- | :--- -`index.knn` | false | false | Whether the index should build native library indexes for the `knn_vector` fields. If set to false, the `knn_vector` fields will be stored in doc values, but approximate k-NN search functionality will be disabled. -`index.knn.algo_param.ef_search` | 100 | true | The size of the dynamic list used during k-NN searches. Higher values result in more accurate but slower searches. Only available for NMSLIB. -`index.knn.advanced.approximate_threshold` | 15,000 | true | The number of vectors a segment must have before creating specialized data structures for approximate search. Set to `-1` to disable building vector data structures and `0` to always build them. -`index.knn.algo_param.ef_construction` | 100 | false | Deprecated in 1.0.0. Instead, use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value. -`index.knn.algo_param.m` | 16 | false | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead. -`index.knn.space_type` | l2 | false | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead. - -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` and `ef_search` values (`512`). 
-{: .note} diff --git a/_vector-search/creating-vector-index/vector-field.md b/_vector-search/creating-vector-index/vector-field.md new file mode 100644 index 0000000000..2164def374 --- /dev/null +++ b/_vector-search/creating-vector-index/vector-field.md @@ -0,0 +1,81 @@ +--- +layout: default +title: Vector data types +parent: Creating a vector index +nav_order: 10 +--- + +# Vector data types + +The k-NN plugin introduces a custom data type, the `knn_vector`, that allows users to ingest their k-NN vectors into an OpenSearch index and perform different kinds of k-NN search. The `knn_vector` field is highly configurable and can serve many different k-NN workloads. For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/). + + +When defining a `knn_vector` field in OpenSearch, you can select from different data types to balance storage requirements and performance. By default, k-NN vectors are float vectors, but you can also opt for byte or binary vectors for more efficient storage. + +## Float vectors + +Float is the default type for `knn_vector` fields. + +- **Default type**: Each dimension is stored as a 4-byte floating-point number. +- **Precision**: High, suitable for applications requiring maximum accuracy. +- **Use case**: Best for scenarios where storage cost is not a primary concern and precision is critical. + +## Byte vectors + +Starting with k-NN plugin version 2.17, you can use `byte` vectors with the `faiss` and `lucene` engines to reduce the amount of required memory and storage space. For more information, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors). + +- **Storage efficiency**: Each dimension is stored as a signed 8-bit integer, reducing storage space significantly. + - Value range: [-128, 127]. +- **Engines supported**: Available when using the `faiss` or `lucene` engine. +- **Use case**: Ideal for applications that prioritize storage efficiency and can tolerate reduced precision. + +## Binary vectors + +Starting with k-NN plugin version 2.16, you can use `binary` vectors with the `faiss` engine to reduce the amount of required storage space. For more information, see [Binary vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). + +- **Storage efficiency**: Memory costs are reduced by a factor of 32 compared to float vectors. + +- **Performance**: Provides high recall performance while significantly lowering operational costs. +- **Use case**: Suitable for large-scale deployments where cost-efficiency is crucial without sacrificing search performance. + +## Choosing the right data type + +The choice of data type for your `knn_vector` field depends on your specific use case: + +- **Float vectors**: Use when high precision is essential, and storage space is not a limiting factor. +- **Byte vectors**: Use to save storage space while maintaining acceptable precision levels, especially for large datasets. +- **Binary vectors**: Use to achieve cost efficiency and scalability with acceptable trade-offs in precision. + +By selecting the appropriate data type, you can optimize storage, performance, and cost-effectiveness for your OpenSearch deployment.
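
For example, the following is a minimal sketch of a byte vector field definition using the `data_type` parameter; the index and field names are illustrative, and ingested vector values must then be integers in the [-128, 127] range:

```json
PUT /my-byte-vector-index
{
  "settings": {
    "index.knn": true
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 8,
        "data_type": "byte",
        "method": {
          "name": "hnsw",
          "engine": "lucene",
          "space_type": "l2"
        }
      }
    }
  }
}
```
{% include copy-curl.html %}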
+ + +## SIMD optimization for the Faiss engine + +Starting with version 2.13, the k-NN plugin supports [Single Instruction Multiple Data (SIMD)](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) processing if the underlying hardware supports SIMD instructions (AVX2 on x64 architecture and Neon on ARM64 architecture). SIMD is supported by default on Linux machines only for the Faiss engine. SIMD architecture helps boost overall performance by improving indexing throughput and reducing search latency. Starting with version 2.18, the k-NN plugin supports AVX512 SIMD instructions on x64 architecture. + +SIMD optimization is applicable only if the vector dimension is a multiple of 8. +{: .note} + + +### x64 architecture + + +For x64 architecture, the following versions of the Faiss library are built and shipped with the artifact: + +- `libopensearchknn_faiss.so`: The non-optimized Faiss library without SIMD instructions. +- `libopensearchknn_faiss_avx512.so`: The Faiss library containing AVX512 SIMD instructions. +- `libopensearchknn_faiss_avx2.so`: The Faiss library containing AVX2 SIMD instructions. + +When using the Faiss library, the performance ranking is as follows: AVX512 > AVX2 > no optimization. +{: .note } + +If your hardware supports AVX512, the k-NN plugin loads the `libopensearchknn_faiss_avx512.so` library at runtime. + +If your hardware supports AVX2 but doesn't support AVX512, the k-NN plugin loads the `libopensearchknn_faiss_avx2.so` library at runtime. + +To disable the AVX512 and AVX2 SIMD instructions and load the non-optimized Faiss library (`libopensearchknn_faiss.so`), specify the `knn.faiss.avx512.disabled` and `knn.faiss.avx2.disabled` static settings as `true` in `opensearch.yml` (by default, both of these are `false`). + +Note that to update a static setting, you must stop the cluster, change the setting, and restart the cluster. For more information, see [Static settings]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#static-settings). + +### ARM64 architecture + +For the ARM64 architecture, only one performance-boosting Faiss library (`libopensearchknn_faiss.so`) is built and shipped. The library contains Neon SIMD instructions and cannot be disabled. \ No newline at end of file diff --git a/_vector-database/getting-started/auto-generated-embeddings.md b/_vector-search/getting-started/auto-generated-embeddings.md similarity index 100% rename from _vector-database/getting-started/auto-generated-embeddings.md rename to _vector-search/getting-started/auto-generated-embeddings.md diff --git a/_vector-search/getting-started/index.md b/_vector-search/getting-started/index.md new file mode 100644 index 0000000000..2fbfd8dca7 --- /dev/null +++ b/_vector-search/getting-started/index.md @@ -0,0 +1,53 @@ +--- +layout: default +title: Getting started +nav_order: 10 +has_children: true +redirect_from: + - /vector-database/getting-started/ +--- + +# Getting started with vector search + +You can either upload pre-generated embeddings to OpenSearch or have OpenSearch automatically generate embeddings from your text. + +## Pre-generated embeddings + +With this approach, you generate embeddings externally and then index them into OpenSearch. This method offers greater flexibility in how embeddings are created. The workflow for this approach is as follows: + +1. Generate embeddings using external tools + - Custom machine learning models + - Embedding services (OpenAI, Cohere) + - Domain-specific embedding techniques +2. 
Ingest pre-computed vector embeddings into OpenSearch +3. Perform vector similarity search + +For a complete example, see [Getting started with pre-generated embeddings]({{site.url}}{{site.baseurl}}/vector-search/getting-started/pre-generated-embeddings/). + +This approach is suitable for the following use cases: + - Scientific research + - Domain-specific applications + - Custom embedding requirements + +## Auto-generated embeddings + +With this approach, embeddings are generated dynamically within OpenSearch. This method provides a simplified workflow by offering automatic text-to-vector conversion. The workflow for this approach is as follows: + +1. Choose an embedding model: + - Pretrained models + - Custom uploaded models + - Externally hosted model connections +2. Index text data +3. OpenSearch automatically generates embeddings +4. Perform semantic search + +For a complete example, see [Getting started with auto-generated embeddings]({{site.url}}{{site.baseurl}}/vector-search/getting-started/auto-generated-embeddings/). + +For a comprehensive tutorial, see [Neural search tutorial]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/). + +This approach is suitable for the following use cases: + - General-purpose search + - Rapid prototyping + - Standard text corpus + - Quick implementation + diff --git a/_vector-database/getting-started/neural-search-tutorial.md b/_vector-search/getting-started/neural-search-tutorial.md similarity index 100% rename from _vector-database/getting-started/neural-search-tutorial.md rename to _vector-search/getting-started/neural-search-tutorial.md diff --git a/_vector-database/getting-started/pre-generated-embeddings.md b/_vector-search/getting-started/pre-generated-embeddings.md similarity index 100% rename from _vector-database/getting-started/pre-generated-embeddings.md rename to _vector-search/getting-started/pre-generated-embeddings.md diff --git a/_vector-database/index.md b/_vector-search/index.md similarity index 93% rename from _vector-database/index.md rename to _vector-search/index.md index 3f3aa609b6..69ba3c789b 100644 --- a/_vector-database/index.md +++ b/_vector-search/index.md @@ -1,17 +1,17 @@ --- layout: default -title: Vector database +title: Vector search nav_order: 1 has_children: false has_toc: false nav_exclude: true -permalink: /vector-database/ +permalink: /vector-search/ redirect_from: - - /vector-database/index/ + - /vector-search/index/ - /search-plugins/vector-search/ --- -# Vector database +# Vector search OpenSearch is a comprehensive search platform that supports a variety of data types, including vectors. OpenSearch vector database functionality is seamlessly integrated with its generic database function. diff --git a/_vector-search/ingesting-data.md b/_vector-search/ingesting-data.md new file mode 100644 index 0000000000..f1a9a8b9c3 --- /dev/null +++ b/_vector-search/ingesting-data.md @@ -0,0 +1,108 @@ +--- +layout: default +title: Ingesting data +nav_order: 30 +--- + +# Ingesting data into a vector index + +After creating a vector index, you need to ingest data according to your chosen vector search approach. This guide outlines the ingestion process for each type of vector search: raw vector ingestion and auto-generated embeddings. + +## Comparison of ingestion methods + +The following table compares ingestion for each vector search method.
+ +| Feature | Data format | Ingest pipeline | Vector generation | Additional fields | +|-------------------------------|----------------------------|---------------------|---------------------------------|-----------------------------------| +| **Raw vector ingestion** | Pre-generated vectors | Not required | External | Optional metadata | +| **Auto-generated embeddings** | Text | Required | Internal (during ingestion) | Original text + embeddings | + +## Raw vector ingestion + +Raw vector ingestion does not require an ingest pipeline because the vectors are generated outside of OpenSearch. + +When working with pre-generated embeddings, you ingest the vector data directly into the `knn_vector` field: + +```json +POST /my-raw-vector-index/_doc +{ + "my_vector": [0.1, 0.2, 0.3, ..., 0.128], + "metadata": "Optional additional information" +} +``` +{% include copy-curl.html %} + +You can also use the Bulk API to ingest multiple vectors efficiently: + +```json +POST /_bulk +{"index": {"_index": "my-raw-vector-index"}} +{"my_vector": [0.1, 0.2, 0.3, ..., 0.128], "metadata": "First item"} +{"index": {"_index": "my-raw-vector-index"}} +{"my_vector": [0.2, 0.3, 0.4, ..., 0.129], "metadata": "Second item"} +``` +{% include copy-curl.html %} + +**Key characteristics:** +- Direct ingestion of vector arrays +- No transformation during ingestion +- Optional metadata fields +- Supports single document or bulk ingestion + + +## Auto-generated embeddings + +For auto-generated embeddings, you first need to set up an ingest pipeline that converts text to vectors. Then, when you ingest text data, the pipeline automatically generates the embeddings: + +```json +PUT /_ingest/pipeline/nlp-ingest-pipeline +{ + "description": "Text to dense vector pipeline", + "processors": [ + { + "text_embedding": { + "model_id": "your-model-id", + "field_map": { + "passage_text": "passage_embedding" + } + } + } + ] +} +``` +{% include copy-curl.html %} + +After setting up the pipeline, ingest text data: + +```json +POST /my-semantic-search-index/_doc +{ + "passage_text": "Your text content here", + "metadata": "Optional additional information" +} +``` +{% include copy-curl.html %} + +The pipeline automatically generates and stores the embeddings in the `passage_embedding` field. + +**Key characteristics:** +- Ingest plain text +- Automatic vector generation during ingestion +- Original text preserved +- Pipeline handles transformation + +## Best practices + +When ingesting data into vector indexes, consider the following best practices: + +1. **Batch processing**: Use the Bulk API for better performance when ingesting multiple documents, as shown in the example following this list. +2. **Pipeline monitoring**: Monitor pipeline performance and errors, especially for auto-generated embeddings. +3. **Data validation**: Ensure vector dimensions match the index configuration. +4. **Error handling**: Implement proper error handling for failed ingestion attempts. +5. **Resource management**: Monitor system resources during large-scale ingestion, especially with auto-generated embeddings.
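
For example, the following is a minimal sketch of bulk text ingestion into the semantic search index defined earlier; because that index specifies a `default_pipeline`, the embeddings are generated automatically during bulk indexing (the passage values are illustrative):

```json
POST /_bulk
{"index": {"_index": "my-semantic-search-index"}}
{"passage_text": "First passage of text", "metadata": "First item"}
{"index": {"_index": "my-semantic-search-index"}}
{"passage_text": "Second passage of text", "metadata": "Second item"}
```
{% include copy-curl.html %}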
+ +## Further reading + +- [Bulk API Documentation]({{site.url}}{{site.baseurl}}/api-reference/document-apis/bulk/) +- [Ingest Pipeline Documentation]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) +- [Text Embedding Processor Documentation]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/processors/text-embedding/) diff --git a/_vector-database/ml-powered-search/conversational-search.md b/_vector-search/ml-powered-search/conversational-search.md similarity index 100% rename from _vector-database/ml-powered-search/conversational-search.md rename to _vector-search/ml-powered-search/conversational-search.md diff --git a/_vector-database/ml-powered-search/hybrid-search.md b/_vector-search/ml-powered-search/hybrid-search.md similarity index 100% rename from _vector-database/ml-powered-search/hybrid-search.md rename to _vector-search/ml-powered-search/hybrid-search.md diff --git a/_vector-database/ml-powered-search/index.md b/_vector-search/ml-powered-search/index.md similarity index 100% rename from _vector-database/ml-powered-search/index.md rename to _vector-search/ml-powered-search/index.md diff --git a/_vector-database/ml-powered-search/multimodal-search.md b/_vector-search/ml-powered-search/multimodal-search.md similarity index 100% rename from _vector-database/ml-powered-search/multimodal-search.md rename to _vector-search/ml-powered-search/multimodal-search.md diff --git a/_vector-database/ml-powered-search/neural-sparse-search.md b/_vector-search/ml-powered-search/neural-sparse-search.md similarity index 100% rename from _vector-database/ml-powered-search/neural-sparse-search.md rename to _vector-search/ml-powered-search/neural-sparse-search.md diff --git a/_vector-database/ml-powered-search/neural-sparse-with-pipelines.md b/_vector-search/ml-powered-search/neural-sparse-with-pipelines.md similarity index 100% rename from _vector-database/ml-powered-search/neural-sparse-with-pipelines.md rename to _vector-search/ml-powered-search/neural-sparse-with-pipelines.md diff --git a/_vector-database/ml-powered-search/neural-sparse-with-raw-vectors.md b/_vector-search/ml-powered-search/neural-sparse-with-raw-vectors.md similarity index 100% rename from _vector-database/ml-powered-search/neural-sparse-with-raw-vectors.md rename to _vector-search/ml-powered-search/neural-sparse-with-raw-vectors.md diff --git a/_vector-database/ml-powered-search/semantic-search.md b/_vector-search/ml-powered-search/semantic-search.md similarity index 100% rename from _vector-database/ml-powered-search/semantic-search.md rename to _vector-search/ml-powered-search/semantic-search.md diff --git a/_vector-database/ml-powered-search/text-chunking.md b/_vector-search/ml-powered-search/text-chunking.md similarity index 100% rename from _vector-database/ml-powered-search/text-chunking.md rename to _vector-search/ml-powered-search/text-chunking.md diff --git a/_vector-database/optimizing-storage/disk-based-vector-search.md b/_vector-search/optimizing-storage/disk-based-vector-search.md similarity index 100% rename from _vector-database/optimizing-storage/disk-based-vector-search.md rename to _vector-search/optimizing-storage/disk-based-vector-search.md diff --git a/_vector-search/optimizing-storage/index.md b/_vector-search/optimizing-storage/index.md new file mode 100644 index 0000000000..75263c6506 --- /dev/null +++ b/_vector-search/optimizing-storage/index.md @@ -0,0 +1,53 @@ +--- +layout: default +title: Optimizing vector storage +nav_order: 60 +has_children: true +has_toc: false +redirect_from: + 
- /vector-database/optimizing-storage/
+---
+
+# Optimizing vector storage
+
+Vector search operations can be memory-intensive, especially when dealing with large-scale vector datasets. OpenSearch provides several optimization techniques to reduce memory usage while maintaining search performance. This section covers different approaches to optimizing vector storage and search operations.
+
+## Available optimization techniques
+
+OpenSearch supports the following vector storage optimization methods:
+
+1. **Vector quantization techniques**
+   - Byte vectors
+   - Lucene scalar quantization
+   - Faiss 16-bit scalar quantization
+   - Product quantization (PQ)
+   - Binary quantization (BQ)
+
+2. **Disk-based vector search**
+   - Reduces operational costs for vector workloads
+   - Uses binary quantization for compression
+   - Provides significant memory savings with minimal impact on search quality
+
+## Choosing an optimization method
+
+The choice of optimization method depends on your specific requirements:
+
+| Method | Best for | Memory savings | Impact on search quality |
+|--------|----------|----------------|-------------------------|
+| Disk-based search | Low-memory environments | Highest (32x reduction) | Minimal impact with rescoring |
+| Vector quantization | Balanced approach | Varies (2x-32x reduction) | Varies by technique |
+
+### When to use disk-based search
+- Limited memory environments
+- Large-scale vector operations
+- Workloads that can tolerate slightly increased search latency
+
+### When to use vector quantization
+- A need for fine-grained control over compression
+- Specific accuracy requirements
+- Varying memory/performance trade-off needs
+
+## Next steps
+
+- Learn about [Vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/) techniques
+- Explore [Disk-based vector search]({{site.url}}{{site.baseurl}}/search-plugins/knn/disk-based-vector-search/)
diff --git a/_vector-database/optimizing-storage/knn-vector-quantization.md b/_vector-search/optimizing-storage/knn-vector-quantization.md
similarity index 100%
rename from _vector-database/optimizing-storage/knn-vector-quantization.md
rename to _vector-search/optimizing-storage/knn-vector-quantization.md
diff --git a/_vector-database/performance-tuning.md b/_vector-search/performance-tuning.md
similarity index 100%
rename from _vector-database/performance-tuning.md
rename to _vector-search/performance-tuning.md
diff --git a/_vector-search/querying-data.md b/_vector-search/querying-data.md
new file mode 100644
index 0000000000..b55cdbcd53
--- /dev/null
+++ b/_vector-search/querying-data.md
@@ -0,0 +1,72 @@
+---
+layout: default
+title: Searching data
+nav_order: 35
+---
+
+# Searching vector data
+
+OpenSearch supports various methods for searching vector data, tailored to how the vectors were created and indexed. This guide explains the query syntax and options for searching raw vectors and auto-generated embeddings.
+
+## Search types comparison
+
+The following table compares the search syntax and typical use cases for each vector search method.
+
+| Feature | Query type | Input format | Model required | Use case |
+|----------------------------------|------------------|------------------|---------------------|----------------------------|
+| **Pre-generated embeddings**     | `knn`            | Vector array     | No                  | Raw vector search          |
+| **Auto-generated embeddings**    | `neural`         | Text             | Yes                 | Semantic search            |
+
+## Searching pre-generated embeddings or raw vectors
+
+For raw vector searches, use the `knn` query type and provide the vector array as input:
+
+```json
+GET /my-raw-vector-index/_search
+{
+  "query": {
+    "knn": {
+      "my_vector": {
+        "vector": [0.1, 0.2, 0.3],
+        "k": 10
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+**Key characteristics**:
+
+- Uses the `knn` query type.
+- Requires a vector array as input.
+- Specifies `k`, the number of nearest neighbors to return.
+- Does not require a model for query transformation.
+
+## Searching auto-generated embeddings
+
+For semantic searches using embeddings, use the `neural` query type and provide text input:
+
+```json
+GET /my-semantic-search-index/_search
+{
+  "query": {
+    "neural": {
+      "passage_embedding": {
+        "query_text": "What is machine learning?",
+        "model_id": "your-model-id",
+        "k": 10
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+**Key characteristics**:
+
+- Uses the `neural` query type.
+- Accepts plain text as input.
+- Requires the same `model_id` that was used during indexing.
+- Automatically converts the query text into dense vector embeddings.
+- Specifies `k`, the number of top matches to retrieve.
diff --git a/_vector-database/settings.md b/_vector-search/settings.md
similarity index 100%
rename from _vector-database/settings.md
rename to _vector-search/settings.md
diff --git a/_vector-database/vector-search/filter-search-knn.md b/_vector-search/specialized-operations/filter-search-knn.md
similarity index 99%
rename from _vector-database/vector-search/filter-search-knn.md
rename to _vector-search/specialized-operations/filter-search-knn.md
index 909bfef415..dcd98fbacd 100644
--- a/_vector-database/vector-search/filter-search-knn.md
+++ b/_vector-search/specialized-operations/filter-search-knn.md
@@ -2,7 +2,7 @@
 layout: default
 title: Vector search with filters
 nav_order: 20
-parent: Vector search
+parent: Specialized vector search
 has_children: false
 has_math: true
 redirect_from:
diff --git a/_vector-search/specialized-operations/index.md b/_vector-search/specialized-operations/index.md
new file mode 100644
index 0000000000..85ba4abba5
--- /dev/null
+++ b/_vector-search/specialized-operations/index.md
@@ -0,0 +1,23 @@
+---
+layout: default
+title: Specialized vector search
+nav_order: 45
+has_children: true
+has_toc: false
+---
+
+# Specialized vector search
+
+OpenSearch supports the following specialized vector search applications.
+
+### Vector search with filtering
+
+For information about vector search with filtering, see [Vector search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/).
+
+## Nested field vector search
+
+For information about vector search with nested fields, see [Vector search with nested fields]({{site.url}}{{site.baseurl}}/search-plugins/knn/nested-search-knn/).
+
+## Radial search
+
+With radial search, you can search all points within a vector space that reside within a specified maximum distance or minimum score threshold from a query point. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/).
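+
+For example, the following radial search query returns all documents whose vectors lie within a distance of `2` from the query vector. This is a minimal sketch that assumes a hypothetical index named `my-index` containing a two-dimensional `knn_vector` field called `my_vector`:
+
+```json
+GET /my-index/_search
+{
+  "query": {
+    "knn": {
+      "my_vector": {
+        "vector": [7.1, 8.3],
+        "max_distance": 2
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}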
diff --git a/_vector-database/vector-search/nested-search-knn.md b/_vector-search/specialized-operations/nested-search-knn.md similarity index 99% rename from _vector-database/vector-search/nested-search-knn.md rename to _vector-search/specialized-operations/nested-search-knn.md index 3132099260..e30a0181c4 100644 --- a/_vector-database/vector-search/nested-search-knn.md +++ b/_vector-search/specialized-operations/nested-search-knn.md @@ -2,7 +2,7 @@ layout: default title: Vector search with nested fields nav_order: 40 -parent: Vector search +parent: Specialized vector search has_children: false has_math: true redirect_from: diff --git a/_vector-database/vector-search/radial-search-knn.md b/_vector-search/specialized-operations/radial-search-knn.md similarity index 99% rename from _vector-database/vector-search/radial-search-knn.md rename to _vector-search/specialized-operations/radial-search-knn.md index 432ae3e3a1..27d7cb2aa2 100644 --- a/_vector-database/vector-search/radial-search-knn.md +++ b/_vector-search/specialized-operations/radial-search-knn.md @@ -2,7 +2,7 @@ layout: default title: Radial search nav_order: 50 -parent: Vector search +parent: Specialized vector search has_children: false has_math: true redirect_from: diff --git a/_vector-database/vector-search/approximate-knn.md b/_vector-search/vector-search-techniques/approximate-knn.md similarity index 99% rename from _vector-database/vector-search/approximate-knn.md rename to _vector-search/vector-search-techniques/approximate-knn.md index 4363757b0a..66e6e52de8 100644 --- a/_vector-database/vector-search/approximate-knn.md +++ b/_vector-search/vector-search-techniques/approximate-knn.md @@ -2,7 +2,7 @@ layout: default title: Approximate vector search nav_order: 15 -parent: Vector search +parent: Vector search techniques has_children: false has_math: true redirect_from: diff --git a/_vector-search/vector-search-techniques/index.md b/_vector-search/vector-search-techniques/index.md new file mode 100644 index 0000000000..f017151bda --- /dev/null +++ b/_vector-search/vector-search-techniques/index.md @@ -0,0 +1,70 @@ +--- +layout: default +title: Vector search techniques +nav_order: 40 +has_children: true +has_toc: false +redirect_from: + - /search-plugins/knn/ + - /search-plugins/knn/index/ + - /vector-database/vector-search/ +--- + +# Vector search techniques + +Short for *k-nearest neighbors*, the k-NN plugin enables users to search for the k-nearest neighbors to a query point across an index of vectors. To determine the neighbors, you can specify the space (the distance function) you want to use to measure the distance between points. + +Use cases include recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about vector search, see [Wikipedia](https://en.wikipedia.org/wiki/Nearest_neighbor_search). + +This plugin supports three different methods for obtaining the k-nearest neighbors from an index of vectors: + +- [Approximate search](#approximate-search) (approximate k-NN, or ANN): Returns approximate nearest neighbors to the query vector. Usually, approximate search algorithms sacrifice indexing speed and search accuracy in exchange for performance benefits such as lower latency, smaller memory footprints, and more scalable search. For most use cases, approximate search is the best option. + +- Exact search: A brute-force, exact k-NN search of vector fields. 
OpenSearch supports the following types of exact search:
+  - [Exact search with scoring script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/): Using a scoring script, you can apply a filter to an index before executing the nearest neighbor search.
+  - [Painless extensions]({{site.url}}{{site.baseurl}}/search-plugins/knn/painless-functions/): Adds the distance functions as Painless extensions that you can use in more complex combinations. You can use this method to perform a brute-force, exact vector search of an index, which also supports pre-filtering.
+
+
+Overall, for larger data sets, you should generally choose the approximate nearest neighbor method because it scales significantly better. For smaller data sets, where you may want to apply a filter, you should choose the custom scoring approach. If you have a more complex use case in which you need to use a distance function as part of your scoring method, you should use the Painless scripting approach.
+
+### Approximate search
+
+OpenSearch supports several algorithms for approximate vector search, each with its own advantages. For complete documentation, see [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). For more information about the search methods and engines, see [Method definitions]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#method-definitions). For method recommendations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#choosing-the-right-method).
+
+To use approximate vector search, specify one of the following search methods (algorithms) in the `method` parameter:
+
+- Hierarchical Navigable Small World (HNSW)
+- Inverted File System (IVF)
+
+Additionally, specify the engine (library) that implements this method in the `engine` parameter:
+
+- [Non-Metric Space Library (NMSLIB)](https://github.com/nmslib/nmslib)
+- [Facebook AI Similarity Search (Faiss)](https://github.com/facebookresearch/faiss)
+- Lucene
+
+The following table lists the combinations of search methods and libraries supported by the k-NN engine for approximate vector search.
+
+Method | Engine
+:--- | :---
+HNSW | NMSLIB, Faiss, Lucene
+IVF | Faiss
+
+### Engine recommendations
+
+In general, select NMSLIB or Faiss for large-scale use cases. Lucene is a good option for smaller deployments and offers benefits like smart filtering, where the optimal filtering strategy—pre-filtering, post-filtering, or exact k-NN—is automatically applied depending on the situation. The following table summarizes the differences between each option.
+
+| | NMSLIB/HNSW | Faiss/HNSW | Faiss/IVF | Lucene/HNSW |
+|:---|:---|:---|:---|:---|
+| Max dimensions | 16,000 | 16,000 | 16,000 | 16,000 |
+| Filter | Post-filter | Post-filter | Post-filter | Filter during search |
+| Training required | No | No | Yes | No |
+| Similarity metrics | `l2`, `innerproduct`, `cosinesimil`, `l1`, `linf` | `l2`, `innerproduct` | `l2`, `innerproduct` | `l2`, `cosinesimil` |
+| Number of vectors | Tens of billions | Tens of billions | Tens of billions | Less than 10 million |
+| Indexing latency | Low | Low | Lowest | Low |
+| Query latency and quality | Low latency and high quality | Low latency and high quality | Low latency and low quality | High latency and high quality |
+| Vector compression | Flat | Flat<br>Product quantization | Flat<br>Product quantization | Flat |
+| Memory consumption | High | High<br>Low with PQ | Medium<br>Low with PQ | High |
+
+
+
+
diff --git a/_vector-database/vector-search/knn-score-script.md b/_vector-search/vector-search-techniques/knn-score-script.md
similarity index 99%
rename from _vector-database/vector-search/knn-score-script.md
rename to _vector-search/vector-search-techniques/knn-score-script.md
index 6fe9b93723..c1f6cb50e5 100644
--- a/_vector-database/vector-search/knn-score-script.md
+++ b/_vector-search/vector-search-techniques/knn-score-script.md
@@ -2,7 +2,7 @@
 layout: default
 title: Exact vector search with scoring script
 nav_order: 20
-parent: Vector search
+parent: Vector search techniques
 has_children: true
 has_math: true
 redirect_from:
diff --git a/_vector-database/vector-search/painless-functions.md b/_vector-search/vector-search-techniques/painless-functions.md
similarity index 99%
rename from _vector-database/vector-search/painless-functions.md
rename to _vector-search/vector-search-techniques/painless-functions.md
index 4cacc0ba06..c582bedcf8 100644
--- a/_vector-database/vector-search/painless-functions.md
+++ b/_vector-search/vector-search-techniques/painless-functions.md
@@ -3,7 +3,7 @@ layout: default
 title: Painless extensions
 nav_order: 25
 parent: Exact vector search with scoring script
-grand_parent: Vector search
+grand_parent: Vector search techniques
 has_children: false
 has_math: true
 redirect_from:

From 3d93fc8a13937c4f62d7704c1a4bfa1161b47780 Mon Sep 17 00:00:00 2001
From: Fanit Kolchina
Date: Thu, 16 Jan 2025 11:35:04 -0500
Subject: [PATCH 03/32] Layout update

Signed-off-by: Fanit Kolchina

---
 _includes/cards.html                      | 33 +++++++++++++++++--
 _sass/_home.scss                          |  3 +-
 .../creating-vector-index/vector-field.md  |  8 ++---
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/_includes/cards.html b/_includes/cards.html
index 3fa1809506..875459d7ba 100644
--- a/_includes/cards.html
+++ b/_includes/cards.html
@@ -1,14 +1,43 @@
-

Explore OpenSearch documentation

+

OpenSearch and OpenSearch Dashboards

-

OpenSearch and OpenSearch Dashboards

+

All documentation

Build your OpenSearch solution using core tooling and visualizations

+
+ +

Vector search

+

Use vector database capabilities for more relevant search results

+ +
+ +
+ +

Machine learning

+

Power your applications with machine learning model integration

+ +
+ + +
+ +

OpenSearch Dashboards

+

Explore and visualize your data using interactive dashboards

+ +
+
+ +
+ +
+

Supporting tools

+
+

Data Prepper

diff --git a/_sass/_home.scss b/_sass/_home.scss index 9b5dd864a9..7dbe4ae93a 100644 --- a/_sass/_home.scss +++ b/_sass/_home.scss @@ -24,6 +24,7 @@ .card-container-wrapper { @include gradient-open-sky; + margin-bottom: 2rem; } .card-container { @@ -46,7 +47,7 @@ @extend .panel; @include thick-edge-left; padding: 1rem; - margin-bottom: 4rem; + margin-bottom: 2rem; text-align: left; background-color: white; display: flex; diff --git a/_vector-search/creating-vector-index/vector-field.md b/_vector-search/creating-vector-index/vector-field.md index 2164def374..cfb11624cc 100644 --- a/_vector-search/creating-vector-index/vector-field.md +++ b/_vector-search/creating-vector-index/vector-field.md @@ -12,7 +12,7 @@ The k-NN plugin introduces a custom data type, the `knn_vector`, that allows use When defining a `knn_vector` field in OpenSearch, you can select from different data types to balance storage requirements and performance. By default, k-NN vectors are float vectors, but you can also opt for byte or binary vectors for more efficient storage. -## Float Vectors +## Float vectors Float is the default type for `knn_vector` fields. @@ -20,7 +20,7 @@ Float is the default type for `knn_vector` fields. - **Precision**: High, suitable for applications requiring maximum accuracy. - **Use case**: Best for scenarios where storage cost is not a primary concern and precision is critical. -## Byte Vectors +## Byte vectors Starting with k-NN plugin version 2.17, you can use `byte` vectors with the `faiss` and `lucene` engines to reduce the amount of required memory and storage space. For more information, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors). @@ -29,7 +29,7 @@ Starting with k-NN plugin version 2.17, you can use `byte` vectors with the `fai - **Engines supported**: Available when using the `faiss` or `lucene` engine. - **Use case**: Ideal for applications that prioritize storage efficiency and can tolerate reduced precision. -## Binary Vectors +## Binary vectors Starting with k-NN plugin version 2.16, you can use `binary` vectors with the `faiss` engine to reduce the amount of required storage space. For more information, see [Binary vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). @@ -37,7 +37,7 @@ Starting with k-NN plugin version 2.16, you can use `binary` vectors with the `f - **Performance**: Provides high recall performance while significantly lowering operational costs. - **Use case**: Suitable for large-scale deployments where cost-efficiency is crucial without sacrificing search performance. 
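+
+For reference, the following mapping sketch defines a binary vector field. The field name is hypothetical; binary vectors require the `faiss` engine, the `hamming` space type, and a dimension that is a multiple of 8:
+
+```json
+"my_binary_vector": {
+  "type": "knn_vector",
+  "dimension": 8,
+  "data_type": "binary",
+  "space_type": "hamming",
+  "method": {
+    "name": "hnsw",
+    "engine": "faiss"
+  }
+}
+```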
-### Choosing the Right Data Type +### Choosing the right data type The choice of data type for your `knn_vector` field depends on your specific use case: From 47b235f99ba5fe2dbcd5633d69cff52f89858162 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Tue, 21 Jan 2025 10:33:27 -0500 Subject: [PATCH 04/32] Add cards to search topics Signed-off-by: Fanit Kolchina --- _includes/cards.html | 85 ++++--------------- _includes/home_cards.html | 72 ++++++++++++++++ _includes/list.html | 22 +++++ _sass/_home.scss | 78 ++++++++++++++++- _sass/custom/custom.scss | 37 ++++++++ _vector-search/creating-vector-index/index.md | 2 +- .../auto-generated-embeddings.md | 2 +- _vector-search/getting-started/index.md | 80 ++++++++++------- .../getting-started/neural-search-tutorial.md | 4 +- .../pre-generated-embeddings.md | 2 +- _vector-search/index.md | 57 ++++++++++++- _vector-search/ml-powered-search/index.md | 72 +++++++++------- .../ml-powered-search/text-chunking.md | 2 +- _vector-search/optimizing-storage/index.md | 2 +- .../specialized-operations/index.md | 4 +- .../vector-search-techniques/index.md | 2 +- index.md | 2 +- 17 files changed, 379 insertions(+), 146 deletions(-) create mode 100644 _includes/home_cards.html create mode 100644 _includes/list.html diff --git a/_includes/cards.html b/_includes/cards.html index 875459d7ba..2efb08523b 100644 --- a/_includes/cards.html +++ b/_includes/cards.html @@ -1,72 +1,17 @@
-

OpenSearch and OpenSearch Dashboards

-
-
- -

All documentation

-

Build your OpenSearch solution using core tooling and visualizations

- -
- - -
- -

Vector search

-

Use vector database capabilities for more relevant search results

- -
- -
- -

Machine learning

-

Power your applications with machine learning model integration

- -
- - -
- -

OpenSearch Dashboards

-

Explore and visualize your data using interactive dashboards

- -
+
+ {% for card in include.cards %} +
+ +

{{ card.heading }}

+ {% if card.description %} +

{{ card.description }}

+ {% endif %} + {% if include.documentation_link %} + + {% endif %} +
+ {% endfor %} +
- -
- -
-

Supporting tools

-
- -
- -

Data Prepper

-

Filter, mutate, and sample your data for ingestion into OpenSearch

- -
- -
- -

Clients

-

Interact with OpenSearch from your application using language APIs

- -
- - -
- -

OpenSearch Benchmark

-

Measure performance metrics for your OpenSearch cluster

- -
- -
- -

Migration Assistant

-

Migrate to OpenSearch

- -
-
- -
- + \ No newline at end of file diff --git a/_includes/home_cards.html b/_includes/home_cards.html new file mode 100644 index 0000000000..34222f1396 --- /dev/null +++ b/_includes/home_cards.html @@ -0,0 +1,72 @@ +
+

OpenSearch and OpenSearch Dashboards

+
+
+ +

All documentation

+

Build your OpenSearch solution using core tooling and visualizations

+ +
+ + +
+ +

Vector search

+

Use vector database capabilities for more relevant search results

+ +
+ +
+ +

Machine learning

+

Power your applications with machine learning model integration

+ +
+ + +
+ +

OpenSearch Dashboards

+

Explore and visualize your data using interactive dashboards

+ +
+
+ +
+ +
+

Supporting tools

+
+ +
+ +

Data Prepper

+

Filter, mutate, and sample your data for ingestion into OpenSearch

+ +
+ +
+ +

Clients

+

Interact with OpenSearch from your application using language APIs

+ +
+ + +
+ +

OpenSearch Benchmark

+

Measure performance metrics for your OpenSearch cluster

+ +
+ +
+ +

Migration Assistant

+

Migrate to OpenSearch

+ +
+
+ +
+ diff --git a/_includes/list.html b/_includes/list.html new file mode 100644 index 0000000000..c32fcdd0c5 --- /dev/null +++ b/_includes/list.html @@ -0,0 +1,22 @@ +
+ {% if include.list_title %} +
{{ include.list_title }}
+ {% endif %} + {% assign counter = 0 %} + {% for item in include.list_items %} + {% assign counter = counter | plus: 1 %} +
+
{{ counter }}
+
+
+ {% if item.link %} + {{ item.heading }} + {% else %} + {{ item.heading }} + {% endif %} +
+

{{ item.description | markdownify }}

+
+
+ {% endfor %} +
diff --git a/_sass/_home.scss b/_sass/_home.scss index 7dbe4ae93a..53645c666d 100644 --- a/_sass/_home.scss +++ b/_sass/_home.scss @@ -22,12 +22,16 @@ // Card style -.card-container-wrapper { +.home-card-container-wrapper { @include gradient-open-sky; margin-bottom: 2rem; } -.card-container { +.card-container-wrapper { + margin-bottom: 0; +} + +.home-card-container { display: grid; grid-template-columns: 1fr; margin: 0 auto; @@ -43,7 +47,23 @@ } } -.card { +.card-container { + display: grid; + grid-template-columns: 1fr; + margin: 0 auto; + padding: 2rem 0; + grid-row-gap: 1rem; + grid-column-gap: 1rem; + grid-auto-rows: 1fr; + @include mq(md) { + grid-template-columns: repeat(1, 1fr); + } + @include mq(lg) { + grid-template-columns: repeat(2, 1fr); + } +} + +.home-card { @extend .panel; @include thick-edge-left; padding: 1rem; @@ -68,6 +88,11 @@ } } +.card { + @extend .home-card; + margin-bottom: 0; +} + @mixin heading-font { @include heading-sans-serif; font-size: 1.5rem; @@ -111,6 +136,53 @@ width: 100%; } +// List layout + +.numbered-list { + display: flex; + flex-direction: column; + gap: 2rem; + padding: 1rem; +} + +.list-item { + display: flex; + align-items: flex-start; + gap: 1rem; +} + +.number-circle { + width: 2.5rem; + height: 2.5rem; + border-radius: 50%; + background-color: $blue-lt-100; + color: $blue-dk-300; + display: flex; + align-items: center; + justify-content: center; + font-weight: bold; + font-size: 1.2rem; + flex-shrink: 0; +} + +.list-content { + max-width: 100%; +} + +.list-heading { + @include heading-font; + margin: 0 0 0.75rem 0; + font-size: 1.2rem; + color: $blue-dk-300; + font-weight: bold; +} + +.list-content p { + margin: 0.5rem 0; + font-size: 1rem; + line-height: 1.5; +} + // Banner style .os-banner { diff --git a/_sass/custom/custom.scss b/_sass/custom/custom.scss index b3ee3c3775..96633eace4 100755 --- a/_sass/custom/custom.scss +++ b/_sass/custom/custom.scss @@ -307,6 +307,43 @@ img { } } +@mixin btn-dark-blue { + color: white; + background-color: $blue-300; + font-size: 1.13rem; + font-weight: 510; + border-width: 1px; + border-style: solid; + border-radius: 5px; + box-shadow: 1px 1px $grey-lt-300; + cursor: pointer; +} + +.btn-dark-blue { + @include btn-dark-blue; + border-color: $blue-dk-300; + padding: 0.5rem 1rem; + margin-left: 0.4rem; + margin-right: 0.4rem; + + &:hover:not([disabled]) { + background-color: $blue-vibrant-300; + box-shadow: 1px 2px 4px $grey-lt-300; + transform: translateY(-1px); + text-decoration: underline; + text-underline-offset: 2px; + } + + &:active { + transform: translateY(1px); + } +} + +.centering-container { + display: flex; + justify-content: center; +} + // Back to top button .top-link { display: block; diff --git a/_vector-search/creating-vector-index/index.md b/_vector-search/creating-vector-index/index.md index 3d3d25d20c..99f51c63cd 100644 --- a/_vector-search/creating-vector-index/index.md +++ b/_vector-search/creating-vector-index/index.md @@ -4,7 +4,7 @@ title: Creating a vector index nav_order: 20 has_children: true redirect_from: - - /vector-database/creating-a-vector-db/ + - /vector-search/creating-a-vector-db/ --- # Creating a vector index diff --git a/_vector-search/getting-started/auto-generated-embeddings.md b/_vector-search/getting-started/auto-generated-embeddings.md index db0871726f..1cc4d43e96 100644 --- a/_vector-search/getting-started/auto-generated-embeddings.md +++ b/_vector-search/getting-started/auto-generated-embeddings.md @@ -1,6 +1,6 @@ --- layout: default -title: Auto-generated 
embeddings +title: Auto-generated embeddings quickstart parent: Getting started nav_order: 20 --- diff --git a/_vector-search/getting-started/index.md b/_vector-search/getting-started/index.md index 2fbfd8dca7..89962fa2de 100644 --- a/_vector-search/getting-started/index.md +++ b/_vector-search/getting-started/index.md @@ -3,51 +3,71 @@ layout: default title: Getting started nav_order: 10 has_children: true +has_toc: false redirect_from: - - /vector-database/getting-started/ + - /vector-search/getting-started/ +quickstart_cards: + - heading: "Pre-generated embeddings quickstart" + description: "Use embeddings generated outside of OpenSearch" + link: "/vector-search/getting-started/pre-generated-embeddings/" + - heading: "Auto-generated embeddings quickstart" + description: "Use embeddings automatically generated within OpenSearch" + link: "/vector-search/getting-started/auto-generated-embeddings/" +tutorial_cards: + - heading: "Semantic and hybrid search tutorial" + description: "Learn how to implement semantic and hybrid search" + link: "/vector-search/getting-started/neural-search-tutorial/" +pre_items: + - heading: "Generate embeddings" + description: "Generate embeddings outside of OpenSearch using your favorite embedding utility." + - heading: "Create an OpenSearch index" + description: "Create an OpenSearch index to upload your embeddings." + link: "/vector-search/creating-vector-index/index/#pre-generated-embeddings-or-raw-vectors" + - heading: "Ingest embeddings" + description: "Ingest your embeddings into the index." + link: "/vector-search/ingesting-data/#raw-vector-ingestion" + - heading: "Search embeddings" + description: "Search your embeddings using vector search." + link: "/vector-search/querying-data/#searching-pre-generated-embeddings-or-raw-vectors" +auto_items: + - heading: "Configure an embedding model" + description: "Configure a machine learning model that will automatically generate embeddings from your text at ingest time and query time." + link: "/ml-commons-plugin/integrating-ml-models/" + - heading: "Create an OpenSearch index" + description: "Create an OpenSearch index to upload your text." + link: "/vector-search/creating-vector-index/index/#auto-generated-embeddings" + - heading: "Ingest text" + description: "Ingest your text into the index." + link: "/vector-search/ingesting-data/#auto-generated-embeddings" + - heading: "Search text" + description: "Search your text using vector search. Query text is automatically converted to vector embeddings and compared to document embeddings." + link: "/vector-search/querying-data/#searching-auto-generated-embeddings" --- # Getting started with vector search You can either upload pre-generated embeddings to OpenSearch or have OpenSearch automatically generate embeddings from your text. -## Pre-generated embeddings +{% include cards.html cards=page.quickstart_cards %} -With this approach, you generate embeddings externally and then index them into OpenSearch. This method offers greater flexibility in how embeddings are created. The workflow for this approach is as follows: +--- -1. Generate embeddings using external tools - - Custom machine learning models - - Embedding services (OpenAI, Cohere) - - Domain-specific embedding techniques -2. Ingest pre-computed vector embeddings into OpenSearch -3. 
Perform vector similarity search +## Option 1: Pre-generated embeddings -For a complete example, see [Getting started with pre-generated embeddings]({{site.url}}{{site.baseurl}}/vector-database/getting-started/pre-generated-embeddings/). +Work with embeddings generated outside of OpenSearch: -This approach is suitable for the following use cases: - - Scientific research - - Domain-specific applications - - Custom embedding requirements +{% include list.html list_items=page.pre_items%} -## Auto-generated embeddings +## Option 2: Auto-generated embeddings -With this approach, embeddings are generated dynamically within OpenSearch. This method provides a simplified workflow by offering automatic text-to-vector conversion. The workflow for this approach is as follows: +Work with text that is automatically converted to embeddings within OpenSearch: -1. Choose an embedding model: - - Pretrained models - - Custom uploaded models - - Externally hosted model connections -2. Index text data -3. OpenSearch automatically generates embeddings -4. Perform semantic search +{% include list.html list_items=page.auto_items%} -For a complete example, see [Getting started with auto-generated embeddings]({{site.url}}{{site.baseurl}}/vector-database/getting-started/auto-generated-embeddings/). +--- -For a comprehensive tutorial, see [Neural search tutorial]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/). +## Tutorial -This approach is suitable for the following use cases: - - General-purpose search - - Rapid prototyping - - Standard text corpus - - Quick implementation +For a more in-depth look into text-to-embedding search, follow a comprehensive tutorial. +{% include cards.html cards=page.tutorial_cards documentation_link=false %} \ No newline at end of file diff --git a/_vector-search/getting-started/neural-search-tutorial.md b/_vector-search/getting-started/neural-search-tutorial.md index a0d93c24c6..7133e2cca4 100644 --- a/_vector-search/getting-started/neural-search-tutorial.md +++ b/_vector-search/getting-started/neural-search-tutorial.md @@ -1,6 +1,6 @@ --- layout: default -title: Semantic search tutorial +title: Semantic and hybrid search tutorial has_children: false parent: Getting started nav_order: 30 @@ -9,7 +9,7 @@ redirect_from: - /search-plugins/neural-search-tutorial/ --- -# Semantic search tutorial +# Semantic and hybrid search tutorial By default, OpenSearch calculates document scores using the [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) algorithm. BM25 is a keyword-based algorithm that performs well on queries containing keywords but fails to capture the semantic meaning of the query terms. Semantic search, unlike keyword-based search, takes into account the meaning of the query in the search context. Thus, semantic search performs well when a query requires natural language understanding. 
diff --git a/_vector-search/getting-started/pre-generated-embeddings.md b/_vector-search/getting-started/pre-generated-embeddings.md index de483c549b..589cc0be73 100644 --- a/_vector-search/getting-started/pre-generated-embeddings.md +++ b/_vector-search/getting-started/pre-generated-embeddings.md @@ -1,6 +1,6 @@ --- layout: default -title: Pre-generated embeddings +title: Pre-generated embeddings quickstart parent: Getting started nav_order: 10 --- diff --git a/_vector-search/index.md b/_vector-search/index.md index 69ba3c789b..278e28e6d8 100644 --- a/_vector-search/index.md +++ b/_vector-search/index.md @@ -9,16 +9,65 @@ permalink: /vector-search/ redirect_from: - /vector-search/index/ - /search-plugins/vector-search/ +raw_steps: + - heading: "Pre-generated embeddings quickstart" + description: "Follow a quickstart tutorial for raw vector search." + link: "/vector-search/getting-started/pre-generated-embeddings/" + - heading: "Vector search techniques" + description: "Select a search technique and configure your vector search." + link: "/vector-search/vector-search-techniques/" + - heading: "Specialized vector search" + description: "Learn about specialized vector search use cases, such as filtering, nested field search, and radial search." + link: "/vector-search/specialized-operations/" + - heading: "Optimizing vector storage" + description: "Learn about storage saving techniques, such as disk-based vector search and vector quantization." + link: "/vector-search/optimizing-storage/" + - heading: "Performance tuning" + description: "Improve search performance." + link: "/vector-search/performance-tuning/" +ml_steps: + - heading: "Auto-generated embeddings quickstart" + description: "Follow a quickstart tutorial for text-to-embedding search." + link: "/vector-search/getting-started/auto-generated-embeddings/" + - heading: "Semantic and hybrid search tutorial" + description: "Dive into semantic search and hybrid search." + link: "/vector-search/getting-started/neural-search-tutorial/" + - heading: "ML-powered search" + description: "Learn about many ML-powered search options that OpenSearch provides." + link: "/vector-search/ml-powered-search/" + - heading: "Specialized vector search" + description: "Learn about specialized vector search use cases, such as filtering, nested field search, and radial search." + link: "/vector-search/specialized-operations/" + - heading: "Optimizing vector storage" + description: "Learn about storage saving techniques, such as disk-based vector search and vector quantization." + link: "/vector-search/optimizing-storage/" + - heading: "Performance tuning" + description: "Improve search performance." + link: "/vector-search/performance-tuning/" --- # Vector search OpenSearch is a comprehensive search platform that supports a variety of data types, including vectors. OpenSearch vector database functionality is seamlessly integrated with its generic database function. -In OpenSearch, you can generate vector embeddings, store those embeddings in an index, and use them for vector search in the following ways: +In OpenSearch, you can generate vector embeddings, store those embeddings in an index, and use them for vector search. -- **Pre-generated embeddings**: Generate embeddings using a library of your choice before ingesting them into OpenSearch. Once you ingest vectors into an index, you can perform a vector similarity search on the vector space. 
For more information, see [Working with embeddings generated outside of OpenSearch](#working-with-embeddings-generated-outside-of-opensearch). -- **Auto-generated embeddings**: Automatically generate embeddings within OpenSearch. To use embeddings for semantic search, the ingested text (the corpus) and the query need to be embedded using the same model. [Neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/) packages this functionality, eliminating the need to manage the internal details. For more information, see [Generating vector embeddings within OpenSearch](#generating-vector-embeddings-in-opensearch). + +[Get started]({{site.url}}{{site.baseurl}}/vector-search/getting-started/){: .btn-dark-blue} + +--- + +## Bring your own vectors + +If you’ve already generated your own vector embeddings, OpenSearch makes it easy to ingest and search them. Follow this documentation sequence to get started: + +{% include list.html list_items=page.raw_steps%} + +--- + +## Seamless text-to-embedding search + +Simplify your search process by letting OpenSearch handle embedding generation. Follow this documentation sequence to begin using text-to-embedding search: -To get started, see [Getting started]({{site.url}}{{site.baseurl}}/vector-database/getting-started/). +{% include list.html list_items=page.ml_steps%} \ No newline at end of file diff --git a/_vector-search/ml-powered-search/index.md b/_vector-search/ml-powered-search/index.md index 9768f3257b..f4b2d48697 100644 --- a/_vector-search/ml-powered-search/index.md +++ b/_vector-search/ml-powered-search/index.md @@ -7,50 +7,64 @@ has_toc: false redirect_from: - /neural-search-plugin/index/ - /search-plugins/neural-search/ - - /vector-database/ml-powered-search/ + - /vector-search/ml-powered-search/ +model_cards: + - heading: "Use a pretrained model provided by OpenSearch" + link: "/ml-commons-plugin/pretrained-models/" + - heading: "Upload your own model to OpenSearch" + link: "/ml-commons-plugin/custom-local-models/" + - heading: "Connect to a model hosted on an external platform" + link: "/ml-commons-plugin/remote-models/index/" +tutorial_cards: + - heading: "Semantic and hybrid search tutorial" + description: "Learn how to implement semantic and hybrid search" + link: "/vector-search/getting-started/neural-search-tutorial/" +search_method_cards: + - heading: "Semantic search" + description: "Uses dense retrieval based on text embedding models to search text data." + link: "/vector-search/ml-powered-search/semantic-search/" + - heading: "Hybrid search" + description: "Combines keyword and neural search to improve search relevance." + link: "/vector-search/ml-powered-search/hybrid-search/" + - heading: "Multimodal search" + description: "Uses multimodal embedding models to search text and image data." + link: "/vector-search/ml-powered-search/multimodal-search/" + - heading: "Neural sparse search" + description: "Uses sparse retrieval based on sparse embedding models to search text data." + link: "/vector-search/ml-powered-search/neural-sparse-search/" + - heading: "Conversational search" + description: "Uses retrieval-augmented generation and conversational memory to provide context-aware responses." + link: "/vector-search/ml-powered-search/conversational-search/" +chunking_cards: + - heading: "Text chunking" + description: "Use text chunking to ensure adherence to token limit for embedding models." 
+ link: "/vector-search/ml-powered-search/text-chunking/" --- # ML-powered search -Neural search transforms text into vectors and facilitates vector search both at ingestion time and at search time. During ingestion, neural search transforms document text into vector embeddings and indexes both the text and its vector embeddings in a vector index. When you use a neural query during search, neural search converts the query text into vector embeddings, uses vector search to compare the query and document embeddings, and returns the closest results. - -Before you ingest documents into an index, documents are passed through a machine learning (ML) model, which generates vector embeddings for the document fields. When you send a search request, the query text or image is also passed through the ML model, which generates the corresponding vector embeddings. Then neural search performs a vector search on the embeddings and returns matching documents. +ML-powered search streamlines your workflow by generating embeddings automatically. OpenSearch converts text into vectors during indexing and querying. It creates and indexes vector embeddings for documents, then processes query text into embeddings to find and return the most relevant results. ## Prerequisite -Before using neural search, you must set up an ML model. When selecting a model, you have the following options: - -- Use a pretrained model provided by OpenSearch. For more information, see [OpenSearch-provided pretrained models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/). - -- Upload your own model to OpenSearch. For more information, see [Custom local models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/custom-local-models/). +Before using text-to-embedding search, you must set up an ML model for embedding generation. When selecting a model, you have the following options: -- Connect to a foundation model hosted on an external platform. For more information, see [Connecting to remote models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/index/). +{% include cards.html cards=page.model_cards %} +--- ## Tutorial -For a step-by-step tutorial, see [Neural search tutorial]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/). - -## Using an ML model for neural search - -Once you set up an ML model, choose one of the following search methods to use your model for neural search. +{% include cards.html cards=page.tutorial_cards %} -### Semantic search - -Semantic search uses dense retrieval based on text embedding models to search text data. For detailed setup instructions, see [Semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/). - -### Hybrid search - -Hybrid search combines keyword and neural search to improve search relevance. For detailed setup instructions, see [Hybrid search]({{site.url}}{{site.baseurl}}/search-plugins/hybrid-search/). - -### Multimodal search +--- -Multimodal search uses neural search with multimodal embedding models to search text and image data. For detailed setup instructions, see [Multimodal search]({{site.url}}{{site.baseurl}}/search-plugins/multimodal-search/). +## ML-powered search methods -### Sparse search +Once you set up an ML model, choose one of the following search methods. -Sparse search uses neural search with sparse retrieval based on sparse embedding models to search text data. For detailed setup instructions, see [Sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/). 
+{% include cards.html cards=page.search_method_cards %} -### Conversational search +--- -With conversational search, you can ask questions in natural language, receive a text response, and ask additional clarifying questions. For detailed setup instructions, see [Conversational search]({{site.url}}{{site.baseurl}}/search-plugins/conversational-search/). +{% include cards.html cards=page.chunking_cards %} \ No newline at end of file diff --git a/_vector-search/ml-powered-search/text-chunking.md b/_vector-search/ml-powered-search/text-chunking.md index 77781efa7e..9242b2178a 100644 --- a/_vector-search/ml-powered-search/text-chunking.md +++ b/_vector-search/ml-powered-search/text-chunking.md @@ -2,7 +2,7 @@ layout: default title: Text chunking parent: ML-powered search -nav_order: 65 +nav_order: 80 redirect_from: - /search-plugins/text-chunking/ --- diff --git a/_vector-search/optimizing-storage/index.md b/_vector-search/optimizing-storage/index.md index 75263c6506..87d4829713 100644 --- a/_vector-search/optimizing-storage/index.md +++ b/_vector-search/optimizing-storage/index.md @@ -5,7 +5,7 @@ nav_order: 60 has_children: true has_toc: false redirect_from: - - /vector-database/optimizing-storage/ + - /vector-search/optimizing-storage/ --- # Optimizing vector storage diff --git a/_vector-search/specialized-operations/index.md b/_vector-search/specialized-operations/index.md index 85ba4abba5..974de5b637 100644 --- a/_vector-search/specialized-operations/index.md +++ b/_vector-search/specialized-operations/index.md @@ -4,13 +4,15 @@ title: Specialized vector search nav_order: 45 has_children: true has_toc: false +redirect_from: + - /vector-search/specialized-operations/ --- # Specialized vector search OpenSearch supports the following specialized vector search applications. -### Vector search with filtering +## Vector search with filters For information about vector search with filtering, see [Vector search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). 
diff --git a/_vector-search/vector-search-techniques/index.md b/_vector-search/vector-search-techniques/index.md index f017151bda..e9f486b60f 100644 --- a/_vector-search/vector-search-techniques/index.md +++ b/_vector-search/vector-search-techniques/index.md @@ -7,7 +7,7 @@ has_toc: false redirect_from: - /search-plugins/knn/ - /search-plugins/knn/index/ - - /vector-database/vector-search/ + - /vector-search/vector-search-techniques/ --- # Vector search techniques diff --git a/index.md b/index.md index 6fac0021db..ed4d943d9f 100755 --- a/index.md +++ b/index.md @@ -9,4 +9,4 @@ permalink: / {% include banner.html %} -{% include cards.html %} \ No newline at end of file +{% include home_cards.html %} \ No newline at end of file From 2ac67d9d2828296ada30b7e294c23006c59ae9d8 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Thu, 23 Jan 2025 14:41:21 -0500 Subject: [PATCH 05/32] More restructuring Signed-off-by: Fanit Kolchina --- .../supported-field-types/knn-vector.md | 10 +- _includes/home_cards.html | 16 +- _ml-commons-plugin/custom-local-models.md | 2 +- _ml-commons-plugin/remote-models/index.md | 2 +- _vector-search/api.md | 8 +- .../creating-vector-index/index-settings.md | 24 - _vector-search/creating-vector-index/index.md | 47 +- .../creating-vector-index/method.md | 100 ++-- .../creating-vector-index/vector-field.md | 33 +- .../auto-generated-embeddings.md | 112 +--- _vector-search/getting-started/index.md | 4 +- .../getting-started/neural-search-tutorial.md | 205 ++------ _vector-search/index.md | 29 +- _vector-search/ingesting-data.md | 42 +- .../ml-powered-search/hybrid-search.md | 4 +- _vector-search/ml-powered-search/index.md | 10 +- .../ml-powered-search/multimodal-search.md | 8 +- .../ml-powered-search/neural-sparse-search.md | 2 +- .../neural-sparse-with-pipelines.md | 2 +- .../ml-powered-search/semantic-search.md | 6 +- .../ml-powered-search/text-chunking.md | 2 +- .../binary-quantization.md | 182 +++++++ .../disk-based-vector-search.md | 10 +- .../faiss-16-bit-quantization.md | 154 ++++++ .../faiss-product-quantization.md | 52 ++ .../optimizing-performance/index.md | 32 ++ .../knn-vector-quantization.md | 41 ++ .../lucene-scalar-quantization.md | 115 +++++ .../performance-tuning.md | 7 +- _vector-search/optimizing-storage/index.md | 53 -- .../knn-vector-quantization.md | 486 ------------------ .../{querying-data.md => searching-data.md} | 19 +- _vector-search/settings.md | 28 +- .../filter-search-knn.md | 2 +- .../specialized-operations/index.md | 24 +- .../radial-search-knn.md | 20 +- .../approximate-knn.md | 12 +- .../vector-search-techniques/index.md | 14 +- .../knn-score-script.md | 7 +- .../painless-functions.md | 6 +- 40 files changed, 849 insertions(+), 1083 deletions(-) delete mode 100644 _vector-search/creating-vector-index/index-settings.md create mode 100644 _vector-search/optimizing-performance/binary-quantization.md rename _vector-search/{optimizing-storage => optimizing-performance}/disk-based-vector-search.md (91%) create mode 100644 _vector-search/optimizing-performance/faiss-16-bit-quantization.md create mode 100644 _vector-search/optimizing-performance/faiss-product-quantization.md create mode 100644 _vector-search/optimizing-performance/index.md create mode 100644 _vector-search/optimizing-performance/knn-vector-quantization.md create mode 100644 _vector-search/optimizing-performance/lucene-scalar-quantization.md rename _vector-search/{ => optimizing-performance}/performance-tuning.md (98%) delete mode 100644 _vector-search/optimizing-storage/index.md 
delete mode 100644 _vector-search/optimizing-storage/knn-vector-quantization.md rename _vector-search/{querying-data.md => searching-data.md} (71%) diff --git a/_field-types/supported-field-types/knn-vector.md b/_field-types/supported-field-types/knn-vector.md index 91b4457bf8..0aa48f2ef7 100644 --- a/_field-types/supported-field-types/knn-vector.md +++ b/_field-types/supported-field-types/knn-vector.md @@ -11,7 +11,7 @@ has_math: true **Introduced 1.0** {: .label .label-purple } -The [k-NN plugin]({{site.url}}{{site.baseurl}}/search-plugins/knn/index/) introduces a custom data type, the `knn_vector`, that allows users to ingest their k-NN vectors into an OpenSearch index and perform different kinds of k-NN search. The `knn_vector` field is highly configurable and can serve many different k-NN workloads. In general, a `knn_vector` field can be built either by providing a method definition or specifying a model id. +The `knn_vector` data type allows you to ingest vectors into an OpenSearch index and perform different kinds of vector search. The `knn_vector` field is highly configurable and can serve many different vector workloads. In general, a `knn_vector` field can be built either by providing a method definition or specifying a model id. ## Example @@ -53,7 +53,7 @@ The following modes are currently supported. | `in_memory` (Default) | `nmslib` | Prioritizes low-latency search. This mode uses the `nmslib` engine without any quantization applied. It is configured with the default parameter values for vector search in OpenSearch. | | `on_disk` | `faiss` | Prioritizes low-cost vector search while maintaining strong recall. By default, the `on_disk` mode uses quantization and rescoring to execute a two-pass approach to retrieve the top neighbors. The `on_disk` mode supports only `float` vector types. | -To create a k-NN index that uses the `on_disk` mode for low-cost search, send the following request: +To create a vector index that uses the `on_disk` mode for low-cost search, send the following request: ```json PUT test-index @@ -130,7 +130,7 @@ PUT test-index ## Method definitions -[Method definitions]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index#method-definitions) are used when the underlying [approximate k-NN]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) algorithm does not require training. For example, the following `knn_vector` field specifies that *nmslib*'s implementation of *hnsw* should be used for approximate k-NN search. During indexing, *nmslib* will build the corresponding *hnsw* segment files. +[Method definitions]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/) are used when the underlying [approximate k-NN]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) algorithm does not require training. For example, the following `knn_vector` field specifies that NMSLIB's implementation of HNSW should be used for approximate k-NN search. During indexing, NMSLIB will build the corresponding HNSW segment files. ```json "my_vector": { @@ -150,7 +150,7 @@ PUT test-index ## Model IDs -Model IDs are used when the underlying Approximate k-NN algorithm requires a training step. As a prerequisite, the model must be created with the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model). The +Model IDs are used when the underlying approximate k-NN algorithm requires a training step. 
As a prerequisite, the model must be created with the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model). The model contains the information needed to initialize the native library segment files. ```json @@ -180,7 +180,7 @@ In [k-NN benchmarking tests](https://github.com/opensearch-project/opensearch-be When using `byte` vectors, expect some loss of precision in the recall compared to using `float` vectors. Byte vectors are useful in large-scale applications and use cases that prioritize a reduced memory footprint in exchange for a minimal loss of recall. {: .important} -When using `byte` vectors with the `faiss` engine, we recommend using [SIMD optimization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index#simd-optimization-for-the-faiss-engine), which helps to significantly reduce search latencies and improve indexing throughput. +When using `byte` vectors with the `faiss` engine, we recommend using [SIMD optimization]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/vector-field/#simd-optimization-for-the-faiss-engine), which helps to significantly reduce search latencies and improve indexing throughput. {: .important} Introduced in k-NN plugin version 2.9, the optional `data_type` parameter defines the data type of a vector. The default value of this parameter is `float`. diff --git a/_includes/home_cards.html b/_includes/home_cards.html index 34222f1396..e467b82032 100644 --- a/_includes/home_cards.html +++ b/_includes/home_cards.html @@ -4,7 +4,7 @@

All documentation

-

Build your OpenSearch solution using core tooling and visualizations

+

Build your OpenSearch solution using core tooling and visualizations.

@@ -12,14 +12,14 @@

Vector search

-

Use vector database capabilities for more relevant search results

+

Use vector database capabilities for more relevant search results.

Machine learning

-

Power your applications with machine learning model integration

+

Power your applications with machine learning model integration.

@@ -27,7 +27,7 @@

OpenSearch Dashboards

-

Explore and visualize your data using interactive dashboards

+

Explore and visualize your data using interactive dashboards.

@@ -41,14 +41,14 @@

Data Prepper

-

Filter, mutate, and sample your data for ingestion into OpenSearch

+

Filter, mutate, and sample your data for ingestion into OpenSearch.

Clients

-

Interact with OpenSearch from your application using language APIs

+

Interact with OpenSearch from your application using language APIs.

@@ -56,14 +56,14 @@

OpenSearch Benchmark

-

Measure performance metrics for your OpenSearch cluster

+

Measure performance metrics for your OpenSearch cluster.

Migration Assistant

-

Migrate to OpenSearch

+

Migrate to OpenSearch.

diff --git a/_ml-commons-plugin/custom-local-models.md b/_ml-commons-plugin/custom-local-models.md index 09c3105f8d..b85ef0f13e 100644 --- a/_ml-commons-plugin/custom-local-models.md +++ b/_ml-commons-plugin/custom-local-models.md @@ -320,7 +320,7 @@ The response contains the tokens and weights: ## Step 5: Use the model for search -To learn how to use the model for vector search, see [Using an ML model for neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/#using-an-ml-model-for-neural-search). +To learn how to use the model for vector search, see [ML-powered search methods]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/index/#ml-powered-search-methods). ## Question answering models diff --git a/_ml-commons-plugin/remote-models/index.md b/_ml-commons-plugin/remote-models/index.md index ddde42ecec..a61a70bd2c 100644 --- a/_ml-commons-plugin/remote-models/index.md +++ b/_ml-commons-plugin/remote-models/index.md @@ -323,7 +323,7 @@ To learn how to use the model for batch ingestion in order to improve ingestion ## Step 7: Use the model for search -To learn how to use the model for vector search, see [Using an ML model for neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/#using-an-ml-model-for-neural-search). +To learn how to use the model for vector search, see [ML-powered search methods]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/index/#ml-powered-search-methods). ## Step 8 (Optional): Undeploy the model diff --git a/_vector-search/api.md b/_vector-search/api.md index 3151b70e00..9272626d9a 100644 --- a/_vector-search/api.md +++ b/_vector-search/api.md @@ -1,7 +1,7 @@ --- layout: default title: Vector API -nav_order: 90 +nav_order: 80 has_children: false redirect_from: - /search-plugins/knn/api/ @@ -9,11 +9,11 @@ redirect_from: # Vector API -The k-NN plugin adds several APIs for managing, monitoring, and optimizing your k-NN workload. +OpenSearch provides several vector APIs for managing, monitoring, and optimizing your vector workload. ## Stats -The k-NN `stats` API provides information about the current status of the k-NN plugin. The plugin keeps track of both cluster-level and node-level statistics. Cluster-level statistics have a single value for the entire cluster. Node-level statistics have a single value for each node in the cluster. You can filter the query by `nodeId` and `statName`, as shown in the following example: +The k-NN `stats` API provides information about the current status of the k-NN plugin, which implements vector search functionality. This includes both cluster-level and node-level statistics. Cluster-level statistics have a single value for the entire cluster. Node-level statistics have a single value for each node in the cluster. You can filter the query by `nodeId` and `statName`, as shown in the following example: ``` GET /_plugins/_knn/nodeId1,nodeId2/stats/statName1,statName2 @@ -351,7 +351,7 @@ Request parameter | Description `max_training_vector_count` | The maximum number of vectors from the training index to be used for training. Defaults to all the vectors in the index. Optional. `search_size` | The training data is pulled from the training index using scroll queries. This parameter defines the number of results to return per scroll query. Default is `10000`. Optional. `description` | A user-provided description of the model. Optional. -`method` | The configuration of the approximate k-NN method used for search operations. 
For more information about the available methods, see [k-NN index method definitions]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index#method-definitions). The method requires training to be valid. +`method` | The configuration of the approximate k-NN method used for search operations. For more information about the available methods, see [k-NN index method definitions]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/). The method requires training to be valid. `space_type` | The space type for which this model is trained, for example, Euclidean or cosine. Note: This value can also be set in the `method` parameter. #### Usage diff --git a/_vector-search/creating-vector-index/index-settings.md b/_vector-search/creating-vector-index/index-settings.md deleted file mode 100644 index 1f1c09e75e..0000000000 --- a/_vector-search/creating-vector-index/index-settings.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -layout: default -title: Vector index settings -parent: Creating a vector index -nav_order: 30 ---- - -# Vector index settings - -The k-NN plugin introduces several index settings that can be used to configure the k-NN structure as well. - -At the moment, several parameters defined in the settings are in the deprecation process. Those parameters should be set in the mapping instead of the index settings. Parameters set in the mapping will override the parameters set in the index settings. Setting the parameters in the mapping allows an index to have multiple `knn_vector` fields with different parameters. - -Setting | Default | Updatable | Description -:--- | :--- | :--- | :--- -`index.knn` | false | false | Whether the index should build native library indexes for the `knn_vector` fields. If set to false, the `knn_vector` fields will be stored in doc values, but approximate k-NN search functionality will be disabled. -`index.knn.algo_param.ef_search` | 100 | true | The size of the dynamic list used during k-NN searches. Higher values result in more accurate but slower searches. Only available for NMSLIB. -`index.knn.advanced.approximate_threshold` | 15,000 | true | The number of vectors a segment must have before creating specialized data structures for approximate search. Set to `-1` to disable building vector data structures and `0` to always build them. -`index.knn.algo_param.ef_construction` | 100 | false | Deprecated in 1.0.0. Instead, use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value. -`index.knn.algo_param.m` | 16 | false | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead. -`index.knn.space_type` | l2 | false | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead. - -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` and `ef_search` values (`512`). 
-{: .note}

diff --git a/_vector-search/creating-vector-index/index.md b/_vector-search/creating-vector-index/index.md
index 99f51c63cd..dcfdbfb91d 100644
--- a/_vector-search/creating-vector-index/index.md
+++ b/_vector-search/creating-vector-index/index.md
@@ -5,6 +5,8 @@ nav_order: 20
 has_children: true
 redirect_from:
   - /vector-search/creating-a-vector-db/
+  - /search-plugins/knn/knn-index/
+  - /vector-search/creating-vector-index/
 ---
 
 # Creating a vector index
@@ -55,7 +57,7 @@ Regardless of the type of vector search, the following elements are part of crea
    Set the `dimension` property to match the size of the vectors used.
 
 4. **Choose a space type**:
-   Select a distance metric for similarity comparisons, such as `l2` (Euclidean distance) or `cosine`.
+   Select a distance metric for similarity comparisons, such as `l2` (Euclidean distance) or `cosinesimil`.
 
 5. **Select a method**:
    Configure the indexing method, such as HNSW or IVF, to optimize vector search performance.
@@ -71,8 +73,8 @@ The following table summarizes key index configuration differences for the suppo
 
 | Feature | Vector field type | Ingest pipeline | Transformation | Use case |
 |--------------------------|-----------------------|---------------------|-------------------------|-------------------------|
-| **Pre-generated embeddings or raw vectors** | `knn_vector` | Not required | Direct ingestion | Raw vector search |
-| **Auto-generated embeddings** | `knn_vector` | Required | Auto-generated vectors | Semantic search |
+| **Pre-generated embeddings or raw vectors** | [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) | Not required | Direct ingestion | Raw vector search |
+| **Auto-generated embeddings** | [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) | Required | Auto-generated vectors | ML-powered search |
 
 ## Pre-generated embeddings or raw vectors
 
@@ -100,17 +102,31 @@ PUT /my-raw-vector-index
 }
 ```
 
-**Key Characteristics:**
-- Uses the `knn_vector` type.
-- Directly ingests vector data.
-- No additional transformations are required.
-- Supports custom configurations for indexing methods (e.g., FAISS).
+## Auto-generated embeddings
+
+Auto-generating embeddings requires configuring an [ingest pipeline]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) with the model ID of the embedding model:
+
+```json
+PUT /_ingest/pipeline/nlp-ingest-pipeline
+{
+  "description": "An NLP ingest pipeline",
+  "processors": [
+    {
+      "text_embedding": {
+        "model_id": "aVeif4oB5Vm0Tdw8zYO2",
+        "field_map": {
+          "text": "passage_embedding"
+        }
+      }
+    }
+  ]
+}
+```
+{% include copy-curl.html %}
 
-## Auto-generated embeddings
+For more information about configuring a model, see [Integrating ML models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/integrating-ml-models/).
 
-Auto-generating embeddings require configuring an ingest pipeline. When creating embeddings, specify the pipeline at index creation time:
+When creating an index, specify the pipeline as the default pipeline:
 
 ```json
 PUT /my-semantic-search-index
@@ -137,10 +153,11 @@ PUT /my-semantic-search-index
   }
 }
 ```
+{% include copy-curl.html %}
 
-**Key Characteristics:**
-- Uses the `knn_vector` type.
-- Includes an ingest pipeline for automatic embedding generation.
-- Dimension matches the embedding model output.
-- Includes a `text` field for the original content.
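+After the index is created, you can index a document containing only the `text` field. The following request is a minimal sketch (the document ID and text are illustrative); because `nlp-ingest-pipeline` is the default pipeline, the `text_embedding` processor is expected to populate the `passage_embedding` field automatically:
+
+```json
+PUT /my-semantic-search-index/_doc/1
+{
+  "text": "A document about vector search"
+}
+```
+{% include copy-curl.html %}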
+## Next steps +- [Ingesting data into a vector index]({{site.url}}{{site.baseurl}}/vector-search/searching-data/) +- [Vector data types]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/vector-field/) +- [Supported methods]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/) +- [k-NN vector field type]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) \ No newline at end of file diff --git a/_vector-search/creating-vector-index/method.md b/_vector-search/creating-vector-index/method.md index d31e2f0d40..8139fee715 100644 --- a/_vector-search/creating-vector-index/method.md +++ b/_vector-search/creating-vector-index/method.md @@ -7,88 +7,88 @@ nav_order: 20 # Supported methods -A _method_ definition refers to the underlying configuration of the approximate k-NN algorithm you want to use. Method definitions are used to either create a `knn_vector` field (when the method does not require training) or [create a model during training]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) that can then be used to [create a `knn_vector` field]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). +A _method_ definition refers to the underlying configuration of the [approximate k-NN]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/approximate-knn/) algorithm you want to use. Method definitions are used to either create a `knn_vector` field (when the method does not require training) or [create a model during training]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) that can then be used to [create a `knn_vector` field]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). A method definition will always contain the name of the method, the space_type the method is built for, the engine (the library) to use, and a map of parameters. Mapping parameter | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- -`name` | true | n/a | false | The identifier for the nearest neighbor method. -`space_type` | false | l2 | false | The vector space used to calculate the distance between vectors. Note: This value can also be specified at the top level of the mapping. -`engine` | false | faiss | false | The approximate k-NN library to use for indexing and search. The available libraries are `faiss`, `nmslib`, and `lucene`. -`parameters` | false | null | false | The parameters used for the nearest neighbor method. +`name` | Yes | N/A | No | The identifier for the nearest neighbor method. +`space_type` | No | `l2` | No | The vector space used to calculate the distance between vectors. Note: This value can also be specified at the top level of the mapping. +`engine` | No | `faiss` | No | The approximate k-NN library to use for indexing and search. The available libraries are `faiss`, `nmslib`, and `lucene`. +`parameters` | No | `null` | No | The parameters used for the nearest neighbor method. -### Supported nmslib methods +## Supported NMSLIB methods Method name | Requires training | Supported spaces | Description :--- | :--- | :--- | :--- -`hnsw` | false | l2, innerproduct, cosinesimil, l1, linf | Hierarchical proximity graph approach to approximate k-NN search. For more details on the algorithm, see this [abstract](https://arxiv.org/abs/1603.09320). +`hnsw` | No | `l2`, `innerproduct`, `cosinesimil`, `l1`, `linf` | Hierarchical proximity graph approach to approximate k-NN search. 
For more details on the algorithm, see this [abstract](https://arxiv.org/abs/1603.09320). -#### HNSW parameters +### HNSW parameters Parameter name | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- -`ef_construction` | false | 100 | false | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed. -`m` | false | 16 | false | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100. +`ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed. +`m` | No | 16 | No | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100. -For nmslib, *ef_search* is set in the [index settings](#index-settings). +For NMSLIB, *ef_search* is set in the [index settings]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings). {: .note} An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). {: .note} -### Supported Faiss methods +## Supported Faiss methods Method name | Requires training | Supported spaces | Description :--- | :--- |:---| :--- -`hnsw` | false | l2, innerproduct, hamming | Hierarchical proximity graph approach to approximate k-NN search. -`ivf` | true | l2, innerproduct, hamming | Stands for _inverted file index_. Bucketing approach where vectors are assigned different buckets based on clustering and, during search, only a subset of the buckets is searched. +`hnsw` | No | `l2`, `innerproduct`, `hamming` | Hierarchical proximity graph approach to approximate k-NN search. +`ivf` | Yes | `l2`, `innerproduct`, `hamming` | Stands for _inverted file index_. Bucketing approach where vectors are assigned different buckets based on clustering and, during search, only a subset of the buckets is searched. -For hnsw, "innerproduct" is not available when PQ is used. +For HNSW, `innerproduct` is not available when PQ is used. {: .note} The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). {: .note} -#### HNSW parameters +### HNSW parameters Parameter name | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- -`ef_search` | false | 100 | false | The size of the dynamic list used during k-NN searches. Higher values result in more accurate but slower searches. -`ef_construction` | false | 100 | false | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed. -`m` | false | 16 | false | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100. -`encoder` | false | flat | false | Encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index, at the expense of search accuracy. +`ef_search` | No | 100 | No | The size of the dynamic list used during k-NN searches. 
Higher values result in more accurate but slower searches. +`ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed. +`m` | No | 16 | No | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100. +`encoder` | No | flat | No | Encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index, at the expense of search accuracy. An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` and `ef_search` values (`512`). {: .note} -#### IVF parameters +### IVF parameters Parameter name | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- -`nlist` | false | 4 | false | Number of buckets to partition vectors into. Higher values may lead to more accurate searches at the expense of memory and training latency. For more information about choosing the right value, refer to [Guidelines to choose an index](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index). -`nprobes` | false | 1 | false | Number of buckets to search during query. Higher values lead to more accurate but slower searches. -`encoder` | false | flat | false | Encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index, at the expense of search accuracy. +`nlist` | No | 4 | No | Number of buckets to partition vectors into. Higher values may lead to more accurate searches at the expense of memory and training latency. For more information about choosing the right value, refer to [Guidelines to choose an index](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index). +`nprobes` | No | 1 | No | Number of buckets to search during query. Higher values lead to more accurate but slower searches. +`encoder` | No | flat | No | Encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index, at the expense of search accuracy. For more information about setting these parameters, refer to the [Faiss documentation](https://github.com/facebookresearch/faiss/wiki/Faiss-indexes). -#### IVF training requirements +### IVF training requirements -The IVF algorithm requires a training step. To create an index that uses IVF, you need to train a model with the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model), passing the IVF method definition. IVF requires that, at a minimum, there are `nlist` training data points, but it is [recommended that you use more than this](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index#how-big-is-the-dataset). Training data can be composed of either the same data that is going to be ingested or a separate dataset. +The IVF algorithm requires a training step. To create an index that uses IVF, you need to train a model with the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model), passing the IVF method definition. IVF requires that, at a minimum, there are `nlist` training data points, but we recommend [that you use more than this](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index#how-big-is-the-dataset). Training data can be composed of either the same data that is going to be ingested or a separate dataset. 
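+The following request is a minimal sketch of such a training call, assuming a training index named `train-index` that contains a 4-dimensional `knn_vector` field named `train-field` (the index, field, and model names, the dimension, and the parameter values are all illustrative):
+
+```json
+POST /_plugins/_knn/models/my-ivf-model/_train
+{
+  "training_index": "train-index",
+  "training_field": "train-field",
+  "dimension": 4,
+  "description": "An example IVF model",
+  "method": {
+    "name": "ivf",
+    "engine": "faiss",
+    "space_type": "l2",
+    "parameters": {
+      "nlist": 4,
+      "nprobes": 2
+    }
+  }
+}
+```
+{% include copy-curl.html %}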
-### Supported Lucene methods +## Supported Lucene methods Method name | Requires training | Supported spaces | Description :--- | :--- |:--------------------------------------------------------------------------------| :--- -`hnsw` | false | l2, cosinesimil, innerproduct (supported in OpenSearch 2.13 and later) | Hierarchical proximity graph approach to approximate k-NN search. +`hnsw` | No | `l2`, `cosinesimil`, `innerproduct` (supported in OpenSearch 2.13 and later) | Hierarchical proximity graph approach to approximate k-NN search. -#### HNSW parameters +### HNSW parameters Parameter name | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- -`ef_construction` | false | 100 | false | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed.
The Lucene engine uses the proprietary term "beam_width" to describe this function, which corresponds directly to "ef_construction". To be consistent throughout the OpenSearch documentation, we retain the term "ef_construction" for this parameter. -`m` | false | 16 | false | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100.
The Lucene engine uses the proprietary term "max_connections" to describe this function, which corresponds directly to "m". To be consistent throughout OpenSearch documentation, we retain the term "m" to label this parameter. +`ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed.
The Lucene engine uses the proprietary term "beam_width" to describe this function, which corresponds directly to "ef_construction". To be consistent throughout the OpenSearch documentation, we retain the term "ef_construction" for this parameter. +`m` | No | 16 | No | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100.
The Lucene engine uses the proprietary term "max_connections" to describe this function, which corresponds directly to "m". To be consistent throughout OpenSearch documentation, we retain the term "m" to label this parameter. Lucene HNSW implementation ignores `ef_search` and dynamically sets it to the value of "k" in the search request. Therefore, there is no need to make settings for `ef_search` when using the Lucene engine. {: .note} @@ -107,7 +107,7 @@ An index created in OpenSearch version 2.11 or earlier will still use the old `e } ``` -### Supported Faiss encoders +## Supported Faiss encoders You can use encoders to reduce the memory footprint of a k-NN index at the expense of search accuracy. The k-NN plugin currently supports the `flat`, `pq`, and `sq` encoders in the Faiss library. @@ -134,27 +134,27 @@ The `hnsw` method supports the `pq` encoder for OpenSearch versions 2.10 and lat Encoder name | Requires training | Description :--- | :--- | :--- -`flat` (Default) | false | Encode vectors as floating-point arrays. This encoding does not reduce memory footprint. -`pq` | true | An abbreviation for _product quantization_, it is a lossy compression technique that uses clustering to encode a vector into a fixed size of bytes, with the goal of minimizing the drop in k-NN search accuracy. At a high level, vectors are broken up into `m` subvectors, and then each subvector is represented by a `code_size` code obtained from a code book produced during training. For more information about product quantization, see [this blog post](https://medium.com/dotstar/understanding-faiss-part-2-79d90b1e5388). -`sq` | false | An abbreviation for _scalar quantization_. Starting with k-NN plugin version 2.13, you can use the `sq` encoder to quantize 32-bit floating-point vectors into 16-bit floats. In version 2.13, the built-in `sq` encoder is the SQFP16 Faiss encoder. The encoder reduces memory footprint with a minimal loss of precision and improves performance by using SIMD optimization (using AVX2 on x86 architecture or Neon on ARM64 architecture). For more information, see [Faiss scalar quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization#faiss-16-bit-scalar-quantization). +`flat` (Default) | No | Encode vectors as floating-point arrays. This encoding does not reduce memory footprint. +`pq` | Yes | An abbreviation for _product quantization_, it is a lossy compression technique that uses clustering to encode a vector into a fixed size of bytes, with the goal of minimizing the drop in k-NN search accuracy. At a high level, vectors are broken up into `m` subvectors, and then each subvector is represented by a `code_size` code obtained from a code book produced during training. For more information about product quantization, see [this blog post](https://medium.com/dotstar/understanding-faiss-part-2-79d90b1e5388). +`sq` | No | An abbreviation for _scalar quantization_. Starting with k-NN plugin version 2.13, you can use the `sq` encoder to quantize 32-bit floating-point vectors into 16-bit floats. In version 2.13, the built-in `sq` encoder is the SQFP16 Faiss encoder. The encoder reduces memory footprint with a minimal loss of precision and improves performance by using SIMD optimization (using AVX2 on x86 architecture or Neon on ARM64 architecture). For more information, see [Faiss scalar quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/faiss-16-bit-quantization/). 
-#### PQ parameters +### PQ parameters Parameter name | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- -`m` | false | 1 | false | Determines the number of subvectors into which to break the vector. Subvectors are encoded independently of each other. This vector dimension must be divisible by `m`. Maximum value is 1,024. -`code_size` | false | 8 | false | Determines the number of bits into which to encode a subvector. Maximum value is 8. For IVF, this value must be less than or equal to 8. For HNSW, this value can only be 8. +`m` | No | 1 | No | Determines the number of subvectors into which to break the vector. Subvectors are encoded independently of each other. This vector dimension must be divisible by `m`. Maximum value is 1,024. +`code_size` | No | 8 | No | Determines the number of bits into which to encode a subvector. Maximum value is 8. For IVF, this value must be less than or equal to 8. For HNSW, this value can only be 8. -#### SQ parameters +### SQ parameters Parameter name | Required | Default | Updatable | Description :--- | :--- | :-- | :--- | :--- -`type` | false | `fp16` | false | The type of scalar quantization to be used to encode 32-bit float vectors into the corresponding type. As of OpenSearch 2.13, only the `fp16` encoder type is supported. For the `fp16` encoder, vector values must be in the [-65504.0, 65504.0] range. -`clip` | false | `false` | false | If `true`, then any vector values outside of the supported range for the specified vector type are rounded so that they are in the range. If `false`, then the request is rejected if any vector values are outside of the supported range. Setting `clip` to `true` may decrease recall. +`type` | No | `fp16` | No | The type of scalar quantization to be used to encode 32-bit float vectors into the corresponding type. As of OpenSearch 2.13, only the `fp16` encoder type is supported. For the `fp16` encoder, vector values must be in the [-65504.0, 65504.0] range. +`clip` | No | `false` | No | If `true`, then any vector values outside of the supported range for the specified vector type are rounded so that they are in the range. If `false`, then the request is rejected if any vector values are outside of the supported range. Setting `clip` to `true` may decrease recall. -For more information and examples, see [Using Faiss scalar quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/#using-faiss-scalar-quantization). +For more information and examples, see [Using Faiss scalar quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/faiss-16-bit-quantization/). -#### Examples +### Examples The following example uses the `ivf` method without specifying an encoder (by default, OpenSearch uses the `flat` encoder): @@ -239,7 +239,7 @@ The following example uses the `ivf` method with an `sq` encoder of type `fp16`: } ``` -### Choosing the right method +## Choosing the right method There are several options to choose from when building your `knn_vector` field. To determine the correct methods and parameters, you should first understand the requirements of your workload and what trade-offs you are willing to make. Factors to consider are (1) query latency, (2) query quality, (3) memory limits, and (4) indexing latency. @@ -249,19 +249,19 @@ If you want to use less memory and increase indexing speed as compared to HNSW w If memory is a concern, consider adding a PQ encoder to your HNSW or IVF index. Because PQ is a lossy encoding, query quality will drop. 
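+As a sketch, the following method definition adds a `pq` encoder to a Faiss `hnsw` method. Because `pq` requires training, a definition like this is passed to the Train API rather than used directly in a field mapping, and the parameter values shown are illustrative (the vector dimension must be divisible by `m`):
+
+```json
+"method": {
+  "name": "hnsw",
+  "engine": "faiss",
+  "space_type": "l2",
+  "parameters": {
+    "encoder": {
+      "name": "pq",
+      "parameters": {
+        "m": 8,
+        "code_size": 8
+      }
+    }
+  }
+}
+```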
-You can reduce the memory footprint by a factor of 2, with a minimal loss in search quality, by using the [`fp_16` encoder]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/#faiss-16-bit-scalar-quantization). If your vector dimensions are within the [-128, 127] byte range, we recommend using the [byte quantizer]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#byte-vectors) to reduce the memory footprint by a factor of 4. To learn more about vector quantization options, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/). +You can reduce the memory footprint by a factor of 2, with a minimal loss in search quality, by using the [`fp_16` encoder]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/faiss-16-bit-quantization/). If your vector dimensions are within the [-128, 127] byte range, we recommend using the [byte quantizer]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#byte-vectors) to reduce the memory footprint by a factor of 4. To learn more about vector quantization options, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/). -### Memory estimation +## Memory estimation In a typical OpenSearch cluster, a certain portion of RAM is reserved for the JVM heap. The k-NN plugin allocates native library indexes to a portion of the remaining RAM. This portion's size is determined by the `circuit_breaker_limit` cluster setting. By default, the limit is set to 50%. Having a replica doubles the total number of vectors. {: .note } -For information about using memory estimation with vector quantization, see the [vector quantization documentation]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/#memory-estimation). +For information about using memory estimation with vector quantization, see the [vector quantization documentation]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/knn-vector-quantization/). {: .note } -#### HNSW memory estimation +### HNSW memory estimation The memory required for HNSW is estimated to be `1.1 * (4 * dimension + 8 * M)` bytes/vector. @@ -271,11 +271,11 @@ As an example, assume you have a million vectors with a dimension of 256 and M o 1.1 * (4 * 256 + 8 * 16) * 1,000,000 ~= 1.267 GB ``` -#### IVF memory estimation +### IVF memory estimation The memory required for IVF is estimated to be `1.1 * (((4 * dimension) * num_vectors) + (4 * nlist * d))` bytes. -As an example, assume you have a million vectors with a dimension of 256 and nlist of 128. The memory requirement can be estimated as follows: +As an example, assume you have a million vectors with a dimension of 256 and `nlist` of 128. The memory requirement can be estimated as follows: ``` 1.1 * (((4 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 1.126 GB diff --git a/_vector-search/creating-vector-index/vector-field.md b/_vector-search/creating-vector-index/vector-field.md index cfb11624cc..973f77f987 100644 --- a/_vector-search/creating-vector-index/vector-field.md +++ b/_vector-search/creating-vector-index/vector-field.md @@ -7,46 +7,21 @@ nav_order: 10 # Vector data types -The k-NN plugin introduces a custom data type, the `knn_vector`, that allows users to ingest their k-NN vectors into an OpenSearch index and perform different kinds of k-NN search. The `knn_vector` field is highly configurable and can serve many different k-NN workloads. 
For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/). - +The `knn_vector` data type allows you to ingest vectors into an OpenSearch index and perform different kinds of vector search. The `knn_vector` field is highly configurable and can serve many different k-NN workloads. For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/). When defining a `knn_vector` field in OpenSearch, you can select from different data types to balance storage requirements and performance. By default, k-NN vectors are float vectors, but you can also opt for byte or binary vectors for more efficient storage. ## Float vectors -Float is the default type for `knn_vector` fields. - -- **Default type**: Each dimension is stored as a 4-byte floating-point number. -- **Precision**: High, suitable for applications requiring maximum accuracy. -- **Use case**: Best for scenarios where storage cost is not a primary concern and precision is critical. +Float is the default type for `knn_vector` fields. Each dimension is stored as a 4-byte floating-point number. ## Byte vectors -Starting with k-NN plugin version 2.17, you can use `byte` vectors with the `faiss` and `lucene` engines to reduce the amount of required memory and storage space. For more information, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors). - -- **Storage efficiency**: Each dimension is stored as a signed 8-bit integer, reducing storage space significantly. - - Value range: [-128, 127]. -- **Engines supported**: Available when using the `faiss` or `lucene` engine. -- **Use case**: Ideal for applications that prioritize storage efficiency and can tolerate reduced precision. +Starting with OpenSearch version 2.17, you can use `byte` vectors with the `faiss` and `lucene` engines to reduce the amount of required memory and storage space. Each dimension is stored as a signed 8-bit integer, significantly reducing storage space. For more information, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors). ## Binary vectors -Starting with k-NN plugin version 2.16, you can use `binary` vectors with the `faiss` engine to reduce the amount of required storage space. For more information, see [Binary vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). - -- **Storage efficiency**: Memory costs are reduced by a factor of 32 compared to float vectors. -- **Performance**: Provides high recall performance while significantly lowering operational costs. -- **Use case**: Suitable for large-scale deployments where cost-efficiency is crucial without sacrificing search performance. - -### Choosing the right data type - -The choice of data type for your `knn_vector` field depends on your specific use case: - -- **Float vectors**: Use when high precision is essential, and storage space is not a limiting factor. -- **Byte vectors**: Use to save storage space while maintaining acceptable precision levels, especially for large datasets. -- **Binary vectors**: Use to achieve cost efficiency and scalability with acceptable trade-offs in precision. - -By selecting the appropriate data type, you can optimize storage, performance, and cost-effectiveness for your OpenSearch deployment. 
-
+Starting with OpenSearch version 2.16, you can use `binary` vectors with the `faiss` engine to reduce the amount of required storage space. For more information, see [Binary vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors).
 
 ## SIMD optimization for the Faiss engine

diff --git a/_vector-search/getting-started/auto-generated-embeddings.md b/_vector-search/getting-started/auto-generated-embeddings.md
index 1cc4d43e96..79bc84f841 100644
--- a/_vector-search/getting-started/auto-generated-embeddings.md
+++ b/_vector-search/getting-started/auto-generated-embeddings.md
@@ -119,7 +119,7 @@ PUT /_ingest/pipeline/nlp-ingest-pipeline
 
 ### Step 3(b): Create a vector index
 
-Now you'll create a vector by setting `index.knn` to `true`. In the index, the field named `text` will contains an image description, and a [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) field named `passage_embedding` will contains the vector embedding of the text. Additionally, set the default ingest pipeline to the `nlp-ingest-pipeline` you created in the previous step:
+Now you'll create a vector index by setting `index.knn` to `true`. In the index, the field named `text` will contain an image description, and a [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) field named `passage_embedding` will contain the vector embedding of the text. Additionally, set the default ingest pipeline to the `nlp-ingest-pipeline` you created in the previous step:
 
 
 ```json
@@ -131,9 +131,6 @@ PUT /my-nlp-index
   },
   "mappings": {
     "properties": {
-      "id": {
-        "type": "text"
-      },
       "passage_embedding": {
         "type": "knn_vector",
         "dimension": 768,
@@ -153,7 +150,7 @@ PUT /my-nlp-index
 ```
 {% include copy-curl.html %}
 
-Setting up a k-NN index allows you to later perform a vector search on the `passage_embedding` field.
+Setting up a vector index allows you to later perform a vector search on the `passage_embedding` field.
 
 ### Step 3(c): Ingest documents into the index
 
@@ -162,8 +159,7 @@ In this step, you'll ingest several sample documents into the index. The sample
 ```json
 PUT /my-nlp-index/_doc/1
 {
-  "text": "A West Virginia university women 's basketball team , officials , and a small gathering of fans are in a West Virginia arena .",
-  "id": "4319130149.jpg"
+  "text": "A man who is riding a wild horse in the rodeo is very near to falling off ."
 }
 ```
 {% include copy-curl.html %}
 
@@ -171,8 +167,7 @@ PUT /my-nlp-index/_doc/2
 ```json
 PUT /my-nlp-index/_doc/2
 {
-  "text": "A wild animal races across an uncut field with a minimal amount of trees .",
-  "id": "1775029934.jpg"
+  "text": "A rodeo cowboy , wearing a cowboy hat , is being thrown off of a wild white horse ."
 }
 ```
 {% include copy-curl.html %}
 
@@ -180,61 +175,11 @@ PUT /my-nlp-index/_doc/2
 ```json
 PUT /my-nlp-index/_doc/3
 {
-  "text": "People line the stands which advertise Freemont 's orthopedics , a cowboy rides a light brown bucking bronco .",
-  "id": "2664027527.jpg"
+  "text": "People line the stands which advertise Freemont 's orthopedics , a cowboy rides a light brown bucking bronco ."
} ``` {% include copy-curl.html %} -```json -PUT /my-nlp-index/_doc/4 -{ - "text": "A man who is riding a wild horse in the rodeo is very near to falling off .", - "id": "4427058951.jpg" -} -``` -{% include copy-curl.html %} - -```json -PUT /my-nlp-index/_doc/5 -{ - "text": "A rodeo cowboy , wearing a cowboy hat , is being thrown off of a wild white horse .", - "id": "2691147709.jpg" -} -``` -{% include copy-curl.html %} - -When the documents are ingested into the index, the `text_embedding` processor creates an additional field that contains vector embeddings and adds that field to the document. To see an example document that is indexed, search for document 1: - -```json -GET /my-nlp-index/_doc/1 -``` -{% include copy-curl.html %} - -The response includes the document `_source` containing the original `text` and `id` fields and the added `passage_embedding` field: - -```json -{ - "_index": "my-nlp-index", - "_id": "1", - "_version": 1, - "_seq_no": 0, - "_primary_term": 1, - "found": true, - "_source": { - "passage_embedding": [ - 0.04491629, - -0.34105563, - 0.036822468, - -0.14139028, - ... - ], - "text": "A West Virginia university women 's basketball team , officials , and a small gathering of fans are in a West Virginia arena .", - "id": "4319130149.jpg" - } -} -``` - ## Step 4: Search the data Now you'll search the index using semantic search. To automatically generate vector embeddings from query text, use a `neural` query and provide the model ID of the model you set up earlier so that vector embeddings for the query text are generated with the model used at ingestion time: @@ -252,7 +197,7 @@ GET /my-nlp-index/_search "passage_embedding": { "query_text": "wild west", "model_id": "aVeif4oB5Vm0Tdw8zYO2", - "k": 5 + "k": 3 } } } @@ -260,11 +205,11 @@ GET /my-nlp-index/_search ``` {% include copy-curl.html %} -The response all five documents, and the document order reflects semantic meaning: +The response contains the matching documents: ```json { - "took": 25, + "took": 127, "timed_out": false, "_shards": { "total": 1, @@ -274,57 +219,40 @@ The response all five documents, and the document order reflects semantic meanin }, "hits": { "total": { - "value": 5, + "value": 3, "relation": "eq" }, - "max_score": 0.01585195, + "max_score": 0.015851952, "hits": [ { "_index": "my-nlp-index", - "_id": "4", - "_score": 0.01585195, + "_id": "1", + "_score": 0.015851952, "_source": { - "text": "A man who is riding a wild horse in the rodeo is very near to falling off .", - "id": "4427058951.jpg" + "text": "A man who is riding a wild horse in the rodeo is very near to falling off ." } }, { "_index": "my-nlp-index", "_id": "2", - "_score": 0.015748845, - "_source": { - "text": "A wild animal races across an uncut field with a minimal amount of trees.", - "id": "1775029934.jpg" - } - }, - { - "_index": "my-nlp-index", - "_id": "5", "_score": 0.015177963, "_source": { - "text": "A rodeo cowboy , wearing a cowboy hat , is being thrown off of a wild white horse .", - "id": "2691147709.jpg" - } - }, - { - "_index": "my-nlp-index", - "_id": "1", - "_score": 0.013272902, - "_source": { - "text": "A West Virginia university women 's basketball team , officials , and a small gathering of fans are in a West Virginia arena .", - "id": "4319130149.jpg" + "text": "A rodeo cowboy , wearing a cowboy hat , is being thrown off of a wild white horse ." 
} }, { "_index": "my-nlp-index", "_id": "3", - "_score": 0.011347735, + "_score": 0.011347729, "_source": { - "text": "People line the stands which advertise Freemont 's orthopedics , a cowboy rides a light brown bucking bronco .", - "id": "2664027527.jpg" + "text": "People line the stands which advertise Freemont 's orthopedics , a cowboy rides a light brown bucking bronco ." } } ] } } ``` + +## Next steps + +- Learn about configuring semantic and hybrid search in the [Semantic and hybrid search tutorial]({{site.url}}{{site.baseurl}}/vector-search/getting-started/neural-search-tutorial/) \ No newline at end of file diff --git a/_vector-search/getting-started/index.md b/_vector-search/getting-started/index.md index 89962fa2de..113ccc820c 100644 --- a/_vector-search/getting-started/index.md +++ b/_vector-search/getting-started/index.md @@ -28,7 +28,7 @@ pre_items: link: "/vector-search/ingesting-data/#raw-vector-ingestion" - heading: "Search embeddings" description: "Search your embeddings using vector search." - link: "/vector-search/querying-data/#searching-pre-generated-embeddings-or-raw-vectors" + link: "/vector-search/searching-data/#searching-pre-generated-embeddings-or-raw-vectors" auto_items: - heading: "Configure an embedding model" description: "Configure a machine learning model that will automatically generate embeddings from your text at ingest time and query time." @@ -41,7 +41,7 @@ auto_items: link: "/vector-search/ingesting-data/#auto-generated-embeddings" - heading: "Search text" description: "Search your text using vector search. Query text is automatically converted to vector embeddings and compared to document embeddings." - link: "/vector-search/querying-data/#searching-auto-generated-embeddings" + link: "/vector-search/searching-data/#searching-auto-generated-embeddings" --- # Getting started with vector search diff --git a/_vector-search/getting-started/neural-search-tutorial.md b/_vector-search/getting-started/neural-search-tutorial.md index 7133e2cca4..eead670988 100644 --- a/_vector-search/getting-started/neural-search-tutorial.md +++ b/_vector-search/getting-started/neural-search-tutorial.md @@ -20,13 +20,11 @@ In this tutorial, you'll learn how to implement the following types of search: ## OpenSearch components for semantic search -In this tutorial, you'll implement semantic search using the following OpenSearch components: +In this tutorial, you'll use the following OpenSearch components: -- [Model group]({{site.url}}{{site.baseurl}}/ml-commons-plugin/model-access-control#model-groups) - [Pretrained language models provided by OpenSearch]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/) - [Ingest pipeline]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) - [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) -- [Neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/) - [Search pipeline]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/index/) - [Normalization processor]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/normalization-processor/) - [Hybrid query]({{site.url}}{{site.baseurl}}/query-dsl/compound/hybrid/) @@ -62,18 +60,15 @@ For more information about ML-related cluster settings, see [ML Commons cluster This tutorial consists of the following steps: -1. [**Set up an ML language model**](#step-1-set-up-an-ml-language-model). - 1. [Choose a language model](#step-1a-choose-a-language-model). - 1. 
[Register a model group](#step-1b-register-a-model-group). - 1. [Register the model to the model group](#step-1c-register-the-model-to-the-model-group). - 1. [Deploy the model](#step-1d-deploy-the-model). -1. [**Ingest data with neural search**](#step-2-ingest-data-with-neural-search). - 1. [Create an ingest pipeline for neural search](#step-2a-create-an-ingest-pipeline-for-neural-search). - 1. [Create a k-NN index](#step-2b-create-a-k-nn-index). - 1. [Ingest documents into the index](#step-2c-ingest-documents-into-the-index). -1. [**Search the data**](#step-3-search-the-data). +1. [**Choose a language model**](#step-1-choose-a-language-model). +1. [**Register and deploy the model**](#step-2-register-and-deploy-the-model). +1. [**Ingest data**](#step-3-ingest-data). + 1. [Create an ingest pipeline for embedding generation](#step-3a-create-an-ingest-pipeline). + 1. [Create a vector index](#step-3b-create-a-vector-index). + 1. [Ingest documents into the index](#step-3c-ingest-documents-into-the-index). +1. [**Search the data**](#step-4-search-the-data). - [Search using a keyword search](#search-using-a-keyword-search). - - [Search using a neural search](#search-using-a-neural-search). + - [Search using a semantic search](#search-using-a-semantic-search). - [Search using a hybrid search](#search-using-a-hybrid-search). Some steps in the tutorial contain optional `Test it` sections. You can ensure that the step was successful by running requests in these sections. @@ -84,11 +79,9 @@ After you're done, follow the steps in the [Clean up](#clean-up) section to dele You can follow this tutorial using your command line or the OpenSearch Dashboards [Dev Tools console]({{site.url}}{{site.baseurl}}/dashboards/dev-tools/run-queries/). -## Step 1: Set up an ML language model +## Step 1: Choose a language model -Neural search requires a language model in order to generate vector embeddings from text fields, both at ingestion time and query time. - -### Step 1(a): Choose a language model +First, you'll need to choose a language model in order to generate vector embeddings from text fields, both at ingestion time and query time. For this tutorial, you'll use the [DistilBERT](https://huggingface.co/docs/transformers/model_doc/distilbert) model from Hugging Face. It is one of the pretrained sentence transformer models available in OpenSearch that has shown some of the best results in benchmarking tests (for details, see [this blog post](https://opensearch.org/blog/semantic-science-benchmarks/)). You'll need the name, version, and dimension of the model to register it. You can find this information in the [pretrained model table]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/#sentence-transformers) by selecting the `config_url` link corresponding to the model's TorchScript artifact: @@ -96,7 +89,7 @@ For this tutorial, you'll use the [DistilBERT](https://huggingface.co/docs/trans - The model version is `1.0.1`. - The number of dimensions for this model is `768`. -Take note of the dimensionality of the model because you'll need it when you set up a k-NN index. +Take note of the dimensionality of the model because you'll need it when you set up a vector index. {: .important} #### Advanced: Using a different model @@ -111,108 +104,15 @@ Alternatively, you can choose one of the following options for your model: For information about choosing a model, see [Further reading](#further-reading). 
-### Step 1(b): Register a model group - -For access control, models are organized into model groups (collections of versions of a particular model). Each model group name in the cluster must be globally unique. Registering a model group ensures the uniqueness of the model group name. - -If you are registering the first version of a model without first registering the model group, a new model group is created automatically. For more information, see [Model access control]({{site.url}}{{site.baseurl}}/ml-commons-plugin/model-access-control/). -{: .tip} - -To register a model group with the access mode set to `public`, send the following request: - -```json -POST /_plugins/_ml/model_groups/_register -{ - "name": "NLP_model_group", - "description": "A model group for NLP models", - "access_mode": "public" -} -``` -{% include copy-curl.html %} - -OpenSearch sends back the model group ID: - -```json -{ - "model_group_id": "Z1eQf4oB5Vm0Tdw8EIP2", - "status": "CREATED" -} -``` - -You'll use this ID to register the chosen model to the model group. - -
- - Test it - - {: .text-delta} +## Step 2: Register and deploy the model -Search for the newly created model group by providing its model group ID in the request: - -```json -POST /_plugins/_ml/model_groups/_search -{ - "query": { - "match": { - "_id": "Z1eQf4oB5Vm0Tdw8EIP2" - } - } -} -``` -{% include copy-curl.html %} - -The response contains the model group: - -```json -{ - "took": 0, - "timed_out": false, - "_shards": { - "total": 1, - "successful": 1, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total": { - "value": 1, - "relation": "eq" - }, - "max_score": 1, - "hits": [ - { - "_index": ".plugins-ml-model-group", - "_id": "Z1eQf4oB5Vm0Tdw8EIP2", - "_version": 1, - "_seq_no": 14, - "_primary_term": 2, - "_score": 1, - "_source": { - "created_time": 1694357262582, - "access": "public", - "latest_version": 0, - "last_updated_time": 1694357262582, - "name": "NLP_model_group", - "description": "A model group for NLP models" - } - } - ] - } -} -``` -
-</details>
-
-### Step 1(c): Register the model to the model group
-
-To register the model to the model group, provide the model group ID in the register request:
+To register the model, send the following request:
 
 ```json
 POST /_plugins/_ml/models/_register
 {
   "name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b",
   "version": "1.0.1",
-  "model_group_id": "Z1eQf4oB5Vm0Tdw8EIP2",
   "model_format": "TORCH_SCRIPT"
 }
 ```
 {% include copy-curl.html %}
 
@@ -234,7 +134,9 @@ GET /_plugins/_ml/tasks/aFeif4oB5Vm0Tdw8yoN7
 ```
 {% include copy-curl.html %}
 
-Once the task is complete, the task state will be `COMPLETED` and the Tasks API response will contain a model ID for the registered model:
+OpenSearch saves the registered model in the model index. Deploying a model creates a model instance and caches the model in memory.
+
+Once the task is complete, the task state will be `COMPLETED` and the Tasks API response will contain a model ID for the deployed model:
 
 ```json
 {
@@ -324,55 +226,12 @@ POST /_plugins/_ml/models/_register
     "all_config": "{\"_name_or_path\":\"old_models/msmarco-distilbert-base-tas-b/0_Transformer\",\"activation\":\"gelu\",\"architectures\":[\"DistilBertModel\"],\"attention_dropout\":0.1,\"dim\":768,\"dropout\":0.1,\"hidden_dim\":3072,\"initializer_range\":0.02,\"max_position_embeddings\":512,\"model_type\":\"distilbert\",\"n_heads\":12,\"n_layers\":6,\"pad_token_id\":0,\"qa_dropout\":0.1,\"seq_classif_dropout\":0.2,\"sinusoidal_pos_embds\":false,\"tie_weights_\":true,\"transformers_version\":\"4.7.0\",\"vocab_size\":30522}"
   },
   "created_time": 1676074079195,
-  "model_group_id": "Z1eQf4oB5Vm0Tdw8EIP2",
   "url": "https://artifacts.opensearch.org/models/ml-models/huggingface/sentence-transformers/msmarco-distilbert-base-tas-b/1.0.1/onnx/sentence-transformers_msmarco-distilbert-base-tas-b-1.0.1-onnx.zip"
 }
 ```
 
 For more information, see [Using ML models within OpenSearch]({{site.url}}{{site.baseurl}}/ml-commons-plugin/using-ml-models/).
 
-### Step 1(d): Deploy the model
-
-Once the model is registered, it is saved in the model index. Next, you'll need to deploy the model. Deploying a model creates a model instance and caches the model in memory. To deploy the model, provide its model ID to the `_deploy` endpoint:
-
-```json
-POST /_plugins/_ml/models/aVeif4oB5Vm0Tdw8zYO2/_deploy
-```
-{% include copy-curl.html %}
-
-Like the register operation, the deploy operation is asynchronous, so you'll get a task ID in the response:
-
-```json
-{
-  "task_id": "ale6f4oB5Vm0Tdw8NINO",
-  "status": "CREATED"
-}
-```
-
-You can check the status of the task by using the Tasks API:
-
-```json
-GET /_plugins/_ml/tasks/ale6f4oB5Vm0Tdw8NINO
-```
-{% include copy-curl.html %}
-
-Once the task is complete, the task state will be `COMPLETED`:
-
-```json
-{
-  "model_id": "aVeif4oB5Vm0Tdw8zYO2",
-  "task_type": "DEPLOY_MODEL",
-  "function_name": "TEXT_EMBEDDING",
-  "state": "COMPLETED",
-  "worker_node": [
-    "4p6FVOmJRtu3wehDD74hzQ"
-  ],
-  "create_time": 1694360024141,
-  "last_update_time": 1694360027940,
-  "is_async": true
-}
-```
Test it @@ -425,13 +284,13 @@ GET /_plugins/_ml/profile/models ```
-## Step 2: Ingest data with neural search +## Step 3: Ingest data -Neural search uses a language model to transform text into vector embeddings. During ingestion, neural search creates vector embeddings for the text fields in the request. During search, you can generate vector embeddings for the query text by applying the same model, allowing you to perform vector similarity search on the documents. +OpenSearch uses a language model to transform text into vector embeddings. During ingestion, OpenSearch creates vector embeddings for the text fields in the request. During search, you can generate vector embeddings for the query text by applying the same model, allowing you to perform vector similarity search on the documents. -### Step 2(a): Create an ingest pipeline for neural search +### Step 3(a): Create an ingest pipeline -Now that you have deployed a model, you can use this model to configure [neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/). First, you need to create an [ingest pipeline]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) that contains one processor: a task that transforms document fields before documents are ingested into an index. For neural search, you'll set up a `text_embedding` processor that creates vector embeddings from text. You'll need the `model_id` of the model you set up in the previous section and a `field_map`, which specifies the name of the field from which to take the text (`text`) and the name of the field in which to record embeddings (`passage_embedding`): +Now that you have deployed a model, you can use this model to configure an [ingest pipeline]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) that contains one processor: a task that transforms document fields before documents are ingested into an index. In this example, you'll set up a `text_embedding` processor that creates vector embeddings from text. You'll need the `model_id` of the model you set up in the previous section and a `field_map`, which specifies the name of the field from which to take the text (`text`) and the name of the field in which to record embeddings (`passage_embedding`): ```json PUT /_ingest/pipeline/nlp-ingest-pipeline @@ -485,9 +344,9 @@ The response contains the ingest pipeline: ``` -### Step 2(b): Create a k-NN index +### Step 3(b): Create a vector index -Now you'll create a k-NN index with a field named `text`, which contains an image description, and a [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) field named `passage_embedding`, which contains the vector embedding of the text. Additionally, set the default ingest pipeline to the `nlp-ingest-pipeline` you created in the previous step: +Now you'll create a vector index with a field named `text`, which contains an image description, and a [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) field named `passage_embedding`, which contains the vector embedding of the text. Additionally, set the default ingest pipeline to the `nlp-ingest-pipeline` you created in the previous step: ```json @@ -521,7 +380,7 @@ PUT /my-nlp-index ``` {% include copy-curl.html %} -Setting up a k-NN index allows you to later perform a vector search on the `passage_embedding` field. +Setting up a vector index allows you to later perform a vector search on the `passage_embedding` field.
@@ -543,7 +402,7 @@ GET /my-nlp-index/_mappings
-### Step 2(c): Ingest documents into the index +### Step 3(c): Ingest documents into the index In this step, you'll ingest several sample documents into the index. The sample data is taken from the [Flickr image dataset](https://www.kaggle.com/datasets/hsankesara/flickr-image-dataset). Each document contains a `text` field corresponding to the image description and an `id` field corresponding to the image ID: @@ -623,9 +482,9 @@ The response includes the document `_source` containing the original `text` and } ``` -## Step 3: Search the data +## Step 4: Search the data -Now you'll search the index using keyword search, neural search, and a combination of the two. +Now you'll search the index using keyword search, semantic search, and a combination of the two. ### Search using a keyword search @@ -717,9 +576,9 @@ Document 3 is not returned because it does not contain the specified keywords. D ``` -### Search using a neural search +### Search using a semantic search -To search using a neural search, use a `neural` query and provide the model ID of the model you set up earlier so that vector embeddings for the query text are generated with the model used at ingestion time: +To search using semantic search, use a `neural` query and provide the model ID of the model you set up earlier so that vector embeddings for the query text are generated with the model used at ingestion time: ```json GET /my-nlp-index/_search @@ -742,7 +601,7 @@ GET /my-nlp-index/_search ``` {% include copy-curl.html %} -This time, the response not only contains all five documents, but the document order is also improved because neural search considers semantic meaning: +This time, the response not only contains all five documents, but the document order is also improved because semantic search considers semantic meaning:
@@ -820,7 +679,7 @@ This time, the response not only contains all five documents, but the document o
 ### Search using a hybrid search
 
-Hybrid search combines keyword and neural search to improve search relevance. To implement hybrid search, you need to set up a [search pipeline]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/index/) that runs at search time. The search pipeline you'll configure intercepts search results at an intermediate stage and applies the [`normalization-processor`]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/normalization-processor/) to them. The `normalization-processor` normalizes and combines the document scores from multiple query clauses, rescoring the documents according to the chosen normalization and combination techniques.
+Hybrid search combines keyword and semantic search to improve search relevance. To implement hybrid search, you need to set up a [search pipeline]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/index/) that runs at search time. The search pipeline you'll configure intercepts search results at an intermediate stage and applies the [`normalization-processor`]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/normalization-processor/) to them. The `normalization-processor` normalizes and combines the document scores from multiple query clauses, rescoring the documents according to the chosen normalization and combination techniques.
 
 #### Step 1: Configure a search pipeline
@@ -1019,4 +878,4 @@ DELETE /_plugins/_ml/model_groups/Z1eQf4oB5Vm0Tdw8EIP2
 ## Further reading
 
 - Read about the basics of OpenSearch semantic search in [Building a semantic search engine in OpenSearch](https://opensearch.org/blog/semantic-search-solutions/).
-- Read about the benefits of combining keyword and neural search, the normalization and combination technique options, and benchmarking tests in [The ABCs of semantic search in OpenSearch: Architectures, benchmarks, and combination strategies](https://opensearch.org/blog/semantic-science-benchmarks/).
+- Read about the benefits of combining keyword and semantic search, the normalization and combination technique options, and benchmarking tests in [The ABCs of semantic search in OpenSearch: Architectures, benchmarks, and combination strategies](https://opensearch.org/blog/semantic-science-benchmarks/).
diff --git a/_vector-search/index.md b/_vector-search/index.md
index 278e28e6d8..fd35839bec 100644
--- a/_vector-search/index.md
+++ b/_vector-search/index.md
@@ -19,12 +19,9 @@ raw_steps:
   - heading: "Specialized vector search"
     description: "Learn about specialized vector search use cases, such as filtering, nested field search, and radial search."
     link: "/vector-search/specialized-operations/"
-  - heading: "Optimizing vector storage"
-    description: "Learn about storage saving techniques, such as disk-based vector search and vector quantization."
-    link: "/vector-search/optimizing-storage/"
-  - heading: "Performance tuning"
-    description: "Improve search performance."
-    link: "/vector-search/performance-tuning/"
+  - heading: "Optimizing vector search performance"
+    description: "Learn about optimizing vector search to reduce memory usage and improve performance."
+    link: "/vector-search/optimizing-performance/"
 ml_steps:
   - heading: "Auto-generated embeddings quickstart"
     description: "Follow a quickstart tutorial for text-to-embedding search."
@@ -35,22 +32,20 @@ ml_steps:
   - heading: "ML-powered search"
     description: "Learn about many ML-powered search options that OpenSearch provides."
     link: "/vector-search/ml-powered-search/"
-  - heading: "Specialized vector search"
-    description: "Learn about specialized vector search use cases, such as filtering, nested field search, and radial search."
-    link: "/vector-search/specialized-operations/"
-  - heading: "Optimizing vector storage"
-    description: "Learn about storage saving techniques, such as disk-based vector search and vector quantization."
-    link: "/vector-search/optimizing-storage/"
-  - heading: "Performance tuning"
-    description: "Improve search performance."
-    link: "/vector-search/performance-tuning/"
+  - heading: "Optimizing vector search performance"
+    description: "Learn about optimizing vector search to reduce memory usage and improve performance."
+    link: "/vector-search/optimizing-performance/"
 ---
 
 # Vector search
 
-OpenSearch is a comprehensive search platform that supports a variety of data types, including vectors. OpenSearch vector database functionality is seamlessly integrated with its generic database function.
+Traditional lexical search, based on term frequency models like BM25, is widely used and effective for many search applications. However, lexical search techniques require significant investment in time and expertise to tune them to account for the meaning or relevance of the terms searched. To embed semantic understanding into your search application, you can use machine learning embedding models that can encode the meaning and context of documents, images, and audio into vectors for similarity search. These embedded meanings can be searched using the k-nearest neighbors (k-NN) functionality provided by OpenSearch.
+
+Using OpenSearch as a vector database brings together the power of traditional search, analytics, and vector search in one complete package. OpenSearch’s vector database capabilities can accelerate artificial intelligence (AI) application development by reducing the effort for builders to operationalize, manage, and integrate AI-generated assets. Bring your models, vectors, and metadata into OpenSearch to power vector, lexical, and hybrid search and analytics, with performance and scalability built in.
+
+## Using OpenSearch as a vector database
 
-In OpenSearch, you can generate vector embeddings, store those embeddings in an index, and use them for vector search.
+OpenSearch provides an integrated vector database that can support AI systems by serving as a knowledge base. This benefits AI applications like generative AI and natural language search by providing a long-term memory of AI-generated outputs. These outputs can be used to enhance information retrieval and analytics, improve efficiency and stability, and give generative AI models a broader and deeper pool of data from which to draw more accurate responses to queries.
 
 [Get started]({{site.url}}{{site.baseurl}}/vector-search/getting-started/){: .btn-dark-blue}
diff --git a/_vector-search/ingesting-data.md b/_vector-search/ingesting-data.md
index f1a9a8b9c3..635485c44c 100644
--- a/_vector-search/ingesting-data.md
+++ b/_vector-search/ingesting-data.md
@@ -19,8 +19,6 @@ The following table compares ingestion for each vector search method.
 
 ## Raw vector ingestion
 
-Raw vector ingestion does not require an ingest pipeline because vectors are pre-generated outside of OpenSearch.
-
 When working with pre-generated embeddings, you directly ingest vector data into the `knn_vector` field.
No pipeline is required because the vectors are already generated: ```json @@ -43,17 +41,9 @@ POST /_bulk ``` {% include copy-curl.html %} -**Key Characteristics:** -- Direct ingestion of vector arrays -- No transformation during ingestion -- Optional metadata fields -- Supports single document or bulk ingestion - - ## Auto-generated embeddings -For auto-generated embeddings, you first need to set up an ingest pipeline that will convert text to vectors. Then, you ingest text data, and the pipeline automatically generates the embeddings: - +For auto-generated embeddings, you first need to set up an ingest pipeline that will convert text to vectors: ```json PUT /_ingest/pipeline/nlp-ingest-pipeline { @@ -72,37 +62,21 @@ PUT /_ingest/pipeline/nlp-ingest-pipeline ``` {% include copy-curl.html %} -After setting up the pipeline, ingest text data: +Then, you ingest text data, and the pipeline automatically generates the embeddings: ```json POST /my-semantic-search-index/_doc { - "passage_text": "Your text content here", - "metadata": "Optional additional information" + "passage_text": "Your text content here" } ``` {% include copy-curl.html %} The pipeline automatically generates and stores the embeddings in the `passage_embedding` field. -**Key Characteristics:** -- Ingest plain text -- Automatic vector generation during ingestion -- Original text preserved -- Pipeline handles transformation - -## Best practices - -When ingesting data into vector indexes, consider the following best practices: - -1. **Batch processing**: Use the Bulk API for better performance when ingesting multiple documents. -2. **Pipeline monitoring**: Monitor pipeline performance and errors, especially for auto-generated embeddings. -3. **Data validation**: Ensure vector dimensions match the index configuration. -4. **Error handling**: Implement proper error handling for failed ingestion attempts. -5. **Resource management**: Monitor system resources during large-scale ingestion, especially with auto-generated embeddings. - -## Further reading +## Next steps -- [Bulk API Documentation]({{site.url}}{{site.baseurl}}/api-reference/document-apis/bulk/) -- [Ingest Pipeline Documentation]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) -- [Text Embedding Processor Documentation]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/processors/text-embedding/) +- [Searching vector data]({{site.url}}{{site.baseurl}}/vector-search/searching-data/) +- [Bulk API]({{site.url}}{{site.baseurl}}/api-reference/document-apis/bulk/) +- [Ingest pipelines]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) +- [Text embedding processor]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/processors/text-embedding/) diff --git a/_vector-search/ml-powered-search/hybrid-search.md b/_vector-search/ml-powered-search/hybrid-search.md index e2eedd8038..4cce40fc53 100644 --- a/_vector-search/ml-powered-search/hybrid-search.md +++ b/_vector-search/ml-powered-search/hybrid-search.md @@ -4,13 +4,15 @@ title: Hybrid search parent: ML-powered search has_children: false nav_order: 60 +redirect_from: + - /search-plugins/hybrid-search/ --- # Hybrid search Introduced 2.11 {: .label .label-purple } -Hybrid search combines keyword and neural search to improve search relevance. To implement hybrid search, you need to set up a [search pipeline]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/index/) that runs at search time. 
The search pipeline you'll configure intercepts search results at an intermediate stage and applies the [`normalization_processor`]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/normalization-processor/) to them. The `normalization_processor` normalizes and combines the document scores from multiple query clauses, rescoring the documents according to the chosen normalization and combination techniques. +Hybrid search combines keyword and semantic search to improve search relevance. To implement hybrid search, you need to set up a [search pipeline]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/index/) that runs at search time. The search pipeline you'll configure intercepts search results at an intermediate stage and applies the [`normalization_processor`]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/normalization-processor/) to them. The `normalization_processor` normalizes and combines the document scores from multiple query clauses, rescoring the documents according to the chosen normalization and combination techniques. **PREREQUISITE**
To follow this example, you must set up a text embedding model. For more information, see [Choosing a model]({{site.url}}{{site.baseurl}}/ml-commons-plugin/integrating-ml-models/#choosing-a-model). If you have already generated text embeddings, ingest the embeddings into an index and skip to [Step 4](#step-4-configure-a-search-pipeline). diff --git a/_vector-search/ml-powered-search/index.md b/_vector-search/ml-powered-search/index.md index f4b2d48697..e15458d7fb 100644 --- a/_vector-search/ml-powered-search/index.md +++ b/_vector-search/ml-powered-search/index.md @@ -1,7 +1,7 @@ --- layout: default title: ML-powered search -nav_order: 50 +nav_order: 45 has_children: true has_toc: false redirect_from: @@ -24,7 +24,7 @@ search_method_cards: description: "Uses dense retrieval based on text embedding models to search text data." link: "/vector-search/ml-powered-search/semantic-search/" - heading: "Hybrid search" - description: "Combines keyword and neural search to improve search relevance." + description: "Combines keyword and semantic search to improve search relevance." link: "/vector-search/ml-powered-search/hybrid-search/" - heading: "Multimodal search" description: "Uses multimodal embedding models to search text and image data." @@ -49,7 +49,11 @@ ML-powered search streamlines your workflow by generating embeddings automatical Before using text-to-embedding search, you must set up an ML model for embedding generation. When selecting a model, you have the following options: -{% include cards.html cards=page.model_cards %} +- Use a pretrained model provided by OpenSearch. For more information, see [OpenSearch-provided pretrained models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/). + +- Upload your own model to OpenSearch. For more information, see [Custom local models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/custom-local-models/). + +- Connect to a foundation model hosted on an external platform. For more information, see [Connecting to externally hosted models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/index/). --- diff --git a/_vector-search/ml-powered-search/multimodal-search.md b/_vector-search/ml-powered-search/multimodal-search.md index dc49ba40c3..e74a6ff7b5 100644 --- a/_vector-search/ml-powered-search/multimodal-search.md +++ b/_vector-search/ml-powered-search/multimodal-search.md @@ -13,7 +13,7 @@ redirect_from: Introduced 2.11 {: .label .label-purple } -Use multimodal search to search text and image data. In neural search, text search is facilitated by multimodal embedding models. +Use multimodal search to search text and image data using multimodal embedding models. **PREREQUISITE**
Before using text search, you must set up a multimodal embedding model. For more information, see [Choosing a model]({{site.url}}{{site.baseurl}}/ml-commons-plugin/integrating-ml-models/#choosing-a-model). @@ -21,12 +21,12 @@ Before using text search, you must set up a multimodal embedding model. For more ## Using multimodal search -To use neural search with text and image embeddings, follow these steps: +To use multimodal search with text and image embeddings, follow these steps: 1. [Create an ingest pipeline](#step-1-create-an-ingest-pipeline). 1. [Create an index for ingestion](#step-2-create-an-index-for-ingestion). 1. [Ingest documents into the index](#step-3-ingest-documents-into-the-index). -1. [Search the index using neural search](#step-4-search-the-index-using-neural-search). +1. [Search the index](#step-4-search-the-index). ## Step 1: Create an ingest pipeline @@ -108,7 +108,7 @@ PUT /nlp-index/_doc/1 Before the document is ingested into the index, the ingest pipeline runs the `text_image_embedding` processor on the document, generating vector embeddings for the `image_description` and `image_binary` fields. In addition to the original `image_description` and `image_binary` fields, the indexed document includes the `vector_embedding` field, which contains the combined vector embeddings. -## Step 4: Search the index using neural search +## Step 4: Search the index To perform vector search on your index, use the `neural` query clause either in the [k-NN plugin API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api/#search-for-a-model) or [Query DSL]({{site.url}}{{site.baseurl}}/opensearch/query-dsl/index/) queries. You can refine the results by using a [k-NN search filter]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). You can search by text, image, or both text and image. diff --git a/_vector-search/ml-powered-search/neural-sparse-search.md b/_vector-search/ml-powered-search/neural-sparse-search.md index c7109a9443..ca9299d560 100644 --- a/_vector-search/ml-powered-search/neural-sparse-search.md +++ b/_vector-search/ml-powered-search/neural-sparse-search.md @@ -23,7 +23,7 @@ You can configure neural sparse search in the following ways: - Generate vector embeddings within OpenSearch: Configure an ingest pipeline to generate and store sparse vector embeddings from document text at ingestion time. At query time, input plain text, which will be automatically converted into vector embeddings for search. For complete setup steps, see [Configuring ingest pipelines for neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-with-pipelines/). - Ingest raw sparse vectors and search using sparse vectors directly. For complete setup steps, see [Ingesting and searching raw vectors]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-with-raw-vectors/). -To learn more about splitting long text into passages for neural search, see [Text chunking]({{site.url}}{{site.baseurl}}/search-plugins/text-chunking/). +To learn more about splitting long text into passages for neural sparse search, see [Text chunking]({{site.url}}{{site.baseurl}}/search-plugins/text-chunking/). 
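+
+For illustration, the following minimal sketch shows a query against an index configured to generate sparse embeddings within OpenSearch; the index name, field name, and model ID are placeholders:
+
+```json
+GET /my-sparse-index/_search
+{
+  "query": {
+    "neural_sparse": {
+      "passage_embedding": {
+        "query_text": "wild animals in a field",
+        "model_id": "<sparse encoding model ID>"
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+At query time, the model expands the `query_text` into sparse token-weight pairs, which are matched against the stored sparse embeddings.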
## Accelerating neural sparse search diff --git a/_vector-search/ml-powered-search/neural-sparse-with-pipelines.md b/_vector-search/ml-powered-search/neural-sparse-with-pipelines.md index e1047015ba..e11946e741 100644 --- a/_vector-search/ml-powered-search/neural-sparse-with-pipelines.md +++ b/_vector-search/ml-powered-search/neural-sparse-with-pipelines.md @@ -41,7 +41,7 @@ This tutorial consists of the following steps: ### Prerequisites -Before you start, complete the [prerequisites]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/#prerequisites) for neural search. +Before you start, complete the [prerequisites]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/#prerequisites). ## Step 1: Configure a sparse encoding model/tokenizer diff --git a/_vector-search/ml-powered-search/semantic-search.md b/_vector-search/ml-powered-search/semantic-search.md index fae615cc3c..5bfe5aa9a6 100644 --- a/_vector-search/ml-powered-search/semantic-search.md +++ b/_vector-search/ml-powered-search/semantic-search.md @@ -11,7 +11,7 @@ redirect_from: # Semantic search -Semantic search considers the context and intent of a query. In OpenSearch, semantic search is facilitated by neural search with text embedding models. Semantic search creates a dense vector (a list of floats) and ingests data into a k-NN index. +Semantic search considers the context and intent of a query. In OpenSearch, semantic search is facilitated by text embedding models. Semantic search creates a dense vector (a list of floats) and ingests data into a k-NN index. **PREREQUISITE**
Before using semantic search, you must set up a text embedding model. For more information, see [Choosing a model]({{site.url}}{{site.baseurl}}/ml-commons-plugin/integrating-ml-models/#choosing-a-model). @@ -24,7 +24,7 @@ To use semantic search, follow these steps: 1. [Create an ingest pipeline](#step-1-create-an-ingest-pipeline). 1. [Create an index for ingestion](#step-2-create-an-index-for-ingestion). 1. [Ingest documents into the index](#step-3-ingest-documents-into-the-index). -1. [Search the index using neural search](#step-4-search-the-index-using-neural-search). +1. [Search the index](#step-4-search-the-index). ## Step 1: Create an ingest pipeline @@ -115,7 +115,7 @@ PUT /my-nlp-index/_doc/2 Before the document is ingested into the index, the ingest pipeline runs the `text_embedding` processor on the document, generating text embeddings for the `passage_text` field. The indexed document includes the `passage_text` field, which contains the original text, and the `passage_embedding` field, which contains the vector embeddings. -## Step 4: Search the index using neural search +## Step 4: Search the index To perform vector search on your index, use the `neural` query clause either in the [k-NN plugin API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api/#search-for-a-model) or [Query DSL]({{site.url}}{{site.baseurl}}/opensearch/query-dsl/index/) queries. You can refine the results by using a [k-NN search filter]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). diff --git a/_vector-search/ml-powered-search/text-chunking.md b/_vector-search/ml-powered-search/text-chunking.md index 9242b2178a..b35667c9f5 100644 --- a/_vector-search/ml-powered-search/text-chunking.md +++ b/_vector-search/ml-powered-search/text-chunking.md @@ -93,7 +93,7 @@ POST testindex/_doc?pipeline=text-chunking-embedding-ingest-pipeline ``` {% include copy-curl.html %} -## Step 4: Search the index using neural search +## Step 4: Search the index You can use a `nested` query to perform vector search on your index. We recommend setting `score_mode` to `max`, where the document score is set to the highest score out of all passage embeddings: diff --git a/_vector-search/optimizing-performance/binary-quantization.md b/_vector-search/optimizing-performance/binary-quantization.md new file mode 100644 index 0000000000..650ac37d94 --- /dev/null +++ b/_vector-search/optimizing-performance/binary-quantization.md @@ -0,0 +1,182 @@ +--- +layout: default +title: Binary quantization +parent: Vector quantization +grand_parent: Optimizing vector search performance +nav_order: 40 +has_children: false +has_math: true +--- + +# Binary quantization + +Starting with version 2.17, OpenSearch supports binary quantization (BQ) with binary vector support for the Faiss engine. BQ compresses vectors into a binary format (0s and 1s), making it highly efficient in terms of memory usage. You can choose to represent each vector dimension using 1, 2, or 4 bits, depending on the desired precision. One of the advantages of using BQ is that the training process is handled automatically during indexing. This means that no separate training step is required, unlike other quantization techniques such as PQ. + +## Using BQ + +To configure BQ for the Faiss engine, define a `knn_vector` field and specify the `mode` as `on_disk`. 
This configuration defaults to 1-bit BQ and both `ef_search` and `ef_construction` set to `100`: + +```json +PUT my-vector-index +{ + "mappings": { + "properties": { + "my_vector_field": { + "type": "knn_vector", + "dimension": 8, + "space_type": "l2", + "data_type": "float", + "mode": "on_disk" + } + } + } +} +``` +{% include copy-curl.html %} + +To further optimize the configuration, you can specify additional parameters, such as the compression level, and fine-tune the search parameters. For example, you can override the `ef_construction` value or define the compression level, which corresponds to the number of bits used for quantization: + +- **32x compression** for 1-bit quantization +- **16x compression** for 2-bit quantization +- **8x compression** for 4-bit quantization + +This allows for greater control over memory usage and recall performance, providing flexibility to balance between precision and storage efficiency. + +To specify the compression level, set the `compression_level` parameter: + +```json +PUT my-vector-index +{ + "mappings": { + "properties": { + "my_vector_field": { + "type": "knn_vector", + "dimension": 8, + "space_type": "l2", + "data_type": "float", + "mode": "on_disk", + "compression_level": "16x", + "method": { + "parameters": { + "ef_construction": 16 + } + } + } + } + } +} +``` +{% include copy-curl.html %} + +The following example further fine-tunes the configuration by defining `ef_construction`, `encoder`, and the number of `bits` (which can be `1`, `2`, or `4`): + +```json +PUT my-vector-index +{ + "mappings": { + "properties": { + "my_vector_field": { + "type": "knn_vector", + "dimension": 8, + "method": { + "name": "hnsw", + "engine": "faiss", + "space_type": "l2", + "parameters": { + "m": 16, + "ef_construction": 512, + "encoder": { + "name": "binary", + "parameters": { + "bits": 1 + } + } + } + } + } + } + } +} +``` +{% include copy-curl.html %} + +## Search using binary quantized vectors + +You can perform a k-NN search on your index by providing a vector and specifying the number of nearest neighbors (k) to return: + +```json +GET my-vector-index/_search +{ + "size": 2, + "query": { + "knn": { + "my_vector_field": { + "vector": [1.5, 5.5, 1.5, 5.5, 1.5, 5.5, 1.5, 5.5], + "k": 10 + } + } + } +} +``` +{% include copy-curl.html %} + +You can also fine-tune search by providing the `ef_search` and `oversample_factor` parameters. +The `oversample_factor` parameter controls the factor by which the search oversamples the candidate vectors before ranking them. Using a higher oversample factor means that more candidates will be considered before ranking, improving accuracy but also increasing search time. When selecting the `oversample_factor` value, consider the trade-off between accuracy and efficiency. For example, setting the `oversample_factor` to `2.0` will double the number of candidates considered during the ranking phase, which may help achieve better results. 
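+
+As a rough sketch of the arithmetic (assuming that the first search phase retrieves approximately `k * oversample_factor` candidates, which are then rescored using full-precision vectors):
+
+```r
+# k = 10, oversample_factor = 2.0
+candidates_rescored = 10 * 2.0 = 20
+```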
+
+The following request specifies the `ef_search` and `oversample_factor` parameters:
+
+```json
+GET my-vector-index/_search
+{
+  "size": 2,
+  "query": {
+    "knn": {
+      "my_vector_field": {
+        "vector": [1.5, 5.5, 1.5, 5.5, 1.5, 5.5, 1.5, 5.5],
+        "k": 10,
+        "method_parameters": {
+          "ef_search": 10
+        },
+        "rescore": {
+          "oversample_factor": 10.0
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## HNSW memory estimation
+
+The memory required for the Hierarchical Navigable Small World (HNSW) graph can be estimated as `1.1 * ((dimension * bits / 8) + 8 * m)` bytes/vector, where `bits` is the number of bits used for quantization and `m` is the maximum number of bidirectional links created for each element during the construction of the graph.
+
+As an example, assume that you have 1 million vectors with a dimension of 256 and an `m` of 16. The following sections provide memory requirement estimations for various compression values.
+
+### 1-bit quantization (32x compression)
+
+In 1-bit quantization, each dimension is represented using 1 bit, equivalent to a 32x compression factor. The memory requirement can be estimated as follows:
+
+```r
+Memory = 1.1 * ((256 * 1 / 8) + 8 * 16) * 1,000,000
+ ~= 0.176 GB
+```
+
+### 2-bit quantization (16x compression)
+
+In 2-bit quantization, each dimension is represented using 2 bits, equivalent to a 16x compression factor. The memory requirement can be estimated as follows:
+
+```r
+Memory = 1.1 * ((256 * 2 / 8) + 8 * 16) * 1,000,000
+ ~= 0.211 GB
+```
+
+### 4-bit quantization (8x compression)
+
+In 4-bit quantization, each dimension is represented using 4 bits, equivalent to an 8x compression factor. The memory requirement can be estimated as follows:
+
+```r
+Memory = 1.1 * ((256 * 4 / 8) + 8 * 16) * 1,000,000
+ ~= 0.282 GB
+```
diff --git a/_vector-search/optimizing-storage/disk-based-vector-search.md b/_vector-search/optimizing-performance/disk-based-vector-search.md
similarity index 91%
rename from _vector-search/optimizing-storage/disk-based-vector-search.md
rename to _vector-search/optimizing-performance/disk-based-vector-search.md
index 3e6a6b2308..37264a4e79 100644
--- a/_vector-search/optimizing-storage/disk-based-vector-search.md
+++ b/_vector-search/optimizing-performance/disk-based-vector-search.md
@@ -1,8 +1,8 @@
 ---
 layout: default
 title: Disk-based vector search
-nav_order: 30
-parent: Optimizing vector storage
+nav_order: 20
+parent: Optimizing vector search performance
 has_children: false
 redirect_from:
   - /search-plugins/knn/disk-based-vector-search/
@@ -12,7 +12,7 @@ redirect_from:
 **Introduced 2.17**
 {: .label .label-purple}
 
-For low-memory environments, OpenSearch provides _disk-based vector search_, which significantly reduces the operational costs for vector workloads. Disk-based vector search uses [binary quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/#binary-quantization), compressing vectors and thereby reducing the memory requirements. This memory optimization provides large memory savings at the cost of slightly increased search latency while still maintaining strong recall.
+For low-memory environments, OpenSearch provides _disk-based vector search_, which significantly reduces the operational costs for vector workloads. Disk-based vector search uses [binary quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/binary-quantization/), compressing vectors and thereby reducing the memory requirements.
This memory optimization provides large memory savings at the cost of slightly increased search latency while still maintaining strong recall. To use disk-based vector search, set the [`mode`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#vector-workload-modes) parameter to `on_disk` for your vector field type. This parameter will configure your index to use secondary storage. @@ -181,7 +181,7 @@ GET my-vector-index/_search ## Model-based indexes -For [model-based indexes]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model), you can specify the `on_disk` parameter in the training request in the same way that you would specify it during index creation. By default, `on_disk` mode will use the [Faiss IVF method]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#supported-faiss-methods) and a compression level of `32x`. To run the training API, send the following request: +For [model-based indexes]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model), you can specify the `on_disk` parameter in the training request in the same way that you would specify it during index creation. By default, `on_disk` mode will use the [Faiss IVF method]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/#supported-faiss-methods) and a compression level of `32x`. To run the training API, send the following request: ```json POST /_plugins/_knn/models/test-model/_train @@ -206,5 +206,5 @@ You can override the `compression_level` for disk-optimized indexes in the same ## Next steps -- For more information about binary quantization, see [Binary quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/#binary-quantization). +- For more information about binary quantization, see [Binary quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/binary-quantization/). - For more information about k-NN vector workload modes, see [Vector workload modes]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#vector-workload-modes). \ No newline at end of file diff --git a/_vector-search/optimizing-performance/faiss-16-bit-quantization.md b/_vector-search/optimizing-performance/faiss-16-bit-quantization.md new file mode 100644 index 0000000000..4fadd9d8f7 --- /dev/null +++ b/_vector-search/optimizing-performance/faiss-16-bit-quantization.md @@ -0,0 +1,154 @@ +--- +layout: default +title: Faiss 16-bit scalar quantization +parent: Vector quantization +grand_parent: Optimizing vector search performance +nav_order: 20 +has_children: false +has_math: true +--- + +# Faiss 16-bit scalar quantization + +Starting with version 2.13, the k-NN plugin supports performing scalar quantization for the Faiss engine within OpenSearch. Within the Faiss engine, a scalar quantizer (SQfp16) performs the conversion between 32-bit and 16-bit vectors. At ingestion time, when you upload 32-bit floating-point vectors to OpenSearch, SQfp16 quantizes them into 16-bit floating-point vectors and stores the quantized vectors in a k-NN index. + +At search time, SQfp16 decodes the vector values back into 32-bit floating-point values for distance computation. The SQfp16 quantization can decrease the memory footprint by a factor of 2. Additionally, it leads to a minimal loss in recall when differences between vector values are large compared to the error introduced by eliminating their two least significant bits. 
When used with [SIMD optimization]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/vector-field/#simd-optimization-for-the-faiss-engine), SQfp16 quantization can also significantly reduce search latencies and improve indexing throughput. + +SIMD optimization is not supported on Windows. Using Faiss scalar quantization on Windows can lead to a significant drop in performance, including decreased indexing throughput and increased search latencies. +{: .warning} + +## Using Faiss scalar quantization + +To use Faiss scalar quantization, set the k-NN vector field's `method.parameters.encoder.name` to `sq` when creating a k-NN index: + +```json +PUT /test-index +{ + "settings": { + "index": { + "knn": true, + "knn.algo_param.ef_search": 100 + } + }, + "mappings": { + "properties": { + "my_vector1": { + "type": "knn_vector", + "dimension": 3, + "space_type": "l2", + "method": { + "name": "hnsw", + "engine": "faiss", + "parameters": { + "encoder": { + "name": "sq" + }, + "ef_construction": 256, + "m": 8 + } + } + } + } + } +} +``` +{% include copy-curl.html %} + +Optionally, you can specify the parameters in `method.parameters.encoder`. For more information about `encoder` object parameters, see [SQ parameters]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/#sq-parameters). + +The `fp16` encoder converts 32-bit vectors into their 16-bit counterparts. For this encoder type, the vector values must be in the [-65504.0, 65504.0] range. To define how to handle out-of-range values, the preceding request specifies the `clip` parameter. By default, this parameter is `false`, and any vectors containing out-of-range values are rejected. + +When `clip` is set to `true` (as in the preceding request), out-of-range vector values are rounded up or down so that they are in the supported range. For example, if the original 32-bit vector is `[65510.82, -65504.1]`, the vector will be indexed as a 16-bit vector `[65504.0, -65504.0]`. + +We recommend setting `clip` to `true` only if very few elements lie outside of the supported range. Rounding the values may cause a drop in recall. +{: .note} + +The following example method definition specifies the Faiss SQfp16 encoder, which rejects any indexing request that contains out-of-range vector values (because the `clip` parameter is `false` by default): + +```json +PUT /test-index +{ + "settings": { + "index": { + "knn": true, + "knn.algo_param.ef_search": 100 + } + }, + "mappings": { + "properties": { + "my_vector1": { + "type": "knn_vector", + "dimension": 3, + "space_type": "l2", + "method": { + "name": "hnsw", + "engine": "faiss", + "parameters": { + "encoder": { + "name": "sq", + "parameters": { + "type": "fp16" + } + }, + "ef_construction": 256, + "m": 8 + } + } + } + } + } +} +``` +{% include copy-curl.html %} + +During ingestion, make sure each vector dimension is in the supported range ([-65504.0, 65504.0]). + +```json +PUT test-index/_doc/1 +{ + "my_vector1": [-65504.0, 65503.845, 55.82] +} +``` +{% include copy-curl.html %} + +During querying, the query vector has no range limitation: + +```json +GET test-index/_search +{ + "size": 2, + "query": { + "knn": { + "my_vector1": { + "vector": [265436.876, -120906.256, 99.84], + "k": 2 + } + } + } +} +``` +{% include copy-curl.html %} + +## Memory estimation + +In the best-case scenario, 16-bit vectors produced by the Faiss SQfp16 quantizer require 50% of the memory that 32-bit vectors require. 
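+
+To put the 50% figure in perspective, the following back-of-the-envelope comparison considers raw vector storage alone (ignoring graph overhead) for 1 million vectors with a dimension of 256:
+
+```r
+32-bit: 4 * 256 * 1,000,000 ~= 0.954 GB
+16-bit: 2 * 256 * 1,000,000 ~= 0.477 GB
+```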
+ +### HNSW memory estimation + +The memory required for Hierarchical Navigable Small Worlds (HNSW) is estimated to be `1.1 * (2 * dimension + 8 * m)` bytes/vector, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph. + +As an example, assume that you have 1 million vectors with a dimension of 256 and an `m` of 16. The memory requirement can be estimated as follows: + +```r +1.1 * (2 * 256 + 8 * 16) * 1,000,000 ~= 0.656 GB +``` + +### IVF memory estimation + +The memory required for IVF is estimated to be `1.1 * (((2 * dimension) * num_vectors) + (4 * nlist * dimension))` bytes/vector, where `nlist` is the number of buckets to partition vectors into. + +As an example, assume that you have 1 million vectors with a dimension of 256 and an `nlist` of 128. The memory requirement can be estimated as follows: + +```r +1.1 * (((2 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 0.525 GB +``` \ No newline at end of file diff --git a/_vector-search/optimizing-performance/faiss-product-quantization.md b/_vector-search/optimizing-performance/faiss-product-quantization.md new file mode 100644 index 0000000000..de71164df0 --- /dev/null +++ b/_vector-search/optimizing-performance/faiss-product-quantization.md @@ -0,0 +1,52 @@ +--- +layout: default +title: Faiss product quantization +parent: Vector quantization +grand_parent: Optimizing vector search performance +nav_order: 30 +has_children: false +has_math: true +--- + +# Faiss product quantization + +Product quantization (PQ) is a technique used to represent a vector using a configurable number of bits. In general, it can be used to achieve a higher level of compression as compared to byte or scalar quantization. PQ works by separating vectors into _m_ subvectors and encoding each subvector with _code_size_ bits. Thus, the total amount of memory for the vector is `m*code_size` bits, plus overhead. For details about the parameters, see [PQ parameters]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/#pq-parameters). PQ is only supported for the _Faiss_ engine and can be used with either the _HNSW_ or _IVF_ approximate nearest neighbor (ANN) algorithms. + +## Using Faiss product quantization + +To minimize loss in accuracy, PQ requires a _training_ step that builds a model based on the distribution of the data that will be searched. + +The product quantizer is trained by running k-means clustering on a set of training vectors for each subvector space and extracts the centroids to be used for encoding. The training vectors can be either a subset of the vectors to be ingested or vectors that have the same distribution and dimension as the vectors to be ingested. + +In OpenSearch, the training vectors need to be present in an index. In general, the amount of training data will depend on which ANN algorithm is used and how much data will be stored in the index. For IVF-based indexes, a recommended number of training vectors is `max(1000*nlist, 2^code_size * 1000)`. For HNSW-based indexes, a recommended number is `2^code_size*1000`. See the [Faiss documentation](https://github.com/facebookresearch/faiss/wiki/FAQ#how-many-training-points-do-i-need-for-k-means) for more information about the methodology used to calculate these figures. + +For PQ, both _m_ and _code_size_ need to be selected. _m_ determines the number of subvectors into which vectors should be split for separate encoding. Consequently, the _dimension_ needs to be divisible by _m_. 
_code_size_ determines the number of bits used to encode each subvector. In general, we recommend a setting of `code_size = 8` and then tuning _m_ to get the desired trade-off between memory footprint and recall.
+
+For an example of setting up an index with PQ, see the [Building a k-NN index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model) tutorial.
+
+## Memory estimation
+
+While PQ is meant to represent individual vectors with `m*code_size` bits, in reality, the indexes consume more space. This is mainly because of the overhead of storing certain code tables and auxiliary data structures.
+
+Some of the memory formulas depend on the number of segments present. This is not typically known beforehand, but a recommended default value is 300.
+{: .note}
+
+### HNSW memory estimation
+
+The memory required for HNSW with PQ is estimated to be `1.1*(((pq_code_size / 8) * pq_m + 24 + 8 * hnsw_m) * num_vectors + num_segments * (2^pq_code_size * 4 * d))` bytes.
+
+As an example, assume that you have 1 million vectors with a dimension of 256, `hnsw_m` of 16, `pq_m` of 32, `pq_code_size` of 8, and 100 segments. The memory requirement can be estimated as follows:
+
+```r
+1.1 * ((8 / 8 * 32 + 24 + 8 * 16) * 1000000 + 100 * (2^8 * 4 * 256)) ~= 0.215 GB
+```
+
+### IVF memory estimation
+
+The memory required for IVF with PQ is estimated to be `1.1*(((pq_code_size / 8) * pq_m + 24) * num_vectors + num_segments * (2^pq_code_size * 4 * d + 4 * ivf_nlist * d))` bytes.
+
+For example, assume that you have 1 million vectors with a dimension of 256, `ivf_nlist` of 512, `pq_m` of 64, `pq_code_size` of 8, and 100 segments. The memory requirement can be estimated as follows:
+
+```r
+1.1*((8 / 8 * 64 + 24) * 1000000 + 100 * (2^8 * 4 * 256 + 4 * 512 * 256)) ~= 0.171 GB
+```
\ No newline at end of file
diff --git a/_vector-search/optimizing-performance/index.md b/_vector-search/optimizing-performance/index.md
new file mode 100644
index 0000000000..0c1cb6f82c
--- /dev/null
+++ b/_vector-search/optimizing-performance/index.md
@@ -0,0 +1,32 @@
+---
+layout: default
+title: Optimizing vector search performance
+nav_order: 60
+has_children: true
+has_toc: false
+redirect_from:
+  - /vector-search/optimizing-performance/
+storage_cards:
+  - heading: "Vector quantization"
+    description: "Reduce vector storage space by quantizing vectors."
+    link: "/vector-search/optimizing-performance/knn-vector-quantization/"
+  - heading: "Disk-based vector search"
+    description: "Uses binary quantization to reduce operational costs of vector workloads."
+    link: "/vector-search/optimizing-performance/disk-based-vector-search/"
+performance_cards:
+  - heading: "Performance tuning"
+    description: "Improve indexing and search performance for approximate k-NN (ANN)."
+    link: "/vector-search/optimizing-performance/performance-tuning/"
+---
+
+# Optimizing vector search performance
+
+Vector search operations can be resource-intensive, especially when dealing with large-scale vector datasets. OpenSearch provides several optimization techniques to reduce memory usage and enhance search performance.
+ +## Optimizing vector storage + +{% include cards.html cards=page.storage_cards %} + +## Optimizing performance + +{% include cards.html cards=page.performance_cards %} \ No newline at end of file diff --git a/_vector-search/optimizing-performance/knn-vector-quantization.md b/_vector-search/optimizing-performance/knn-vector-quantization.md new file mode 100644 index 0000000000..7f405677aa --- /dev/null +++ b/_vector-search/optimizing-performance/knn-vector-quantization.md @@ -0,0 +1,41 @@ +--- +layout: default +title: Vector quantization +parent: Optimizing vector search performance +nav_order: 10 +has_children: true +has_toc: false +redirect_from: + - /search-plugins/knn/knn-vector-quantization/ +outside_cards: + - heading: "Byte vectors" + description: "Quantize vectors outside of OpenSearch before ingesting them into an OpenSearch index." + link: "/field-types/supported-field-types/knn-vector#byte-vectors" +inside_cards: + - heading: "Lucene scalar quantization" + description: "Use built-in scalar quantization for the Lucene engine." + link: "/vector-search/optimizing-performance/lucene-scalar-quantization/" + - heading: "Faiss 16-bit scalar quantization" + description: "Use built-in scalar quantization for the Faiss engine." + link: "/vector-search/optimizing-performance/faiss-16-bit-quantization/" + - heading: "Faiss product quantization" + description: "Use built-in product quantization for the Faiss engine." + link: "/vector-search/optimizing-performance/faiss-product-quantization/" + - heading: "Binary quantization" + description: "Use built-in binary quantization for the Faiss engine." + link: "/vector-search/optimizing-performance/binary-quantization/" +--- + +# Vector quantization + +By default, the k-NN plugin supports the indexing and querying of vectors of type `float`, where each dimension of the vector occupies 4 bytes of memory. For use cases that require ingestion on a large scale, keeping `float` vectors can be expensive because OpenSearch needs to construct, load, save, and search graphs (for native `nmslib` and `faiss` engines). To reduce the memory footprint, you can use vector quantization. + +OpenSearch supports many varieties of quantization. In general, the level of quantization will provide a trade-off between the accuracy of the nearest neighbor search and the size of the memory footprint consumed by the vector search. + +## Quantize vectors outside of OpenSearch + +{% include cards.html cards=page.outside_cards %} + +## Quantize vectors within OpenSearch + +{% include cards.html cards=page.inside_cards %} \ No newline at end of file diff --git a/_vector-search/optimizing-performance/lucene-scalar-quantization.md b/_vector-search/optimizing-performance/lucene-scalar-quantization.md new file mode 100644 index 0000000000..5c088da794 --- /dev/null +++ b/_vector-search/optimizing-performance/lucene-scalar-quantization.md @@ -0,0 +1,115 @@ +--- +layout: default +title: Lucene scalar quantization +parent: Vector quantization +grand_parent: Optimizing vector search performance +nav_order: 10 +has_children: false +has_math: true +--- + +# Lucene scalar quantization + +Starting with version 2.16, the k-NN plugin supports built-in scalar quantization for the Lucene engine. Unlike [byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors), which require you to quantize vectors before ingesting documents, the Lucene scalar quantizer quantizes input vectors in OpenSearch during ingestion. 
The Lucene scalar quantizer converts 32-bit floating-point input vectors into 7-bit integer vectors in each segment using the minimum and maximum quantiles computed based on the [`confidence_interval`](#confidence-interval) parameter. During search, the query vector is quantized in each segment using the segment's minimum and maximum quantiles in order to compute the distance between the query vector and the segment's quantized input vectors.
+
+Quantization can decrease the memory footprint by a factor of 4 in exchange for some loss in recall. Additionally, quantization slightly increases disk usage because it requires storing both the raw input vectors and the quantized vectors.
+
+## Using Lucene scalar quantization
+
+To use the Lucene scalar quantizer, set the k-NN vector field's `method.parameters.encoder.name` to `sq` when creating a k-NN index:
+
+```json
+PUT /test-index
+{
+  "settings": {
+    "index": {
+      "knn": true
+    }
+  },
+  "mappings": {
+    "properties": {
+      "my_vector1": {
+        "type": "knn_vector",
+        "dimension": 2,
+        "space_type": "l2",
+        "method": {
+          "name": "hnsw",
+          "engine": "lucene",
+          "parameters": {
+            "encoder": {
+              "name": "sq"
+            },
+            "ef_construction": 256,
+            "m": 8
+          }
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Confidence interval
+
+Optionally, you can specify the `confidence_interval` parameter in the `method.parameters.encoder` object.
+The `confidence_interval` is used to compute the minimum and maximum quantiles in order to quantize the vectors:
+- If you set the `confidence_interval` to a value in the `0.9` to `1.0` range, inclusive, then the quantiles are calculated statically. For example, setting the `confidence_interval` to `0.9` specifies to compute the minimum and maximum quantiles based on the middle 90% of the vector values, excluding the minimum 5% and maximum 5% of the values.
+- Setting `confidence_interval` to `0` specifies to compute the quantiles dynamically, which involves oversampling and additional computations performed on the input data.
+- When `confidence_interval` is not set, it is computed based on the vector dimension $$d$$ using the formula $$max(0.9, 1 - \frac{1}{1 + d})$$.
+
+Lucene scalar quantization is applied only to `float` vectors. If you change the default value of the `data_type` parameter from `float` to `byte` or any other type when mapping a [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/), then the request is rejected.
+{: .warning}
+
+The following example method definition specifies the Lucene `sq` encoder with the `confidence_interval` set to `1.0`. This `confidence_interval` specifies to consider all the input vectors when computing the minimum and maximum quantiles. Vectors are quantized to 7 bits by default:
+
+```json
+PUT /test-index
+{
+  "settings": {
+    "index": {
+      "knn": true
+    }
+  },
+  "mappings": {
+    "properties": {
+      "my_vector1": {
+        "type": "knn_vector",
+        "dimension": 2,
+        "space_type": "l2",
+        "method": {
+          "name": "hnsw",
+          "engine": "lucene",
+          "parameters": {
+            "encoder": {
+              "name": "sq",
+              "parameters": {
+                "confidence_interval": 1.0
+              }
+            },
+            "ef_construction": 256,
+            "m": 8
+          }
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+There are no changes to ingestion or query mapping and no range limitations for the input vectors.
+
+## Memory estimation
+
+In the ideal scenario, 7-bit vectors created by the Lucene scalar quantizer use only 25% of the memory required by 32-bit vectors.
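+
+To illustrate, the following back-of-the-envelope comparison considers raw vector storage alone for 1 million vectors with a dimension of 256, assuming that each 7-bit value occupies 1 byte and ignoring both graph overhead and the raw vectors retained on disk:
+
+```r
+32-bit: 4 * 256 * 1,000,000 ~= 0.954 GB
+7-bit:  1 * 256 * 1,000,000 ~= 0.238 GB
+```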
+ +### HNSW memory estimation + +The memory required for the Hierarchical Navigable Small World (HNSW) graph can be estimated as `1.1 * (dimension + 8 * m)` bytes/vector, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph. + +As an example, assume that you have 1 million vectors with a dimension of 256 and M of 16. The memory requirement can be estimated as follows: + +```r +1.1 * (256 + 8 * 16) * 1,000,000 ~= 0.4 GB +``` \ No newline at end of file diff --git a/_vector-search/performance-tuning.md b/_vector-search/optimizing-performance/performance-tuning.md similarity index 98% rename from _vector-search/performance-tuning.md rename to _vector-search/optimizing-performance/performance-tuning.md index 6a602ef262..1eee277951 100644 --- a/_vector-search/performance-tuning.md +++ b/_vector-search/optimizing-performance/performance-tuning.md @@ -1,7 +1,8 @@ --- layout: default title: Performance tuning -nav_order: 70 +nav_order: 30 +parent: Optimizing vector search performance redirect_from: - /search-plugins/knn/performance-tuning/ --- @@ -105,7 +106,7 @@ This approach is recommended only for workloads that involve a single initial bu During indexing, vector search builds a specialized data structure for a `knn_vector` field to enable efficient approximate k-NN search. However, these structures are rebuilt during [force merge]({{site.url}}{{site.baseurl}}/api-reference/index-apis/force-merge/) on k-NN indexes. To optimize indexing speed, follow these steps: -1. **Disable vector data structure creation**: Disable vector data structure creation for new segments by setting [`index.knn.advanced.approximate_threshold`]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#index-settings) to `-1`. +1. **Disable vector data structure creation**: Disable vector data structure creation for new segments by setting [`index.knn.advanced.approximate_threshold`]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings) to `-1`. To specify the setting at index creation, send the following request: @@ -215,7 +216,7 @@ This API operation only loads the segments of active indexes into the cache. If Recall depends on multiple factors like number of vectors, number of dimensions, segments, and so on. Searching over a large number of small segments and aggregating the results leads to better recall than searching over a small number of large segments and aggregating results. The larger the native library index, the more chances of losing recall if you're using smaller algorithm parameters. Choosing larger values for algorithm parameters should help solve this issue but sacrifices search latency and indexing time. That being said, it's important to understand your system's requirements for latency and accuracy, and then choose the number of segments you want your index to have based on experimentation. -The default parameters work on a broader set of use cases, but make sure to run your own experiments on your data sets and choose the appropriate values. For index-level settings, see [Index settings]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index#index-settings). +The default parameters work on a broader set of use cases, but make sure to run your own experiments on your data sets and choose the appropriate values. For index-level settings, see [Index settings]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings). 
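+
+For example, for an index that uses the `nmslib` engine, you can raise the dynamic `ef_search` index setting and measure the effect on recall and latency. The following is a minimal sketch using a hypothetical index name:
+
+```json
+PUT /my-vector-index/_settings
+{
+  "index": {
+    "knn.algo_param.ef_search": 200
+  }
+}
+```
+{% include copy-curl.html %}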
## Approximate nearest neighbor versus score script diff --git a/_vector-search/optimizing-storage/index.md b/_vector-search/optimizing-storage/index.md deleted file mode 100644 index 87d4829713..0000000000 --- a/_vector-search/optimizing-storage/index.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -layout: default -title: Optimizing vector storage -nav_order: 60 -has_children: true -has_toc: false -redirect_from: - - /vector-search/optimizing-storage/ ---- - -# Optimizing vector storage - -Vector search operations can be memory-intensive, especially when dealing with large-scale vector datasets. OpenSearch provides several optimization techniques to reduce memory usage while maintaining search performance. This section covers different approaches to optimize vector storage and search operations. - -## Available optimization techniques - -OpenSearch supports the following vector storage optimization methods: - -1. **Vector quantization techniques** - - Byte vectors - - Lucene scalar quantization - - Faiss 16-bit scalar quantization - - Product quantization (PQ) - - Binary quantization (BQ) - -2. **Disk-based vector search** - - Reduces operational costs for vector workloads - - Uses binary quantization for compression - - Provides significant memory savings with minimal impact on search quality - -## Choosing an optimization method - -The choice of optimization method depends on your specific requirements: - -| Method | Best for | Memory Savings | Impact on Search Quality | -|--------|----------|----------------|-------------------------| -| Disk-based search | Low-memory environments | Highest (32x reduction) | Minimal impact with rescoring | -| Vector quantization | Balanced approach | Varies (2x-32x reduction) | Varies by technique | - -### When to use disk-based search -- Limited memory environments -- Large-scale vector operations -- When willing to accept slightly increased search latency - -### When to use vector quantization -- Need fine-grained control over compression -- Specific accuracy requirements -- Different memory-performance trade-off needs - -## Next steps - -- Learn about [Vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/) techniques -- Explore [Disk-based vector search]({{site.url}}{{site.baseurl}}/search-plugins/knn/disk-based-vector-search/) diff --git a/_vector-search/optimizing-storage/knn-vector-quantization.md b/_vector-search/optimizing-storage/knn-vector-quantization.md deleted file mode 100644 index 4ed2ad8e22..0000000000 --- a/_vector-search/optimizing-storage/knn-vector-quantization.md +++ /dev/null @@ -1,486 +0,0 @@ ---- -layout: default -title: Vector quantization -parent: Optimizing vector storage -nav_order: 50 -has_children: false -has_math: true -redirect_from: - - /search-plugins/knn/knn-vector-quantization/ ---- - -# Vector quantization - -By default, the k-NN plugin supports the indexing and querying of vectors of type `float`, where each dimension of the vector occupies 4 bytes of memory. For use cases that require ingestion on a large scale, keeping `float` vectors can be expensive because OpenSearch needs to construct, load, save, and search graphs (for native `nmslib` and `faiss` engines). To reduce the memory footprint, you can use vector quantization. - -OpenSearch supports many varieties of quantization. In general, the level of quantization will provide a trade-off between the accuracy of the nearest neighbor search and the size of the memory footprint consumed by the vector search. 
The supported types include byte vectors, 16-bit scalar quantization, product quantization (PQ), and binary quantization(BQ). - -## Byte vectors - -Starting with version 2.17, the k-NN plugin supports `byte` vectors with the `faiss` and `lucene` engines in order to reduce the amount of required memory. This requires quantizing the vectors outside of OpenSearch before ingesting them into an OpenSearch index. For more information, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors). - -## Lucene scalar quantization - -Starting with version 2.16, the k-NN plugin supports built-in scalar quantization for the Lucene engine. Unlike [byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors), which require you to quantize vectors before ingesting documents, the Lucene scalar quantizer quantizes input vectors in OpenSearch during ingestion. The Lucene scalar quantizer converts 32-bit floating-point input vectors into 7-bit integer vectors in each segment using the minimum and maximum quantiles computed based on the [`confidence_interval`](#confidence-interval) parameter. During search, the query vector is quantized in each segment using the segment's minimum and maximum quantiles in order to compute the distance between the query vector and the segment's quantized input vectors. - -Quantization can decrease the memory footprint by a factor of 4 in exchange for some loss in recall. Additionally, quantization slightly increases disk usage because it requires storing both the raw input vectors and the quantized vectors. - -### Using Lucene scalar quantization - -To use the Lucene scalar quantizer, set the k-NN vector field's `method.parameters.encoder.name` to `sq` when creating a k-NN index: - -```json -PUT /test-index -{ - "settings": { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 2, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "lucene", - "parameters": { - "encoder": { - "name": "sq" - }, - "ef_construction": 256, - "m": 8 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -### Confidence interval - -Optionally, you can specify the `confidence_interval` parameter in the `method.parameters.encoder` object. -The `confidence_interval` is used to compute the minimum and maximum quantiles in order to quantize the vectors: -- If you set the `confidence_interval` to a value in the `0.9` to `1.0` range, inclusive, then the quantiles are calculated statically. For example, setting the `confidence_interval` to `0.9` specifies to compute the minimum and maximum quantiles based on the middle 90% of the vector values, excluding the minimum 5% and maximum 5% of the values. -- Setting `confidence_interval` to `0` specifies to compute the quantiles dynamically, which involves oversampling and additional computations performed on the input data. -- When `confidence_interval` is not set, it is computed based on the vector dimension $$d$$ using the formula $$max(0.9, 1 - \frac{1}{1 + d})$$. - -Lucene scalar quantization is applied only to `float` vectors. If you change the default value of the `data_type` parameter from `float` to `byte` or any other type when mapping a [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/), then the request is rejected. 
-{: .warning} - -The following example method definition specifies the Lucene `sq` encoder with the `confidence_interval` set to `1.0`. This `confidence_interval` specifies to consider all the input vectors when computing the minimum and maximum quantiles. Vectors are quantized to 7 bits by default: - -```json -PUT /test-index -{ - "settings": { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 2, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "lucene", - "parameters": { - "encoder": { - "name": "sq", - "parameters": { - "confidence_interval": 1.0 - } - }, - "ef_construction": 256, - "m": 8 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -There are no changes to ingestion or query mapping and no range limitations for the input vectors. - -### Memory estimation - -In the ideal scenario, 7-bit vectors created by the Lucene scalar quantizer use only 25% of the memory required by 32-bit vectors. - -#### HNSW memory estimation - -The memory required for the Hierarchical Navigable Small World (HNSW) graph can be estimated as `1.1 * (dimension + 8 * m)` bytes/vector, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph. - -As an example, assume that you have 1 million vectors with a dimension of 256 and M of 16. The memory requirement can be estimated as follows: - -```r -1.1 * (256 + 8 * 16) * 1,000,000 ~= 0.4 GB -``` - -## Faiss 16-bit scalar quantization - -Starting with version 2.13, the k-NN plugin supports performing scalar quantization for the Faiss engine within OpenSearch. Within the Faiss engine, a scalar quantizer (SQfp16) performs the conversion between 32-bit and 16-bit vectors. At ingestion time, when you upload 32-bit floating-point vectors to OpenSearch, SQfp16 quantizes them into 16-bit floating-point vectors and stores the quantized vectors in a k-NN index. - -At search time, SQfp16 decodes the vector values back into 32-bit floating-point values for distance computation. The SQfp16 quantization can decrease the memory footprint by a factor of 2. Additionally, it leads to a minimal loss in recall when differences between vector values are large compared to the error introduced by eliminating their two least significant bits. When used with [SIMD optimization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index#simd-optimization-for-the-faiss-engine), SQfp16 quantization can also significantly reduce search latencies and improve indexing throughput. - -SIMD optimization is not supported on Windows. Using Faiss scalar quantization on Windows can lead to a significant drop in performance, including decreased indexing throughput and increased search latencies. -{: .warning} - -### Using Faiss scalar quantization - -To use Faiss scalar quantization, set the k-NN vector field's `method.parameters.encoder.name` to `sq` when creating a k-NN index: - -```json -PUT /test-index -{ - "settings": { - "index": { - "knn": true, - "knn.algo_param.ef_search": 100 - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 3, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "faiss", - "parameters": { - "encoder": { - "name": "sq" - }, - "ef_construction": 256, - "m": 8 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -Optionally, you can specify the parameters in `method.parameters.encoder`. 
For more information about `encoder` object parameters, see [SQ parameters]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#sq-parameters). - -The `fp16` encoder converts 32-bit vectors into their 16-bit counterparts. For this encoder type, the vector values must be in the [-65504.0, 65504.0] range. To define how to handle out-of-range values, you can specify the `clip` parameter. By default, this parameter is `false`, and any vectors containing out-of-range values are rejected. - -When `clip` is set to `true`, out-of-range vector values are rounded up or down so that they are in the supported range. For example, if the original 32-bit vector is `[65510.82, -65504.1]`, the vector will be indexed as a 16-bit vector `[65504.0, -65504.0]`. - -We recommend setting `clip` to `true` only if very few elements lie outside of the supported range. Rounding the values may cause a drop in recall. -{: .note} - -The following example method definition specifies the Faiss SQfp16 encoder, which rejects any indexing request that contains out-of-range vector values (because the `clip` parameter is `false` by default): - -```json -PUT /test-index -{ - "settings": { - "index": { - "knn": true, - "knn.algo_param.ef_search": 100 - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 3, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "faiss", - "parameters": { - "encoder": { - "name": "sq", - "parameters": { - "type": "fp16" - } - }, - "ef_construction": 256, - "m": 8 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -During ingestion, make sure each vector dimension is in the supported range ([-65504.0, 65504.0]). - -```json -PUT test-index/_doc/1 -{ - "my_vector1": [-65504.0, 65503.845, 55.82] -} -``` -{% include copy-curl.html %} - -During querying, the query vector has no range limitation: - -```json -GET test-index/_search -{ - "size": 2, - "query": { - "knn": { - "my_vector1": { - "vector": [265436.876, -120906.256, 99.84], - "k": 2 - } - } - } -} -``` -{% include copy-curl.html %} - -### Memory estimation - -In the best-case scenario, 16-bit vectors produced by the Faiss SQfp16 quantizer require 50% of the memory that 32-bit vectors require. - -#### HNSW memory estimation - -The memory required for Hierarchical Navigable Small World (HNSW) is estimated to be `1.1 * (2 * dimension + 8 * m)` bytes/vector, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph. - -As an example, assume that you have 1 million vectors with a dimension of 256 and an `m` of 16. The memory requirement can be estimated as follows: - -```r -1.1 * (2 * 256 + 8 * 16) * 1,000,000 ~= 0.656 GB -``` - -#### IVF memory estimation - -The memory required for IVF is estimated to be `1.1 * (((2 * dimension) * num_vectors) + (4 * nlist * dimension))` bytes, where `nlist` is the number of buckets to partition vectors into. - -As an example, assume that you have 1 million vectors with a dimension of 256 and an `nlist` of 128. The memory requirement can be estimated as follows: - -```r -1.1 * (((2 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 0.525 GB -``` - -## Faiss product quantization - -PQ is a technique used to represent a vector in a configurable number of bits. In general, it can be used to achieve a higher level of compression as compared to byte or scalar quantization.
PQ works by separating vectors into _m_ subvectors and encoding each subvector with _code_size_ bits. Thus, the total amount of memory for the vector is `m*code_size` bits, plus overhead. For details about the parameters, see [PQ parameters]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#pq-parameters). PQ is only supported for the _Faiss_ engine and can be used with either the _HNSW_ or _IVF_ approximate nearest neighbor (ANN) algorithms. - -### Using Faiss product quantization - -To minimize loss in accuracy, PQ requires a _training_ step that builds a model based on the distribution of the data that will be searched. - -The product quantizer is trained by running k-means clustering on a set of training vectors for each subvector space and extracting the centroids to be used for encoding. The training vectors can be either a subset of the vectors to be ingested or vectors that have the same distribution and dimension as the vectors to be ingested. - -In OpenSearch, the training vectors need to be present in an index. In general, the amount of training data will depend on which ANN algorithm is used and how much data will be stored in the index. For IVF-based indexes, a recommended number of training vectors is `max(1000*nlist, 2^code_size * 1000)`. For HNSW-based indexes, a recommended number is `2^code_size*1000`. See the [Faiss documentation](https://github.com/facebookresearch/faiss/wiki/FAQ#how-many-training-points-do-i-need-for-k-means) for more information about the methodology used to calculate these figures. - -For PQ, both _m_ and _code_size_ need to be selected. _m_ determines the number of subvectors into which vectors should be split for separate encoding. Consequently, the _dimension_ needs to be divisible by _m_. _code_size_ determines the number of bits used to encode each subvector. In general, we recommend a setting of `code_size = 8` and then tuning _m_ to get the desired trade-off between memory footprint and recall. - -For an example of setting up an index with PQ, see the [Building a k-NN index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model) tutorial. - -### Memory estimation - -While PQ is meant to represent individual vectors with `m*code_size` bits, in reality, the indexes consume more space. This is mainly due to the overhead of storing certain code tables and auxiliary data structures. - -Some of the memory formulas depend on the number of segments present. This is not typically known beforehand, but a recommended default value is 300. -{: .note} - -#### HNSW memory estimation - -The memory required for HNSW with PQ is estimated to be `1.1*(((pq_code_size / 8) * pq_m + 24 + 8 * hnsw_m) * num_vectors + num_segments * (2^pq_code_size * 4 * d))` bytes. - -As an example, assume that you have 1 million vectors with a dimension of 256, `hnsw_m` of 16, `pq_m` of 32, `pq_code_size` of 8, and 100 segments. The memory requirement can be estimated as follows: - -```r -1.1 * ((8 / 8 * 32 + 24 + 8 * 16) * 1000000 + 100 * (2^8 * 4 * 256)) ~= 0.215 GB -``` - -#### IVF memory estimation - -The memory required for IVF with PQ is estimated to be `1.1*(((pq_code_size / 8) * pq_m + 24) * num_vectors + num_segments * (2^pq_code_size * 4 * d + 4 * ivf_nlist * d))` bytes. - -For example, assume that you have 1 million vectors with a dimension of 256, `ivf_nlist` of 512, `pq_m` of 64, `pq_code_size` of 8, and 100 segments.
The memory requirement can be estimated as follows: - -```r -1.1*((8 / 8 * 64 + 24) * 1000000 + 100 * (2^8 * 4 * 256 + 4 * 512 * 256)) ~= 0.171 GB -``` - -## Binary quantization - -Starting with version 2.17, OpenSearch supports BQ with binary vector support for the Faiss engine. BQ compresses vectors into a binary format (0s and 1s), making it highly efficient in terms of memory usage. You can choose to represent each vector dimension using 1, 2, or 4 bits, depending on the desired precision. One of the advantages of using BQ is that the training process is handled automatically during indexing. This means that no separate training step is required, unlike other quantization techniques such as PQ. - -### Using BQ -To configure BQ for the Faiss engine, define a `knn_vector` field and specify the `mode` as `on_disk`. This configuration defaults to 1-bit BQ and both `ef_search` and `ef_construction` set to `100`: - -```json -PUT my-vector-index -{ - "mappings": { - "properties": { - "my_vector_field": { - "type": "knn_vector", - "dimension": 8, - "space_type": "l2", - "data_type": "float", - "mode": "on_disk" - } - } - } -} -``` -{% include copy-curl.html %} - -To further optimize the configuration, you can specify additional parameters, such as the compression level, and fine-tune the search parameters. For example, you can override the `ef_construction` value or define the compression level, which corresponds to the number of bits used for quantization: - -- **32x compression** for 1-bit quantization -- **16x compression** for 2-bit quantization -- **8x compression** for 4-bit quantization - -This allows for greater control over memory usage and recall performance, providing flexibility to balance between precision and storage efficiency. - -To specify the compression level, set the `compression_level` parameter: - -```json -PUT my-vector-index -{ - "mappings": { - "properties": { - "my_vector_field": { - "type": "knn_vector", - "dimension": 8, - "space_type": "l2", - "data_type": "float", - "mode": "on_disk", - "compression_level": "16x", - "method": { - "parameters": { - "ef_construction": 16 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -The following example further fine-tunes the configuration by defining `ef_construction`, `encoder`, and the number of `bits` (which can be `1`, `2`, or `4`): - -```json -PUT my-vector-index -{ - "mappings": { - "properties": { - "my_vector_field": { - "type": "knn_vector", - "dimension": 8, - "method": { - "name": "hnsw", - "engine": "faiss", - "space_type": "l2", - "parameters": { - "m": 16, - "ef_construction": 512, - "encoder": { - "name": "binary", - "parameters": { - "bits": 1 - } - } - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -### Search using binary quantized vectors - -You can perform a k-NN search on your index by providing a vector and specifying the number of nearest neighbors (k) to return: - -```json -GET my-vector-index/_search -{ - "size": 2, - "query": { - "knn": { - "my_vector_field": { - "vector": [1.5, 5.5, 1.5, 5.5, 1.5, 5.5, 1.5, 5.5], - "k": 10 - } - } - } -} -``` -{% include copy-curl.html %} - -You can also fine-tune search by providing the `ef_search` and `oversample_factor` parameters. -The `oversample_factor` parameter controls the factor by which the search oversamples the candidate vectors before ranking them. Using a higher oversample factor means that more candidates will be considered before ranking, improving accuracy but also increasing search time. 
When selecting the `oversample_factor` value, consider the trade-off between accuracy and efficiency. For example, setting the `oversample_factor` to `2.0` will double the number of candidates considered during the ranking phase, which may help achieve better results. - -The following request specifies the `ef_search` and `oversample_factor` parameters: - -```json -GET my-vector-index/_search -{ - "size": 2, - "query": { - "knn": { - "my_vector_field": { - "vector": [1.5, 5.5, 1.5, 5.5, 1.5, 5.5, 1.5, 5.5], - "k": 10, - "method_parameters": { - "ef_search": 10 - }, - "rescore": { - "oversample_factor": 10.0 - } - } - } - } -} -``` -{% include copy-curl.html %} - - -#### HNSW memory estimation - -The memory required for the Hierarchical Navigable Small World (HNSW) graph can be estimated as `1.1 * ((dimension * bits / 8) + 8 * m)` bytes/vector, where `bits` is the number of bits used for quantization (`1`, `2`, or `4`) and `m` is the maximum number of bidirectional links created for each element during the construction of the graph. - -As an example, assume that you have 1 million vectors with a dimension of 256 and an `m` of 16. The following sections provide memory requirement estimations for various compression values. - -##### 1-bit quantization (32x compression) - -In 1-bit quantization, each dimension is represented using 1 bit, equivalent to a 32x compression factor. The memory requirement can be estimated as follows: - -```r -Memory = 1.1 * ((256 * 1 / 8) + 8 * 16) * 1,000,000 - ~= 0.176 GB -``` - -##### 2-bit quantization (16x compression) - -In 2-bit quantization, each dimension is represented using 2 bits, equivalent to a 16x compression factor. The memory requirement can be estimated as follows: - -```r -Memory = 1.1 * ((256 * 2 / 8) + 8 * 16) * 1,000,000 - ~= 0.211 GB -``` - -##### 4-bit quantization (8x compression) - -In 4-bit quantization, each dimension is represented using 4 bits, equivalent to an 8x compression factor. The memory requirement can be estimated as follows: - -```r -Memory = 1.1 * ((256 * 4 / 8) + 8 * 16) * 1,000,000 - ~= 0.282 GB -``` diff --git a/_vector-search/querying-data.md b/_vector-search/searching-data.md similarity index 71% rename from _vector-search/querying-data.md rename to _vector-search/searching-data.md index b55cdbcd53..f1d5de90d9 100644 --- a/_vector-search/querying-data.md +++ b/_vector-search/searching-data.md @@ -15,7 +15,7 @@ The following table compares the search syntax and typical use cases for each ve | Feature | Query type | Input format | Model required | Use case | |----------------------------------|------------------|------------------|---------------------|----------------------------| | **Pre-generated embeddings** | `knn` | Vector array | No | Raw vector search | -| **Auto-generated embeddings** | `neural` | Text | Yes | Semantic search | +| **Auto-generated embeddings** | `neural` | Text | Yes | ML-powered search | ## Searching pre-generated embeddings or raw vectors @@ -36,16 +36,9 @@ GET /my-raw-vector-index/_search ``` {% include copy-curl.html %} -**Key characteristics**: - -- Utilizes the `knn` query type. -- Requires a vector array input. -- Specify `k` to return the top-k nearest neighbors. -- Does not require a model for query transformation.
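For context, the raw vector query shown above assumes that an index with a `knn_vector` field already exists and that documents containing vectors have been ingested. The following is a minimal sketch of that setup; the index name `my-raw-vector-index` comes from this page, while the field name, dimension, and vector values are illustrative assumptions:

```json
PUT /my-raw-vector-index
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 3
      }
    }
  }
}
```

```json
PUT /my-raw-vector-index/_doc/1
{
  "my_vector": [0.1, 0.2, 0.3]
}
```

The vector array supplied in a `knn` query must have the same dimension as the `knn_vector` field (in this sketch, 3).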
- ## Searching auto-generated embeddings -For semantic searches using embeddings, use the `neural` query type and provide text input: +For ML-powered searches using embeddings, use the `neural` query type and provide query text input: ```json GET /my-semantic-search-index/_search @@ -62,11 +55,3 @@ GET /my-semantic-search-index/_search } ``` {% include copy-curl.html %} - -**Key characteristics**: - -- Employs the `neural` query type. -- Accepts plain text as input. -- Requires the same `model_id` used during indexing. -- Converts query text into dense vector embeddings automatically. -- Specify `k` to retrieve the top-k matches. diff --git a/_vector-search/settings.md b/_vector-search/settings.md index f2fa4788c4..b03a2a051a 100644 --- a/_vector-search/settings.md +++ b/_vector-search/settings.md @@ -1,18 +1,18 @@ --- layout: default title: Settings -nav_order: 80 +nav_order: 90 redirect_from: - /search-plugins/knn/settings/ --- -# k-NN settings +# Vector search settings -The k-NN plugin adds several new cluster settings. To learn more about static and dynamic settings, see [Configuring OpenSearch]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/). +OpenSearch supports the following vector search settings. To learn more about static and dynamic settings, see [Configuring OpenSearch]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/). ## Cluster settings -The following table lists all available cluster-level k-NN settings. For more information about cluster settings, see [Configuring OpenSearch]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#updating-cluster-settings-using-the-api) and [Updating cluster settings using the API]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#updating-cluster-settings-using-the-api). +The following table lists all available cluster-level vector search settings. For more information about cluster settings, see [Configuring OpenSearch]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#updating-cluster-settings-using-the-api) and [Updating cluster settings using the API]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#updating-cluster-settings-using-the-api). Setting | Static/Dynamic | Default | Description :--- | :--- | :--- | :--- @@ -27,14 +27,24 @@ Setting | Static/Dynamic | Default | Description `knn.model.index.number_of_shards`| Dynamic | `1` | The number of shards to use for the model system index, which is the OpenSearch index that stores the models used for approximate nearest neighbor (ANN) search. `knn.model.index.number_of_replicas`| Dynamic | `1` | The number of replica shards to use for the model system index. Generally, in a multi-node cluster, this value should be at least 1 in order to increase stability. `knn.model.cache.size.limit` | Dynamic | `10%` | The model cache limit cannot exceed 25% of the JVM heap. -`knn.faiss.avx2.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx2.so` library and load the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#simd-optimization-for-the-faiss-engine). 
-`knn.faiss.avx512.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx512.so` library and load the `libopensearchknn_faiss_avx2.so` library or the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#simd-optimization-for-the-faiss-engine). +`knn.faiss.avx2.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx2.so` library and load the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/vector-field/#simd-optimization-for-the-faiss-engine). +`knn.faiss.avx512.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx512.so` library and load the `libopensearchknn_faiss_avx2.so` library or the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/vector-field/#simd-optimization-for-the-faiss-engine). ## Index settings The following table lists all available index-level k-NN settings. For information about updating static index-level settings, see [Updating a static index setting]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index-settings/#updating-a-static-index-setting). -Setting | Default | Description -:--- | :--- | :--- +Several of the following parameters are deprecated and should be set in the mapping instead of in the index settings. Parameters set in the mapping override those set in the index settings and allow an index to contain multiple `knn_vector` fields with different parameters. + +Setting | Default | Updatable | Description +:--- | :--- | :--- | :--- +`index.knn` | `false` | No | Whether the index should build native library indexes for the `knn_vector` fields. If set to `false`, the `knn_vector` fields will be stored in doc values, but approximate k-NN search functionality will be disabled. +`index.knn.algo_param.ef_search` | `100` | Yes | `ef` (or `efSearch`) represents the size of the dynamic list for the nearest neighbors used during a search. Higher `ef` values lead to a more accurate but slower search. `ef` cannot be set to a value lower than the number of queried nearest neighbors, `k`. `ef` can take any value between `k` and the size of the dataset. +`index.knn.advanced.approximate_threshold` | `15000` | Yes | The number of vectors a segment must have before creating specialized data structures for approximate search. Set to `-1` to disable building vector data structures and `0` to always build them. +`index.knn.algo_param.ef_construction` | `100` | No | Deprecated in 1.0.0. Instead, use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value. `index.knn.advanced.filtered_exact_search_threshold` | `null` | Yes | The filtered ID threshold value used to switch to exact search during filtered ANN search.
If the number of filtered IDs in a segment is lower than this setting's value, then exact search will be performed on the filtered IDs. -`index.knn.algo_param.ef_search` | `100` | `ef` (or `efSearch`) represents the size of the dynamic list for the nearest neighbors used during a search. Higher `ef` values lead to a more accurate but slower search. `ef` cannot be set to a value lower than the number of queried nearest neighbors, `k`. `ef` can take any value between `k` and the size of the dataset. \ No newline at end of file +`index.knn.algo_param.m` | `16` | No | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead. +`index.knn.space_type` | `l2` | No | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead. + +An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` and `ef_search` values (`512`). +{: .note} \ No newline at end of file diff --git a/_vector-search/specialized-operations/filter-search-knn.md b/_vector-search/specialized-operations/filter-search-knn.md index dcd98fbacd..5597ef74fd 100644 --- a/_vector-search/specialized-operations/filter-search-knn.md +++ b/_vector-search/specialized-operations/filter-search-knn.md @@ -11,7 +11,7 @@ redirect_from: # Vector search with filters -To refine k-NN results, you can filter a k-NN search using one of the following methods: +To refine vector search results, you can filter a vector search using one of the following methods: - [Efficient k-NN filtering](#efficient-k-nn-filtering): This approach applies filtering _during_ the k-NN search, as opposed to before or after the k-NN search, which ensures that `k` results are returned (if there are at least `k` results in total). This approach is supported by the following engines: - Lucene engine with a Hierarchical Navigable Small World (HNSW) algorithm (k-NN plugin versions 2.4 and later) diff --git a/_vector-search/specialized-operations/index.md b/_vector-search/specialized-operations/index.md index 974de5b637..44dc036e27 100644 --- a/_vector-search/specialized-operations/index.md +++ b/_vector-search/specialized-operations/index.md @@ -1,25 +1,25 @@ --- layout: default title: Specialized vector search -nav_order: 45 +nav_order: 50 has_children: true has_toc: false redirect_from: - /vector-search/specialized-operations/ +cards: + - heading: "Vector search with filters" + description: "Apply filtering at various stages of your vector search" + link: "/vector-search/specialized-operations/filter-search-knn/" + - heading: "Nested field vector search" + description: "Use vector search to search nested fields" + link: "/vector-search/specialized-operations/nested-search-knn/" + - heading: "Radial search" + description: "Search all points within a vector space that reside within a specified maximum distance or minimum score threshold from a query point." + link: "/vector-search/specialized-operations/radial-search-knn/" --- # Specialized vector search OpenSearch supports the following specialized vector search applications. -## Vector search with filters - -For information about vector search with filtering, see [Vector search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). 
- -## Nested field vector search - -For information about vector search with nested fields, see [Vector search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/nested-search-knn/). - -## Radial search - -With radial search, you can search all points within a vector space that reside within a specified maximum distance or minimum score threshold from a query point. For information about vector search with nested fields, see [Radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). +{% include cards.html cards=page.cards %} \ No newline at end of file diff --git a/_vector-search/specialized-operations/radial-search-knn.md b/_vector-search/specialized-operations/radial-search-knn.md index 27d7cb2aa2..31505b76f0 100644 --- a/_vector-search/specialized-operations/radial-search-knn.md +++ b/_vector-search/specialized-operations/radial-search-knn.md @@ -6,29 +6,31 @@ parent: Specialized vector search has_children: false has_math: true redirect_from: - - /search-plugins/radial-search-knn/ + - /search-plugins/knn/radial-search-knn/ --- # Radial search -Radial search enhances the k-NN plugin's capabilities beyond approximate top-`k` searches. With radial search, you can search all points within a vector space that reside within a specified maximum distance or minimum score threshold from a query point. This provides increased flexibility and utility in search operations. +Radial search enhances vector search capabilities beyond approximate top-k searches. With radial search, you can search all points within a vector space that reside within a specified maximum distance or minimum score threshold from a query point. This provides increased flexibility and utility in search operations. -## Parameter type +## Parameters -`max_distance` allows users to specify a physical distance within the vector space, identifying all points that are within this distance from the query point. This approach is particularly useful for applications requiring spatial proximity or absolute distance measurements. +Radial search supports the following parameters: -`min_score` enables the specification of a similarity score, facilitating the retrieval of points that meet or exceed this score in relation to the query point. This method is ideal in scenarios where relative similarity, based on a specific metric, is more critical than physical proximity. +- `max_distance`: Specifies a physical distance within the vector space, identifying all points that are within this distance from the query point. This approach is particularly useful for applications requiring spatial proximity or absolute distance measurements. +- `min_score`: Specifies a similarity score, facilitating the retrieval of points that meet or exceed this score in relation to the query point. This method is ideal in scenarios where relative similarity, based on a specific metric, is more critical than physical proximity. -Only one query variable, either `k`, `max_distance`, or `min_score`, is required to be specified during radial search. For more information about the vector spaces, see [Spaces](#spaces). +Specify only one query variable, either `k`, `max_distance`, or `min_score`, during radial search. ## Supported cases -You can perform radial search with either the Lucene or Faiss engines. The following table summarizes radial search use cases by engine. +You can perform radial search with either the Lucene or Faiss engine. The following table summarizes radial search use cases by engine.
| Engine supported | Filter supported | Nested field supported | Search type | | :--- | :--- | :--- | :--- | -| Lucene | true | false | approximate | -| Faiss | true | true | approximate | +| Lucene | Yes | No | Approximate | +| Faiss | Yes | Yes | Approximate | ## Spaces diff --git a/_vector-search/vector-search-techniques/approximate-knn.md b/_vector-search/vector-search-techniques/approximate-knn.md index 66e6e52de8..cae24f20b7 100644 --- a/_vector-search/vector-search-techniques/approximate-knn.md +++ b/_vector-search/vector-search-techniques/approximate-knn.md @@ -13,20 +13,20 @@ redirect_from: # Approximate k-NN search Standard k-NN search methods compute similarity using a brute-force approach that measures the nearest distance between a query and a number of points, which produces exact results. This works well in many applications. However, in the case of extremely large datasets with high dimensionality, this creates a scaling problem that reduces the efficiency of the search. Approximate k-NN search methods can overcome this by employing tools that restructure indexes more efficiently and reduce the dimensionality of searchable vectors. Using this approach requires a sacrifice in accuracy but increases search processing speeds appreciably. -The Approximate k-NN search methods leveraged by OpenSearch use approximate nearest neighbor (ANN) algorithms from the [nmslib](https://github.com/nmslib/nmslib), [faiss](https://github.com/facebookresearch/faiss), and [Lucene](https://lucene.apache.org/) libraries to power k-NN search. These search methods employ ANN to improve search latency for large datasets. Of the three search methods the k-NN plugin provides, this method offers the best search scalability for large datasets. This approach is the preferred method when a dataset reaches hundreds of thousands of vectors. +The approximate k-NN search methods leveraged by OpenSearch use approximate nearest neighbor (ANN) algorithms from the [NMSLIB](https://github.com/nmslib/nmslib), [Faiss](https://github.com/facebookresearch/faiss), and [Lucene](https://lucene.apache.org/) libraries to power k-NN search. These search methods employ ANN to improve search latency for large datasets. Of the three search methods the k-NN plugin provides, this method offers the best search scalability for large datasets. This approach is the preferred method when a dataset reaches hundreds of thousands of vectors. -For details on the algorithms the plugin currently supports, see [k-NN Index documentation]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index#method-definitions). +For details on the algorithms the plugin currently supports, see the [k-NN index documentation]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/). {: .note} -The k-NN plugin builds a native library index of the vectors for each knn-vector field/Lucene segment pair during indexing, which can be used to efficiently find the k-nearest neighbors to a query vector during search. To learn more about Lucene segments, see the [Apache Lucene documentation](https://lucene.apache.org/core/8_9_0/core/org/apache/lucene/codecs/lucene87/package-summary.html#package.description). These native library indexes are loaded into native memory during search and managed by a cache. To learn more about preloading native library indexes into memory, refer to the [warmup API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#warmup-operation). Additionally, you can see which native library indexes are already loaded in memory.
To learn more about this, see the [stats API section]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#stats). +The k-NN plugin builds a native library index of the vectors for each `knn_vector` field/Lucene segment pair during indexing, which can be used to efficiently find the k-nearest neighbors to a query vector during search. To learn more about Lucene segments, see the [Apache Lucene documentation](https://lucene.apache.org/core/8_9_0/core/org/apache/lucene/codecs/lucene87/package-summary.html#package.description). These native library indexes are loaded into native memory during search and managed by a cache. To learn more about preloading native library indexes into memory, refer to the [warmup API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#warmup-operation). Additionally, you can see which native library indexes are already loaded in memory. To learn more about this, see the [stats API section]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#stats). Because the native library indexes are constructed during indexing, it is not possible to apply a filter on an index and then use this search method. All filters are applied on the results produced by the approximate nearest neighbor search. ## Recommendations for engines and cluster node sizing -Each of the three engines used for approximate k-NN search has its own attributes that make one more sensible to use than the others in a given situation. You can follow the general information below to help determine which engine will best meet your requirements. +Each of the three engines used for approximate k-NN search has its own attributes that make one more sensible to use than the others in a given situation. Use the following information to help determine which engine will best meet your requirements. -In general, nmslib outperforms both faiss and Lucene on search. However, to optimize for indexing throughput, faiss is a good option. For relatively smaller datasets (up to a few million vectors), the Lucene engine demonstrates better latencies and recall. At the same time, the size of the index is smallest compared to the other engines, which allows it to use smaller AWS instances for data nodes. +In general, NMSLIB outperforms both Faiss and Lucene on search. However, to optimize for indexing throughput, Faiss is a good option. For relatively smaller datasets (up to a few million vectors), the Lucene engine demonstrates better latencies and recall. At the same time, Lucene produces the smallest index of the three engines, which allows you to use smaller AWS instances for data nodes. When considering cluster node sizing, a general approach is to first establish an even distribution of the index across the cluster. However, there are other considerations. To help make these choices, you can refer to the OpenSearch managed service guidance in the section [Sizing domains](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/sizing-domains.html). @@ -396,7 +396,7 @@ To learn more about using binary vectors with k-NN search, see [Binary k-NN vect A _space_ corresponds to the function used to measure the distance between two points in order to determine the k-nearest neighbors. From the k-NN perspective, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a higher score equates to a better result. The k-NN plugin supports the following spaces. -Not every method supports each of these spaces.
Be sure to check out [the method documentation]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index#method-definitions) to make sure the space you are interested in is supported. +Not every method supports each of these spaces. Be sure to check out [the method documentation]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/) to make sure the space you are interested in is supported. {: .note} | Space type | Distance function ($$d$$) | OpenSearch score | diff --git a/_vector-search/vector-search-techniques/index.md b/_vector-search/vector-search-techniques/index.md index e9f486b60f..377bdc911c 100644 --- a/_vector-search/vector-search-techniques/index.md +++ b/_vector-search/vector-search-techniques/index.md @@ -1,7 +1,7 @@ --- layout: default title: Vector search techniques -nav_order: 40 +nav_order: 15 has_children: true has_toc: false redirect_from: @@ -12,11 +12,11 @@ redirect_from: # Vector search techniques -Short for *k-nearest neighbors*, the k-NN plugin enables users to search for the k-nearest neighbors to a query point across an index of vectors. To determine the neighbors, you can specify the space (the distance function) you want to use to measure the distance between points. +OpenSearch implements vector search as *k-nearest neighbors*, or *k-NN*, search. k-NN search finds the k neighbors closest to a query point across an index of vectors. To determine the neighbors, you can specify the space (the distance function) you want to use to measure the distance between points. -Use cases include recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about vector search, see [Wikipedia](https://en.wikipedia.org/wiki/Nearest_neighbor_search). +Use cases include recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about vector search, see [Nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search). -This plugin supports three different methods for obtaining the k-nearest neighbors from an index of vectors: +OpenSearch supports three different methods for obtaining the k-nearest neighbors from an index of vectors: - [Approximate search](#approximate-search) (approximate k-NN, or ANN): Returns approximate nearest neighbors to the query vector. Usually, approximate search algorithms sacrifice indexing speed and search accuracy in exchange for performance benefits such as lower latency, smaller memory footprints, and more scalable search. For most use cases, approximate search is the best option. @@ -27,9 +27,9 @@ This plugin supports three different methods for obtaining the k-nearest neighbo Overall, for larger data sets, you should generally choose the approximate nearest neighbor method because it scales significantly better. For smaller data sets, where you may want to apply a filter, you should choose the custom scoring approach. If you have a more complex use case where you need to use a distance function as part of your scoring method, you should use the Painless scripting approach. -### Approximate search +## Approximate search -OpenSearch supports several algorithms for approximate vector search, each with its own advantages. For complete documentation, see [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/).
For more information about the search methods and engines, see [Method definitions]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#method-definitions). For method recommendations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#choosing-the-right-method). +OpenSearch supports several algorithms for approximate vector search, each with its own advantages. For complete documentation, see [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). For more information about the search methods and engines, see [Method definitions]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/). For method recommendations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/#choosing-the-right-method). To use approximate vector search, specify one of the following search methods (algorithms) in the `method` parameter: Method | Engine HNSW | NMSLIB, Faiss, Lucene IVF | Faiss -### Engine recommendations +## Engine recommendations In general, select NMSLIB or Faiss for large-scale use cases. Lucene is a good option for smaller deployments and offers benefits like smart filtering, where the optimal filtering strategy—pre-filtering, post-filtering, or exact k-NN—is automatically applied depending on the situation. The following table summarizes the differences between each option. diff --git a/_vector-search/vector-search-techniques/knn-score-script.md b/_vector-search/vector-search-techniques/knn-score-script.md index c1f6cb50e5..ef17ec5416 100644 --- a/_vector-search/vector-search-techniques/knn-score-script.md +++ b/_vector-search/vector-search-techniques/knn-score-script.md @@ -11,15 +11,15 @@ redirect_from: # Exact vector search with scoring script -The k-NN plugin implements the OpenSearch score script plugin that you can use to find the exact k-nearest neighbors to a given query point. Using the k-NN score script, you can apply a filter on an index before executing the nearest neighbor search. This is useful for dynamic search cases where the index body may vary based on other conditions. +You can use exact vector search with a scoring script to find the exact k-nearest neighbors to a given query point. Using the k-NN score script, you can apply a filter on an index before executing the nearest neighbor search. This is useful for dynamic search cases where the index body may vary based on other conditions. -Because the score script approach executes a brute force search, it doesn't scale as well as the [approximate approach]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn). In some cases, it might be better to think about refactoring your workflow or index structure to use the approximate approach instead of the score script approach. +Because the score script approach executes a brute force search, it doesn't scale as well as the [approximate approach]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). In some cases, it might be better to think about refactoring your workflow or index structure to use the approximate approach instead of the score script approach. ## Getting started with the score script for vectors Similar to approximate nearest neighbor search, in order to use the score script on a body of vectors, you must first create an index with one or more `knn_vector` fields.
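To preview the query shape that this section builds toward, the following is a minimal sketch of an exact k-NN score script query. It wraps a `match_all` query in `script_score` and scores each matching document using the `knn_score` script with `lang` set to `knn`; the index name, field name, vector values, and space type are illustrative assumptions, and the setup steps below show how to create a compatible index:

```json
GET my-knn-index-1/_search
{
  "size": 2,
  "query": {
    "script_score": {
      "query": {
        "match_all": {}
      },
      "script": {
        "source": "knn_score",
        "lang": "knn",
        "params": {
          "field": "my_vector",
          "query_value": [2.0, 3.0],
          "space_type": "l2"
        }
      }
    }
  }
}
```

Replacing `match_all` with a narrower query is what enables the pre-search filtering behavior described above.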
-If you intend to just use the score script approach (and not the approximate approach) you can set `index.knn` to `false` and not set `index.knn.space_type`. You can choose the space type during search. See [spaces](#spaces) for the spaces the k-NN score script suppports. +If you intend to just use the score script approach (and not the approximate approach), you can set `index.knn` to `false` and not set `index.knn.space_type`. You can choose the space type during search. See [spaces](#spaces) for the spaces the k-NN score script supports. This example creates an index with two `knn_vector` fields: @@ -180,6 +180,7 @@ GET my-knn-index-2/_search {% include copy-curl.html %} ## Getting started with the score script for binary data + The k-NN score script also allows you to run k-NN search on your binary data with the Hamming distance space. In order to use Hamming distance, the field of interest must have either a `binary` or `long` field type. If you're using the `binary` type, the data must be a base64-encoded string. diff --git a/_vector-search/vector-search-techniques/painless-functions.md b/_vector-search/vector-search-techniques/painless-functions.md index c582bedcf8..4865537681 100644 --- a/_vector-search/vector-search-techniques/painless-functions.md +++ b/_vector-search/vector-search-techniques/painless-functions.md @@ -12,11 +12,11 @@ redirect_from: # Painless scripting extensions -With the k-NN plugin's Painless Scripting extensions, you can use k-NN distance functions directly in your Painless scripts to perform operations on `knn_vector` fields. Painless has a strict list of allowed functions and classes per context to ensure its scripts are secure. The k-NN plugin adds Painless Scripting extensions to a few of the distance functions used in [k-NN score script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script), so you can use them to customize your k-NN workload. +With the Painless Scripting extensions, you can use k-NN distance functions directly in your Painless scripts to perform operations on `knn_vector` fields. Painless has a strict list of allowed functions and classes per context to ensure its scripts are secure. The k-NN plugin adds Painless Scripting extensions to a few of the distance functions used in [k-NN score script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/), so you can use them to customize your k-NN workload. ## Get started with k-NN's Painless Scripting functions -To use k-NN's Painless Scripting functions, first create an index with `knn_vector` fields like in [k-NN score script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script#getting-started-with-the-score-script-for-vectors). Once the index is created and you ingest some data, you can use the painless extensions: +To use k-NN's Painless Scripting functions, first create an index with `knn_vector` fields like in [k-NN score script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script#getting-started-with-the-score-script-for-vectors). Once the index is created and you ingest some data, you can use the Painless extensions: ```json GET my-knn-index-2/_search @@ -49,7 +49,7 @@ GET my-knn-index-2/_search `field` needs to map to a `knn_vector` field, and `query_value` needs to be a floating point array with the same dimension as `field`.
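For illustration, the following is a minimal sketch of a complete query using one of these extensions. It assumes a hypothetical 2-dimensional `knn_vector` field named `my_dense_vector`. Because a smaller L2 distance means a closer vector, the script inverts `l2Squared` so that closer vectors receive higher OpenSearch scores (adding `1` to the denominator also avoids division by zero for exact matches):

```json
GET my-knn-index-2/_search
{
  "size": 2,
  "query": {
    "script_score": {
      "query": {
        "match_all": {}
      },
      "script": {
        "source": "1/(1 + l2Squared(params.query_value, doc[params.field]))",
        "params": {
          "field": "my_dense_vector",
          "query_value": [9.9, 9.9]
        }
      }
    }
  }
}
```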
## Function types -The following table describes the available painless functions the k-NN plugin provides: +The following table describes the available Painless functions the k-NN plugin provides: Function name | Function signature | Description :--- | :--- | :--- From 99ac17afa2d658df6b59dbb67ca3d51a5279e5ba Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Mon, 27 Jan 2025 13:19:49 -0500 Subject: [PATCH 06/32] Add images and more rewrites Signed-off-by: Fanit Kolchina --- _vector-search/getting-started/index.md | 30 ++++++++++++++++++-- _vector-search/index.md | 6 ++-- images/vector-search/auto-vector-ingest.png | Bin 0 -> 39123 bytes images/vector-search/auto-vector-search.png | Bin 0 -> 33116 bytes images/vector-search/raw-vector-ingest.png | Bin 0 -> 26514 bytes images/vector-search/raw-vector-search.png | Bin 0 -> 25427 bytes 6 files changed, 31 insertions(+), 5 deletions(-) create mode 100644 images/vector-search/auto-vector-ingest.png create mode 100644 images/vector-search/auto-vector-search.png create mode 100644 images/vector-search/raw-vector-ingest.png create mode 100644 images/vector-search/raw-vector-search.png diff --git a/_vector-search/getting-started/index.md b/_vector-search/getting-started/index.md index 113ccc820c..3645cceecb 100644 --- a/_vector-search/getting-started/index.md +++ b/_vector-search/getting-started/index.md @@ -46,7 +46,33 @@ auto_items: # Getting started with vector search -You can either upload pre-generated embeddings to OpenSearch or have OpenSearch automatically generate embeddings from your text. +Vector search, also known as similarity search or nearest neighbor search, is a powerful technique for finding items that are most similar to a given input. Unlike traditional search methods that rely on exact keyword matches, vector search uses _vector embeddings_---numerical representations of data such as text, images, or audio. These multi-dimensional embeddings capture deeper patterns and similarities in meaning, context, or structure. + +Use cases include semantic search to understand user intent, recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about vector search, see [Nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search). + +## Vector search options + +OpenSearch offers two options for implementing vector search: + +- [Pre-generated embeddings or raw vectors](#option-1-pre-generated-embeddings): You already have pre-computed embeddings or raw vectors from external tools or services. + - **Ingestion**: Ingest pre-generated embeddings directly into OpenSearch. + + ![Pre-generated embeddings ingestion]({{site.url}}{{site.baseurl}}/images/vector-search/raw-vector-ingest.png) + - **Search**: Perform vector search to find the vectors that are closest to a query vector. + + ![Pre-generated embeddings search]({{site.url}}{{site.baseurl}}/images/vector-search/raw-vector-search.png) + +- [Auto-generated embeddings](#option-2-auto-generated-embeddings): OpenSearch automatically generates vector embeddings for you using a machine learning (ML) model. + - **Ingestion**: You ingest plain text data, and OpenSearch uses an ML model to generate embeddings dynamically.
+ + ![Auto-generated embeddings ingestion]({{site.url}}{{site.baseurl}}/images/vector-search/auto-vector-ingest.png) + - **Search**: At query time, OpenSearch uses the same ML model to convert your input text to embeddings, and these embeddings are used for vector search. + + ![Auto-generated embeddings search]({{site.url}}{{site.baseurl}}/images/vector-search/auto-vector-search.png) + +--- + +## Quickstart {% include cards.html cards=page.quickstart_cards %} @@ -68,6 +94,6 @@ Work with text that is automatically converted to embeddings within OpenSearch: ## Tutorial -For a more in-depth look into text-to-embedding search, follow a comprehensive tutorial. +For a more in-depth look into vector search using auto-generated embeddings, follow a comprehensive tutorial. {% include cards.html cards=page.tutorial_cards documentation_link=false %} \ No newline at end of file diff --git a/_vector-search/index.md b/_vector-search/index.md index fd35839bec..4e2a9ef676 100644 --- a/_vector-search/index.md +++ b/_vector-search/index.md @@ -39,9 +39,9 @@ ml_steps: # Vector search -Traditional lexical search, based on term frequency models like BM25, is widely used and effective for many search applications. However, lexical search techniques require significant investment in time and expertise to tune them to account for the meaning or relevance of the terms searched. To embed semantic understanding into your search application, you can use machine learning embedding models that can encode the meaning and context of documents, images, and audio into vectors for similarity search. These embedded meanings can be searched using the k-nearest neighbors (k-NN) functionality provided by OpenSearch.  +Traditional lexical search, based on term frequency models like BM25, is effective for many search applications. However, these techniques often require substantial time and expertise to fine-tune for capturing the meaning or relevance of search terms. To add semantic understanding to your search application, you can use machine learning embedding models. These models encode the meaning and context of text, images, and audio into vectors, enabling similarity search. OpenSearch supports this functionality through its k-nearest neighbors (k-NN) search capabilities.  -Using OpenSearch as a vector database brings together the power of traditional search, analytics, and vector search in one complete package. OpenSearch’s vector database capabilities can accelerate artificial intelligence (AI) application development by reducing the effort for builders to operationalize, manage, and integrate AI-generated assets. Bring your models, vectors, and metadata into OpenSearch to power vector, lexical, and hybrid search and analytics, with performance and scalability built in. +OpenSearch combines traditional search, analytics, and vector search into a single, unified solution. Its vector database capabilities simplify the development of artificial intelligence (AI) applications by reducing the effort required to manage and integrate AI-generated assets. You can bring your models, vectors, and metadata into OpenSearch to enable vector, lexical, and hybrid search and analytics, all with built-in performance and scalability. ## Using OpenSearch as a vector database @@ -63,6 +63,6 @@ If you’ve already generated your own vector embeddings, OpenSearch makes it ea ## Seamless text-to-embedding search -Simplify your search process by letting OpenSearch handle embedding generation. 
Follow this documentation sequence to begin using text-to-embedding search: +Simplify your search process by letting OpenSearch handle embedding generation. Follow this documentation sequence to get started: {% include list.html list_items=page.ml_steps%} \ No newline at end of file diff --git a/images/vector-search/auto-vector-ingest.png b/images/vector-search/auto-vector-ingest.png new file mode 100644 index 0000000000000000000000000000000000000000..59344fb5a3bb98bde131a786e843eccf8572659f GIT binary patch literal 39123 zcmeFYWmH|w(k==Ffn!J>-_f5)qrD^qWu+}A4d;V>;zR5auyq^dA>7=t)7sLIm9 z0vltKJuoT`6RA7lDe|H7fb>n4MYm|sgklKrM5`2hzx6W${6yv5lMNoU~?hyiuXPD-=j$CZ)?s&DZRLF0@Rt1KqL(-3*Tu=Z-&!Hl^5#r#+@ zf8WRTU9hO@vy4dJBV_5@Yld2_gI7~H1}|aJ2+-)t&I7FP;%(Q#gjv(l@W)!M^X04gWV@Pma8lcpKLg4`1&e4 z3Oc1Pf|=LIBMiK&29W(*ZBK3-E~$J}kbeJU>AMDe!s1&O3!J|nn;nScZ`?F25TlD4 zW^%;MG;&R{JqmK;iL?Rf?MV&c=sw<-`oelVkc4xWC(SnKH20X;p+gNlO?Zc_bs}5i z{$C6on7(toAj;=sPdE2qf};?*g#jc;rr(|?5(F>>!Ig8tXME8oKK)TjTl{3YfzakB z00pK`h)w$mmvmp5@H0&pb`{c&A6OOM3E1v;g5uB48(%oVG`l425VXPO1rZM*?|o4t zefPh^wS7YQ0=`KIBN{M6XgvVJ9Po?i^A3!gh@U(ZDtw<%SnQX0LCaiOc~E8WB$4Ml z_XE=da3_Qouv=l*Tp}}6cX%&<1x74Ye>Qy)0RM<@njHx4XNDk@O*A|9Ojz3>=}qbf z!WFQbfXc4dDpYTTm;#XSKt-X$+z|!1L<(vsv;Zl=Fok3J$6{nblZ`qk=azmVoYk3*vVkQ=HQ!d81!jV{zK=$+Y>wE` zHxc6Er1%Z@ByA#Wjcq*Hku)I!kaclvqwc%np)yv0NPtqHLQttn5lT@) zg;Lo}zA67g8R@54@j>~3%81G@Ro+TY8Ij2wvusubYn1X}mpJzPKow?111d5aG)g?h zBUNlARpkbiX!$Y43VHrQS{0!Z%+e}laO!2{=wi5H&-4PW5Uvmo)8Wh_E-%NmvN08B zwcV1vGWN2&GH=C#GPknA;!}A`6_4C#RSp%sUmc2Gl}}=~7>-GM+~d0=qd6W%XFBle zJ<24ahV@_Tzoz_YDoQS@ERv{|xo+Eor9-20EjKDRD4*6WSYowSv+i)IzQZ~-bu#8o zpwCU2VwmW6Vs(0PBISN|ZgD1eigN04c67>cCOAO4#dL>u?{mjAsN z<2~|%cg?f!Q(sq$sy?T_R;yTd)i_jb(dce%Z82J1QPp8AQhiyiVLNSdJv29xY}z`1 z*MK)_lecQpiqOh%hj3@`O#Z9@=@r5mA{8POj7*ZMFwk@oYJX#=eGZtqEJ$cq3Z6## zDUdmurId-2nM2R0`CCiA_WOd^e9&ply4aDFYn0>bq{>mz_8*rr2albK{V`{OjkiGa zZW$ANqmG-Zn=c-?_ki=mo#wggUl>Ef^EB&a%UY8>E3GTfc`+4o`Emv0sg+sh7VEX% zn4KVhh3xv;MdR)Xg)qpy>+2&!LRVEbZxS(Mp1`M z)ya&*k;C0pfqiPzKvQtj_z9s4ng?$-njV(GpUa(-J>LBm$B5hwK-Jw=$#79k0Q2C4WT=r4DFfh|!XVnWh;OVKM#i3*mHCO;ydx z0hvUK_zFACYRkS@Qaxx7 zP_;b;%pw*FcrK?`)q7;THncBSF9$6{x-^{@(EpH8o+Rh>`Ayb#!) 
diff --git a/images/vector-search/auto-vector-ingest.png b/images/vector-search/auto-vector-ingest.png
new file mode 100644
index 0000000000000000000000000000000000000000..59344fb5a3bb98bde131a786e843eccf8572659f
GIT binary patch
literal 39123
[binary image data omitted]
diff --git a/images/vector-search/auto-vector-search.png b/images/vector-search/auto-vector-search.png
new file mode 100644
index 0000000000000000000000000000000000000000..c9fa35cfbc7b0ad517f44989fbf1bc3a3c6c3c18
GIT binary patch
literal 33116
[binary image data omitted]
z-u!X@`DqEGt;+=oM1i&AX3Is6zGF1zHiaC{3?#5X=+mA2r9k3BNL?a|B&^Ok0k!n` zoMcpv5t|%CF-g7h~z_tS-0eeDfzhD3yo69<8Emr*qO?w8Rti9F$@W_Cl+B~7RdvwpebjO zX>V~6_?S9YFNJx?^LqIcrEJ1e@H7MY2D53%9C&`!mxG3G5M!UhX`dk?4O_~5;6}zAre1Xv zB5qi@qf^Gt+Sn?D@?&34?p#u>%y_f|@=8!oHs72$&dPK=(uwGa+nbzd=+xmzCLUwY zaIpRcx12t;1G#Cq0?sAe`yHC4D)m}7dL+eO1%93qS*zV?hzX|eypLV3GbM_MQCR-T1DT3<+sQK zqoN7!Xrn6(PhPjJvSEJQN!IuwZx`Qi6b0o{T)OGFYU+OIIGGdv9 z*}r^_>{%eYLAnui!sx`7^0j6#XV`xUA2Hsk!GRb0GY)bXsHag#6Pc2dAu0uM`f2Dg;tD{#TF9M&2MvssgsTsl6d)7@YH!ka*F?`1j9W39Ow5Mw$z{N@RpBPNu zLD>Dd{bEnthUkW(i(?o4*gv`*{DkWT-2?24y%p*kkV`Oz3I`<{L;#Xn7f}~2FKkY} zP7sAe76v0iTmZ`$l_tF|PD9FuOpkJjx)mo*qLV;u!5A*LDnLd0l}v{WKAtZD{YQzh zoFk2f(57S!X)fs&VQ;=XMF^U!BphjAL$abAr<}X^Gl5uQW1P_shyBF~m9gfr`ybs! zC}j}JWG-J=Gc|>HCA$T^3FYG0lOB`ICTbIFf6&RjO2ZYB{8Zf8Ckqupro=R)>6#XDbL&VNx`tB&9~D_^NoKimjxo+@caA z|5LF>p0AijMX(&JqD~o%YE3z&46e*8qmVO{GnCzIBCCYc+iA1%r;3Z(e)&NqTjfKg zk78k^du4IixxAH%XI_jdyNX^_kD_<&i|9S3Q}O}V^#0^juBY*Z4!ruHGO>tJ^S9=2 zsUvMADJ8Wf;*HX`T?eqV=(MiY#?^+^bDD*#EH-L3J>TmezMPvmn{Xx4<)zNj&x|^= zIKMfQaJ{;8xR5zVI}f@zIcK`yA0gjkc|dy%dwlgUdRcveyz+!+)fcuW) zjNQe~!W6}_ZoO>zG%vmoW|CwxZ|=O6W_V`WHPANxlmfjNWRPIkFj_Q<#@5U%!&J@C zZ2*zEl6gJ#kaqubaMCG@JWGw$ZYIn6C(AU89SeSgd_7dtSiO0(8>gPH&*aO z=B7q8^+ol~M#ZL^*717FRu3B+%c=UBx*ikZ`s;cPyE)U_@x{p$v(BZ5makK``5UI4 z2%Yo~2oHv@WUmU4-k}_!lA(ejD8y+BV{K<)4tMt2mu|Dyg^ArtA#=#3{8>}kN?ABr zxpWMgeOmI3`pcqA!RHNIq9>BB(N6EbR8C5Mk9@Cm^xUgC{OQ8K{Sjm_AZ?0g+;dlV zhvbR-=yrL$*S>gLg*iU4M7>qHruB<^y>tCFKek4$K(25)tv378a;wn?s~4#Et4|<} z3`VHRH1@6QmENf@p&u?1wmjYy#SDu)?s5;3?oqG(;lOJo90oLIGlV~Z>denP$o_Br z)B(O4Q98I?ZHCvskt*X@3A6K?X7=;p^DhT@23mqQVA&B;gIq=Ai4BQQh&RPNL@`7q z3lGGqqdPFF@up&JBm0L%hH%5a=eV3Dd)}cqtdu{&-=%vQh=hRQV%o{_*D7ReA_&#kf=e#l~9JNrmAM`m~;|(LXEv< zz146UhkzHsS4q8$&6NBg7I7UEqaUsb`O-_15tE42#Hx5xieo6IF4asr%_a+p*12c7 zs)cG!s;*VPG#VYIW;730-%s;gu1~oZPphYO_#66Ng*U$KjwD4dIng*pIBwcTtk36E zrgsv$K5=hdV&CrF47mqpX0uX^ppdO&uQS(bmyWP(EpoQFgrqE+7{3 zd#z>EHG5{hw{)-7uLZ9`es4Q3qgJFCr^DWCw>Ohw?B?iwIJ-AZk~I7iQSyV+A0W#G`bMd2=M{;hHHM& zc(bWjqqu#WfrEzg?d-5E7cvG6_n2dxW7T;<6+8ur8QYpxx<`~?_{r&Pa3~gFb`3t;azDi+g$lh-l}0E>7Di9 z+T5NKri*X0Cx0@h>d@tO?Km{k(i71oIr!r#C-faCw9eQ%v6ZjYsmmC7;aFZZ>4zuPODHlJ=Jo&J^+ znIqRV_Nue#`xjr=6Xv*9V1&I_~<2ozKeL2$(dWU>#0Mf5VS4ir!xw)Lftjje%k4pi>` zG|_Y==#H_M*dG23C-Y+eGvSfnjG^2T_Ji{iTHGA!Na)#N5fHnony5>d%E$mw0q&uI zfI}^SzyNo^fD0RN0m9sb5FkjvHwxeq$piVHr=ak8p#O6ZO!=pyppvkJ1mIiA$kD{a z*2&z?IgDzO3&3j8LRsBeT}GPA$j*k&z}U{vgwD;z{tpQdj~f@@*2cuyfWXbh+SZB7 zjhE;jJ-7h(f12ru2>#K<*@~A)T}GZj*v`>}fQ^oUj)8~|hJb*8$I;l7OHt(8zsLc< zc!|uNo$a~k>0Mo2>0FuV>>SPL896yQ=^2>lnV4t+J!qZWZJiC=Xl^%SI|NndPZ;5|Vs{fmkorCdj%DHY`iKDQc z4M3zb-@iTcFXF!+{)>=@{*UE#^^{bVu>k@WuP5r@YO~=XVM)P;rvY=on@j=(= zVOLyaB$E)VpDSCFTI{*5su%5tS)TVq!G6Ke48UZ4U}-?8`XJJNe|{jr0>7(Y+}iwS zE1-)XNuMO>C(>VC|3Md)2t*CGmROB+l+SGpoD9}J!=XU!g zx+QuektHG#*63fp{1-GGVt&aG8=j@>#Q*8kJOuz^=4j-YBmar7b1VShjz`&cvH#FS zTA&b2-C~A9mM-c)N6ir)uqw9mWwvANf1>R74dA~#g)D8Xzm)|-o#6$@J=)~n$7B3Y zS4E@f7Yi10v@`sdo}Pc?cDs)A8vdtY*ra`uf!`b)4gO2dM<{??&&wpwq5m|DMG^o+ z^zPokf9ZLF43PV{N%LHz~j!92^wH$N0C62v2EF(h!oRjK$6)!0T@wSNK)OmNo2m#Q8O{2=&UY`HjX0J zJubWO+`$bmgxV?8Do8b1DWbR9=`KiKrvA(gxvFEx>V1b0ht znKRBGq9G#MEscPCL-YF;*fBW$I4HM&DE*h8W=Q^EC7~2L(P4kUQKQEf%PI9K@*yOe zUs?(YD@6_Wg&Cf7Lb)#k9ro(t+^UVmVWcBDvPjgg3WPl7t+2rHbOimm%gewM?s5wn z-)r-SnQygTxM(B!xQ4sO;#4u=X0Io4?pN+6%=TM!5r{QuaWjK(GTJqxfk{#ZIWxlC zUib6hEsrM9{nPalx{HmT@`}r{F{$%>GnU`AZS3c7R@qgt5c|J_Nd1!%NS(+hobM~q zhDTVXR7}cJ<<1pXt0W0zcgN+9#evNgT5)wf-N&qr>+`LOV~sfpRaGQ83|9gy2uup~ zQdOC3)KVdsvOT!xUnSnpx2jn`)Y&kjD^la!pCbPna2aHPQB@LtV($KUvF<1QkTsUw zx~;}bvftnSjhCm(6$kCq{(e^DY^@nw>99s%BZQWr%@r~zIAr%6$F8YL4G$a!b99%# 
zC8Gb)^X_i&*wF)-=@EK3D$@+}0`Aie5s?93-sb062kFvMnmF&H{ zXE)09n*QqntKxBb{T1RJE(1uS7~+uB;~~@ek3z_}bEi(cjow1bRIjIJ>YBGC5@`xj z<8Z=M;(M#zo2~l^^-e=VQ+nMgwh1qr{n`vNTlT~10a&d6V79XFNy;$GJ%Yhad ztC@N${`An^zuwtA3y_cuy10os84$k9&3>wGm#FMHb_T!REwP10hdGRVxAF#iDzz`4 zXDbmnOLPHf{*&J|7{8x@SQL-xXDW<{&cJa=95*D-*o|?x`#dhfBb&dA=fq~VAb|&& z{ND2YZuI2$^G0{Lg0)_Mn5@fdhYWI=yoN^*15zxhR$FHTI8LQVo77A?qZzFkhb58u zp3`xEusT!C*yp9;N1Or$>T7;&Z7iqKw*srne)}2?Jw9?NztT}DcUje>{=$;6TW2)ZJcp%i|P)uSc6+V$dKGVvk0xCBwj^kx}|e1ZK0iO6J{KrzINkySaI{&?es zk#5D~1kAVLzh*UfRDG$y5dRZPhxGHtG5^XA$Gp4y+4jT=LAnj6$#g9~B5IKsZTDcU zInK6DzQEK&sl7&Kq0PwYWC)SIGgrAZto!=!Y4Wt6cB@pQE<5V^RcP#Z%KL;R4DP*Kd6 zgfC~9i8hnNBU0N0sW3bDIj2O&>xkcAte|(c&7sS-M$E31(tci;qdeT%F&L+`T(S!> zt>!K;n$V=k-cr@%oeaiF>4UE^8?WU3nnSA`FI;Xav3hOQk ztvuhT^!NB6ds>fva?|A5TDu0XR@VC)6`|f=VV8i^ADd9rb*NL;oyUUB2d;YBeD(wG zrhV^cWk#@Pq+6v*bt~vTn|Bbu;C89O`AGOv(5@#@&_CbL>1WX(`POJg_2_5{>Cr#r z<_WiC%|@H=R6$ccmXgO_>=e{lqfu!pdKG~9es1<4v$T;%irsRV1UK(QM@bN<<${1m zj&JXO9XRACO5w${T(E3&0Nl8Hh%-=YjGV6WCG zp^Byvgz26VrS;wK|JYrH^WMU^iFjDRdwx^f`K*!l^&? zPNlKB2d#4#h=q<|F?m!}#e!s6T*=V!vrg=z-Y=!>TW{1!Qqd~+5JhXXjVU>PT5RMI zy@Z+Qe0BP!A&|r}uE3k?NZ;~Sgsb4er8qg*Wnm;wQ(6o+t9qlzjnCgkPD@pMvr0h3 z1fq;rEiD?M8CW5TjE#a~#ebhyiZ(3v*Gg_4?U!Ru7(NfMwoVoLPR||%5X4zE$E3lK zpZc%emJ&U2;Lud}fQcnm!qehN)`)}IJfQpLk!v^%VrPF8)0*T5Cu{h4}`{ptUjYzd%f zHoYLEAULg}h9AGSu&SbC7EyzZEtKfT@78}0HC;S!G>*LHq(r%UM$QN3rya%4l%H|h zd<*Vzmp+%D#`Lbk;{~n0NJ4tn{a8L>XOksP%BS~VKCzKUgXqB2;$N7%<@_s*Y7*Rz zj16IvJY-Tr2ZoU@$=hThyKW>XqT*QB_zTk_eGqF4@qT*xcGhefB;FTzIoqk-hriw` zKWLITAT4YZsJKSv|BNzq$N}qYO|QWP{$FAN28$B_B9i~tANc=(&mh>r;o1KLan1<> zo0D(kisX7#nx%N|!R0#FlmJ8>khgu2e-{i+|8ooxsGx>LMM*EbFYW(4Jw85md~CCm z7WfUe%q#I*yj|cgmjaxZrvGeoA_wm1z3wmP3*h|W17D#{22kjl4C&>A6(n~?QkcEGSM&G zNH^_(28j4Hin0-O-SUqX0#tzbOPI>%dEJF9E(wfJmlOID{ucQQE?DFlCo=LK^p_iS z@o_ST4Zws(xEs!o+XF%0YBs~nCRh>^Xt1+aHH?kgYR=Ek-{tj;5)%`{z{)DKKHkpu z58gyb`o)CE1WxDx@XC;p=xjHwO#40;NzrW11EfVI%Xs(Z<^}p*AK=$lnt=RUe-bOD zDhR;n4j=qTB)4~P=uf6rQ?9M?xIa;Ewf%~Vi>v(j&p9@37TR7~SkgC;vgw_L+V%6p z8w>)XWCqW^(Kp5KaCHT$%N2fMM9wI9$BeM#%BzS%6^C8s4WVYh>bweV4M$E+E{R4% zQ4=%fD|g7uj69F$1KhW7-zaoCSl-^>m8`egcK&$V{|K-`g~i(oEBq!cp z6n)a+j3==XH!r4Et@5j`re(8P1#6(g!p0^Ni@{xPb0F63^@*O!WdHs4d~3emR$o!! zboevhWFn=)szDr$S{1I{aep8P8WjlxBjoDJ{`qomwb@FcOr9_bn-#|U^+9F7-Q_gJ z>*DmZvSXB7nm_{u>Q-#8^f2tM+h064GR(~t2H6gQc)$~%h{u&+cY#c#OFdA$wK&X+ zZe3+KzGmOsL-v@Wdra)vgtLGr%uxd+7pRk*ljLPLkbrU7q*jYA5jIWxGG{XuX{ogI+fZ1RQ3}2^17G3XfN_ zgPYgw3P?BG{&M!}++*Y#I*>2WN10R#Am6m%+b?29Hwa3iiYvQEG;w4 zKN6HVoI7*@34oV-vf7{J)8p~aIU?!Fg)3Yc$<*3yb#H@hu9v#cS*H6KH0pj|xxbHD zWh56`F}=DT%>e%%iAKGc@u1M_;SqmM<94mdT6lfBCQhN#p}L)^(r&fgu-_X){vf6f zCz-;mP7o2C#vP1fc}ujaPLDtn+Z$np@_xkr#ztwSLTYDcXF(bkZu4??ENGkN@Z)BG z;#aek+TG=r9$6-f1&ZlZ25B-xhv2wKscd%98Y(5MSS+4sqv=$g%jse*4Wh$B|8yE4 zWf2tF@6~4{^1>goSqd3;Z>~|=bd<~K+B8Ne5x~UxSdf~#$-&U(83KEu6Mm;&LGMeT zCAoC{bXLH7)N7C{H|&1V-sNb83_~NqXn0nyvkm<7^e6cDi7&8A?s4z2w9H$nfSGt3 zU#I{%PlMA@K@sAGg__-X3<;Od3yXz%sZ4=*CFOEzT!HUp=9?tBte1uh=}Xo6&ung! 
zK7#3S(pBn%h%|u;B7Gwx;?Hxya)N$yIE}cUp&C-?yF{nkMWGg6sZxnip;{f0#py&O zhqt5ku5q;0Tcy!Ry>nLzK^-%wu%|Jb_cO_fza&5=6rMeM!kV^wo){6lR0gv=cYGsO}9Iy zzWLz-JPw=JoZ-DjJ79hExy`csM+m?o=%>oo;rU26_MSk-G**zoX!NZej{5yoB+y%t zU`!@_J>GnQShT9Ds>ozuYDz|v^5y^?I5Sr#foQmb`B!jC*~fmWUXdfp`xiZCe|#dO z7hM|n4lV0BA7E9D@jfO`Pvkc1dtuQdNkeZ?BXtuxD*k*y{O#8V!cE?E7^qg#?nxm* z_J+me8nQP0Di#tfLQEt*kZ~IAwHBNGs;mS#Z`Bw!5k=E|3WB&rzFzOw7HP>w$EE|$ zsquIYPR?>wl>)2%SlVprM#rM8t14XNKf#YcCfJyw0KVvIt6go@xY5Dn7KGI-=!@&b zuU`r!;pdSl7TLU>Elon&y*|7x#$LJ5UbzBAP(zQa;S9Cj;W#$}jrV$vW6m6uOO20< zx)?BfAFo~mENc0O14qm6_a&L*zN?wN+ zf#@KfefKro9fnGWVM7C+>blnO+WJDNs&TuBQb`woV~^Z_DGR#dAvk@v7Ob_aWhyB< z>OVvIeD1JjMAFyj$Y>~`z&@?h8RMT=YPi$J_~d+vL3g=H_#FaX>~Mz|bw2ioRLX9M z`ftASy@xjYApLi?a{F#%_;M0tL+Zouw$szMki!NQ$<5k*#_rs;Dv}3Ki)b^smX&4s6FW(>d zh{I}#VZQdD>g4W4$5Xz-fqAb|12PfwGln5wIqnXZ8PB-5>|To8xLW>8uS_}_dQ5m^ z%J##c7#5ciMof0EU)+p*6f&sWxL%7pwe{7-VjQ;opS3!54%1r+Ta)P2`!4xh z@W#C*VN4XYPMrvD2OmNQkoPfVh4J1SJb^1K3jc){eKB#Ii)NAR%=}i*2wW z_|1giQ;hJZ;Yc0f(N;Yu7aShhX?R%J(88K4gX;qJS*D6xEwan>XRi@sSk|_w&gH% zmk-@`p$>!lYzAnw)NM*tv`jLH_x7)iM%O_OZFH)q&s!HoFTAI|mBv21tmY@qd*eCq`;us> zZzJ^TK`qhPd8)-Q3Af>}nDrv_QV|1htX3v`iiU=}m`c6I5Fdbg4Z$y{BX?W>{4JF| zn$>S-o0Avf8DMr)ZU+>r7!U#U4ze->jPT{hHcoo!0x*Pe+@9ap*FRga&MpQ{Y}SfN z3A59`U_|2wBFJQh0pIE!tv7(be9Y=Tm=E~*q%}G=OO|8N*XUr2oxff$_4r;XPwuMG z{swcZH#YIt<}|Re2Bh$u;6r5GIlQtLJ(CkcEtlyn59yJF->z0(rq?6XX8nUq{Q@V9U_y z40*Mxa45Bk$XMpfQZIc)GIgXG zy{`V&uis&SPcl9w70JoF;`lx>U_7sqBkk0qhX8-}*V`*$VP#nM4k)V!`p1{=oql&k z9N@XjH`^jpk=K_Ju5g=LmmrI4fO_(MAqh-2i^YvOERB%@FOW?hKVmX$!XoJ%Ei|if z1hUJjMLL=@y5n%`b}u|JX5;2w;G~4=C(N&~;=0D*zD3xiNz1;JDaeV3$ zA)}-wVR7?)I%`+v+JnDdDAd7G{!aD2@j!8G#Fav+s_gRGT}uzMSVOK8O$MDx?V^CV z*@85I3<&`~F*;EyJ&>&(n+9WhcwGIXzHWHKn-si{UpIo@#BDBbhc2Lgr7mmHkQJBCKi$tLmRy}Wl zE3%7n!R6UF>`kV}c<3Sqkasfakox#|d(aFbEnNmV+nsMLSlkGB15)&I>RK9x>W?*8 zbFHynA3KHgU3}Ca&Vw^E0-Letkq{dsovHMq%{TjdoXJ*spQO3|{jE+^EU%Y@+QDcm zPETc7JD<9qNq3B2lix)tRQ+~g7bi1fQtnT7>nk&(R^)HWT4mbBBH$=RFU;h2E`E~} zr5H8J=Z)+>AEQd5xYJP5;LtB51&!9!lQ}? 
zwbho`D2b+Ccytgqp=j$27a(BTVf$`>?*}$BD~UbuQbx=Vw2r zsbT46!*Ha*+@|-e+}8O5_5ATXy0V`3Y>}!AGEEm|(nk(fQR@XiQp)kwzEy* zjc5uwgVPVtGM(ZC%?Nz%cMTtYRjF~guQa>*=1Q$Fb_I&z7*(4N1Z2 z$wrQL6|&8-Zf0Ye5N_!Ab}h0lbed+0$+H69zGML(2S2kb2LuE|!bz3Y^!4=(RYva_ z{6$C^%2Hvmmned%F=ffyHe;qT3R0!G+XG@H{E5WXiohPea>r~?r}(c2`<&9KejCq% zezsajt^`)dyMn6^9efNjU81yiD}@3h!~cL~w+;KMF!5s{QZiCKL{Ez)vKb0{tkr5> zo^M*In~kc~q5p`lTi_b=+C~n#9tKUK{v~=pz9&iJZB`U!LOuK}x$?(HMtTW)|J>pc zt8Eum^-FkI$41QWfsHiS#74b}qY zy)%;%oSr1-U04bW*)tx0u^y8H)7#7TEdq7wWQt@tTc%h9mA87D9~&Sr;sI=YL+f1Z zXH8kPzteyf&0ZZN6M^ClCkV0Io`(Bd??z`NrhS>ZW3ilvs#L2TNhFsmTG;t`f2E*M z#lA-G8ER5K+@DBgbUu>j@p?jXIhpSd0EY!^IRcBNQU^vyA&o|3lDnVYAGeT^kO<~d z==JcrJs(rX$8vZ*w+|0PkLOB%Po^{5J30=8Ars5DJDRv}oY1KrMPF-y{Q8!Pac&xpu`s`%aO29L82Quy^dnom>6 zm|hu$2Rv3d)k5!C&a=lTl{$E@=w`^&Z>22h-^4V2w)A?0o$6mHx}QFkbdGZAR%xOk z293St@s}$~BYk8YKvND57?7meXQTLOFC~FzHG)u3^zz++R*N&V%h1B@Jj-M?FO+16)l7{C?vvLN&V*LCIxkh8jWrj zM~mH7%%kDgJJYEGQ#`#e6=CgQ{5m4~VW!B73&T`VMteXSFM_64CX;O}jgc6T!XrnI z_WgKF^}r3FIqG|LTqmMfb>mY_?6U@SrelZZP#u0aERo{Z)}hjJJ~Z^yg>U5Gl#$`;Jo z>~?s7#cNW!nqLk-V!GiFy<^ES&wzcQ?7AVLfV2J|URJqP&^JvJlr z`k6O6i2+CJB^qxw0r}P0&yP?{A!JyLo~`0i>G<5tF-F>bp>%VZh<|}`Lq*mL#NFZ&f=G1KRn`^Rox;jFZ{eI*(I}orf-%7VG`f4kk z6V;W??VFs725zK?%||WXfI{mV3`-3?MHd=MEq3qZKp$1p+k;71yKwP?Mbc-#?1Se- zSGk7)ja~&K_F@aU_mSW!e0#66wZ`Z!?*fax3G-WvZD5G|dGopz9bYkuy6Y7XjczwO zOm@EV$x#_S$g0GOLNBIE^>;b-mV&K0wQ1Brr{b=XYOd|rVyAt(1;ie87OQ|Kc6+K9 z=n3IWgbhV1Nn2McVx;G5{xr>fjP9700zsi=$KHv7>&Y{=99+Z zWV%v2;Pgom2~7?&G~&eu3TW46cmlT}M*|w}CEPg!KgXvMo zoo^UqNua(4*U*Zd_#eC8!7#22D53@wv=HY8M9X)xbq3;Ac8IT*3rGaBI;H(x@@LjT zM6Nx0{6SEu2u6&;uhydoN0A-q!T#gp+T|!rDU^ZsJKqwdnugCWdmU8TO3=|wO6aw* z4qxv4bFk7lpiTNE+gcUeW$3iyMsaDih&t^9J>8GusCtv-`R2m6Kqwh8!mlz3TpLVa zOT}>P%i)`b{G<_@_VQ>>-YWf7;ii-Q7ncG#KpE?lAZ$Y1exE-lt4s|HNDf_l3B{wx zj+w|!xRqeriNAcQA4&w$upT5DxIR|uGZTGiG^9#W6(xMv?0>)EF)dEJ@m6(I8E?#J zsN_MFbTl>&%G%Cn>+;2Hw!WON?=_gq*xModA{UfJgN_>Z=7x%~xT3`9zc=S1Wd+&g zXD13ud%V@luNsm5``S>xA}l#MIZBYj{idi3oeH83ko^+e>~h~{egCBx z1X9uHr8|*zL3!Y#$n)}W9?Adtrqm(Gi^BYCIHDLXamUX9mBB|HU*0e8GW7Ar>m$km zi-*cmRqwd7B(}xSzJ~5&;5-s75Q1E`Pg&iw3hc8Ry_W{DJV%u?7A;eTccbfzEVCI= zYGh>B`^}XB52~$c5ldWB28V(s4pGp6>rU&|ULIemJ*5+>1I2T&im=XXgsT-mlXlHW)H#xC1M-cAZ3o?N0u8YV46)S>$ptb?M8o>6v~9oDAIi4N z9c@k9!ef(KN`w9agb{dv<7Enz9mmx+d)*%JT84dB&}F)0a^u0{v~sLqcS1Y{hAB*B z6fW<@MU1FYH6DDkE4C$)QlQ;3MvQqH^@iXTYJloiJ6aWw%Zajfw}%Q9;$Dde@Jy4o zZj1Gban_(yJuEf2$S197&TH7p!O@Lu+g#jym4*@(3L1lqP#~}j3?<7W-PgV|^4Fc` z6L210T_3(4ad}pj$SNpG#JexEhnPt;I(YY%V(>l5hpSvYT#jHCL77ODa3se=Ozrj1 zlt3i8A_6o&(2 z&lkAME5z8O8=Zdud5*lB@)`S7fO zd-d#EpX6j$eDTh8U?;!kx?E{#KSw(zB}!4t9Q{Ti7P#nJ&~RuK$jE@Ch@!=_qcJ)R zaCmNJrZZsz=A9^L;It6)M=8aHKpU9`lp0e5%n&JBq(D6q+Nb~uiS`VdD5(fVe5{BU zSaW6-qPx#Ccu`srn?|=+&l~(t?JfvXNimzWlvOMeQ{Q2%R8nBDu5O+>$sy*nh+>=0 z$FCt5?;-vHg{+hoA_Uy+U5aoU%~nf$TOcwi(_U#ebDe4dAWUr~1W;MseK=oB>pbP`==7@YqBws_Ck7Wx zH0fulPRDvP8MY%PuLLPTRkDby2P3l2S`TSpwWSG6P5vlCwc1t6yB!7ODQF-r>_m~A z*z#Fs#-$E+zy7im`##_y!Uz{Cx=ADi-4rFL(fDa?P$0-REHgw>1P12Yg1Z6@KIeU@ zi-xvH3bW2fog@Hipio8{P)v{Tg{T_V5VbD*6I&QxXd+N4ztyGb$1;QZ*xoqN}T`1$9v0RnQlQT8ACZI+F3fTCLv^o5a(}Bxjr8G8id)WhaJop)^VOa7sv(iZwESPvd zR7z6`0=}2`+7-sVLh@b7+uTxxJ6HIxNk+b}CgQRTSG!7$`YzmA(D0AfH~01k)bo0MDw=PvLj-=im>{BYo}9>e z=YR!9*^ml~oegH4j#O2ngM2-SFD!dx;nvq8=pv_>EZBdO=zQMm{^4t!6tc~YDM`{K zgia^yJ|S(2TzZhrCvM-zZ}L4pl=}|n#PPvR6>9lTM^cuPMbz4@?$MaQ&^r4mt~>K3 zh=uXgK|p3$nIB2iAdS%)>c#cU4+D4jO=sLyC!rcYT%)m6@TR@It$gM-vmQBvja#$GdO1g(VH?^bT5DlljDDj?_MS<*cuP&1ndr>VWe2LT7*A_fsmUW02! 
z%#^IOAR4G+SoDbF_!Hf_Fu^uLkWGu`Vy=M*)KrRk6sBl-lzCblhkd0S?_|C9A7a@= zaXM|(2{YfUn}{1595Tb9>fkX0eIM2E^fpSHnvgReJ6o4M@32&-(j;4plIRt1F>Tj_ z9uG}|x#4l3Unpi9bd|ee9H;(y?hN_L+Z(cF#ljJqxU?iBoxOo64$V=x;!9qSqI&mEh$2s7W&%==LP1^yNn+J<2}Yax za+%(Evs!MB%z|1YhVytfti0h>ixs7Oq^O)sAvt8D({)17{&=={ zf5dV$=4+x@M@j!RUcDv)?2OXWP^;Av0`2h6fj?FkX@eLq6aXEU3q#~z21$(VDdPc( zAOl^D#QPAo)j0Es^+@2BOTyP7GV&eb$jE7xHzGR~&Y)ki$Me+#M{ZVXX*0_8 z!wNM1afB+tuRc5(t4?)v;?)xg?aBrY-)I&OcvogUoWC1VCq=|Tj>yTY*XO;+_alK+ zYA*-!SN<$haUcXdNc;Xwi21cfK842@e#dU%s%d_~DraG8oL}2nhLEyhU&Abz*;l)4 zs+Nl)gH}?G4=719$WAq^U zrxD#CT#?>D;)~Tgtn?O%ZgAzr z5@}&fsmcE8^yY4|nn$Yb>W(9>1f`%4&xRW4ZEN#_OCEK*>R+{ZQh`$e>+w->FvS}qy zu3>`0eSiTph8uae1B!3l=VIf(dteOp`j+^!cx_`Yxc z9|MnkcAH&!3o&SfqJ9rvPdPKX2t{LYYZ$PAVbvkpH%{~wCOjGpJy`HKJ3q{6jGCtU z1SasM(Tmf`tC)de`l&1vY=D^fYW(sY(e>xl;}pBQ(6GmKYuARrC_b{~;4LEKwCdb@ zUfqmc#O=)|k`*4+Wyq3~7Zl7BnV$8&cVe8~P$Uq%woRi-z*@D;)pZwMEl<|bArz8c zt5zoI<$N9k5^1#-_Sk-y>9qu5-Uj1u6c5=>Qw(G>4@VPAvXrj*5L|vATyR*7p(IKr z<7>caD{z}Mj8tvR%rKpHNAT%f{?Nk0!gXfunV4ykrd$w8NMrXjD%Hb$8SgPe z$Aif)7WHA3ULN0Ign8>v>lzzx9}{kz0Hsa9=$&B%p{{w4Bn zS)|AARB73^95pI)mBl=!i&JXcbV>G#|FN0tkMA?lkvgUY<|StiDdfyIOLC}nbJt862=<#ILQ*x=JUwX-q-j2U+cH|11hv7S-eX2n4`(_Kd);8H%5Tq6Y z(DPC=kVi_~Wze(EU{ycctEA=hxTC1>-9%1186!6RG)zRQC*%LoXg0kUZ%8kWoEN=0 zW++%A0@!?5l1H%mYo`EU3=cF^Y5*Mal9&)ys3%m{zR6uqiqbA5lgq(XWAgdef3 z3523%#c((LzPU^Np08EfrZ(NT0Q%%TetI-4VrLvp^pP@%A3@W3piS0@f)k;UY5wlU zZ_RtKa1xYg>+0~aFZ_IFI{ndyM}QS{uT9D#N9~F zI%T>vYm*N0;{xcbg8Bk8SjNs(4^#HawwJqMQ&HpE8NKv zrJ4$UZxt1ADy>pV(K_#kO1QR5Q*#9ht*1DhXoP%KKu1s{x-6PP61bFMvRSa8?iF2OYrbYsEYA-D$u!GgQHdxAr73+@oy zgS)%COD`hlp6-6?(NF&vmxukZDQeZKnzN>UAO8HU59pVXXKHX++EMY|m;r+ZFh+kN z1LE3H$b9vri$T~^yFPS8av;f0{}JDZI&of)?O+et%bd+NYGyEg@23;gBN=;Z`TFea~VJK5~3W&}ZuZ6f+{RnOAlE3#&r9oS%xPwu_C}+PtEiC0tXMtMtq$ z^C1@v;;qI2spD=MEDAzrwv^QB>5f-9f77V=Y(XyS+UDtv*M=BDFtv?Lc$P}8_lNM_ z>t|EHGX;-N;SM=g;c3CqBm7Dw}}f;y_CTTew3Lvuh8L5-eMMI z%8ypXs;{RPBeB4BNy+Qj{s@t&kUs3!PDX`~!2OWbjGs7;?kjlYP*mar+1o1!IOEX^nC8f!Q_!gh4N_x%>C_Poch z0dLj%1tY=#{TcroxKHpZA_IqZQI6&UAI{xqi?JBbaO)-B*dSjoi6C)nXLs?*A9Sc@ zCSl?nlFm9j$H;q->2bE=OBNGA^mH|!Oi+J}VdUTJ6S|>mhe8Y=(obweK1~P5=@< zm39fu;>GT`g7^VH!MLOxW}+bYuN-wu8hMgos&Ha~G~e`ficS;K^KTK1e{2Zyp<$dfKkLTv(v=amyJmtcExF~hkl+xMNcq=2fQoime28F$bVT93 zJ?qnkuDGEdRAPsM=XUQmv*+xha*iKRxsw>rgq|K+TbZu|#;va9^=848wFGpQ$SDQ2 zKUyXQ>7h|H*ACG*-*@%Yjv3fi-&-~j-hHn!8hl60frgWQHZibu?uWt5%QGh4%upPr zh=D|=XG}vN=6$DBhScRv{|b{+(Dzcl!UnybB%Th(_#>GVqy|a8hie67MT5|2o(z70 zaugPsR06$tI)C$lR<30H$*G|-YLbiT(9qDYMn~&Qt>v$=Qs^2gcN<2jqj(mWtRHQFjhIE&l7Tx#3Du38(r6Gez*8ai(;!mY7B-{U# zMyvYKwyXNcPOyEHB6+Y_kIH}m-l&(h>^$^hTZcDldzns-YK^G`jae=o;{p!p%zycw z8gRf-(jrm%aUIx-4#esBO%fDZ*-ALba4`v1_8H@S!7@ppJY9UN60(-Fj^9by2SiK6 zX}PePnN*tlsKy8Q)xD0k7#(SN+m%=vKZBbj609YErVz#U<`qkDaxyMF4kN)h`6S;* z9)?y)7Tg@OPQXg=M$=L8c|35{Pyp>;JBZ-p1g};$ucS@yeiHs7@Qyqh(T`=b3N7CH zD3wOZf$8gLD_59W;VIjX> zKZYP@`$t8OO62$aY8!M-Ec(bUhsHi9I7mKs4H)Akn`a+oR4V09CA3B2-3Mn6aj+>= z#y$(NV?i^}=pnJdrHU;$R6Xi^d|df8LI{`Cyv~@LvBk~rTsv>zDyqxJfJPIi(Caa> ztbYbkky)Hnu^7)LUd$M%BN3%?S_cFM!W|lpt!;ZGi%(cWl$e*TnlB7V7T8p$D z4}h@*)$~*Oc(5TDll|O|49C`t<`Nw%6l+gk>X(aZO_ax8$Zf=a1BXjk1nVz&9iQVD zGgUYi#bKek`y`BC8_gr<=07%0D;X!i(%IfFs7b`Gf}_FOdl_uNIu;_6Y=B`b2W>o= zJ2Q6bPL+WG6=umQBuBrx<9<)YiuI-xL3@qg%**Tg5Py`7H6YsIJyKkPZD@u{V7IE( zE6rT9Y2Q%Wlh(pj94U@^_~Hkls@{s;3`Ri`pAlMFm(VI(iuFu^1uvDC6#q@)0fFI)Y>>hgc=$ zM;C+fO7TxMNf6%VoSXBt);$)plPfEF^>UDhHN_?FcvZ8@HwiMX+=#fWP$)zq$sgBK z<@mG#I6O-mWj~I}W6Rs&zXe}&++Xi+KbUmDjplhBKah`l?f|VidAaBhrC;0jWkTgY zEhz7JrnOHfs(i47CqX5BSeBE<4GlYlA^WCOZ8W{sl#I05ofbUH9HQj()ULHThLPT2 zo|_9pcJod*fS$xcNXXv+Yr%cFe=>ezAd6DtjgzKLq@pMG^sQ?}mXf&B45zDXr>ag; 
zP#-fZD+I%=Y-#5lx~+i6#wF#Gu-2tn<3jm9BaOUyT}@H+rkIGI#3rdaVn@8j{V0f- zMIHsL(|TldMw%8?LCnU=i18TrbuO0FBz_P@z!~|-W!CUzr~Bi`HM!`mGR!Noac3h6 zZBPEhk&ex=3T0jO{MKT%Gz3w>8o`Gz1yO&OxgCQY8 zTIqK0?ydvK4I@6|E8)7&Pmds&)UJuawo~|R?;yc3SPUZLhpQygLl{zlB2=io3@vo0xx*| z`g?5Z3~tb57-D!<92VryI!?5RC6ksX76?r>y!>~q&rdvem;2NVuBIIKZwhctP)r3#}MT!%{6)aR+soWA?A100t zW+2YVjH3$d;u^zbzr^ueDna!cUmwiUBo?OAda+T$f#a2rQzI!_EeOYWNeRYNg^Yoy zKy!ylZ@Kq@(ouusG2=B~#-_SCwJ|&a6#~>DLCFB4#6@Gq(5l&Fp51oV+gS<0RxB%8 zj8R1J(__KB(rkT~O$w`$^uXS53%z6p7&0FS{a7Xkn4ce^KcCV2C#XE0bRe~m?P$o| z9Gf0-B{3lOovYXs{mu)ppo?SX3KrA@Ae;=85?T?#4trS^6rjgKwkpFkmqEgT!avh` zxrGMs5EVJR_l+U?e7yW)*>*7ggN7n^6USy)e{UA*RiLgqsac#QDtP)Qar4qGMo~DH zkwH35U~EjGNT3+{mDd>^C?-HLRitPxab2_VLzzty+^NS!XtuEMJ97a*TnBFF3hi`d^1V(G#1JO$l}A;Riwi2{|MowC2m zHrN{NcWr8~g>qP!h1N^hK3~i-IH#9-(#@k{6xMVXpEt7XPP}#2y_KX(=yB(vqf4!^>PWtOD&-=$Iq(JwGPK~=Fmj*jjnRu?R-ba{U1;0{rCO=Jkk6X&Y^KSMV zO)4c*6LeWZqsBFVx#DG*?1wK*BI0#3k7`4(EI^p!baUH#M$5g?lO^@MqMfm z@YT#!Oxc1Chn?Q^RWd@p*a8?c1E9JL_-H7y>Rj?M*rJ_K( zO%W8@y4?9B+~dptSBWy5Ap+ADKxQIbFX5*)3V`)WM_?lMsLj$Y2`Cz4@7wkRUbEkV zIMaT#*mE^(Rxjf_6IKZIG2iBo<$VXGOni0w}iXqKBC_1M^77kdG`C zlK3i`ik9NHH0hV2WUHhf3iyK>0#wZ)LfOszyD~DBzlejEv&aO##QbZwpX_NAv9yjo zW+yJ%V)tnb)A51W)#iE#L8E66zlXU=XpQk_8mC=9E_8BI5}CNA&u#nmw~cDg5#5>Q zPkcv&W+g6SZh(YKkZe9IXUV|h(aaAGA?;6>C8i(D{#YN&mC7K0NqPMS3m&66T`UPB zpM@hK3jR2mBn%A#zy5Y?ti(qx1&-7oYWgMa`tIH2&j79OdIGKZ`O=W??z=uY{U^6Z zXS<&iGXZq8lLM}+?GJlFTJuCDhg8e}O?>Kh`DHL|sfo5G&2N79$}Y|_r)a~o2&)3t zYRON1u1q<#oPh(SuU;}3J{`i!o1?+jqtqLH`hol%wE6~Se)ipYPFw4bDk+_HP8(PW zuO}($BId9VIxcs~H6m18@C&%uE$6U(#pGV`22jTz-7QQu4SoeSpi_o~hMx9y-E579ZFg&0 zkd(aRb}$d8yx~s~s8H6c8eGcQr4tuMpX}yn0by*+N(Ydh8SHTD&73WZOdS@XV>5$x zd@pS#>?^8t`{q#DuEIC9%#9{uZUhx@E#z+{i`qkq0#}LADX_k!ybCqR`5a1H29K-ClX|Ju9SJh? zA$r1M0>ad)!EkDggJ8N%APNxf^91O#*6AuPE-uicFr&Cut(AXtbaYltf>=Fw6deRn;411)UKL8$>q0vGP(0PxHwrP61DbrYho9G_==wTeNYr3w!5E ze9~X6LOD1i^_R1Y%+%`8dqqR&k>-!TCAw!+hQik7^Pw=2gJ`gEw;dATk?M8dF&4LYVk_hxz~qg3<{ z)<)8RR{!&U0B+nA0_i+1=Es$TU3{BX1s?iP8_xZ~N0CD!Puoy}9Hr5dXk-j!?Y|)m z1!USA`DExV_C`5uXuP;_+ajuf?voSG$c~y>BYs8r?(`dkXNwuBQrnyj1$at2A#*gn zPga&$3o-Cs29K{@Hsp3=CYnGRw_hPj5oS!V1Q%q@e%KKmjB5<2WdhB%iX3-QKej zvG-jF6*qHM5#HXWlhmAFn=Us?Q24C-K;FF;*PBp8fQHXTZpEi zYUf#jOz@N~ihFl5`mqkyIABh&zjH^DVdJdi!YwYSo$KsEK!ZvH6*D(aol1<72br&# zbe%SiH6iqFt4CL~U{U0xn!o zQIf>|gF8y&kq<^lE?(X$+*tRQn$BP0xIDR4uWsmCMU75=u|sTR(VlXlTM=HC}04xCyKz@ zwom_jpEf88Z=h~5fCY~W1Kg~Ap#7BE2;^g&5HW}_6Sg+DxOZWi$AGw07V&RN69~! 
zJ}M+Q(=!SzGD`TW#6c}TG|ZZNhUWpCffG6rF6rAO*T*3Y+WldRYwM$NB+g~djPN;j zQZ$2+SyyYFm692608t3kkAz87j>bX#9YDph9B6?w0#b88y+(@P+E1(FF9i-LyzTp_ zUfc%`%%c|T%|&lxe7*OV$RFPr@PBGHB)Sr4p(ReJ9Poc}Hd_>h6-YxG?v~> zc&$|M3UEFmh8%~3zLkUjPbmTP*Skul4EIL!_~TzCOl#nLQF}wR8zxiHU_;D4sHCaF zTb&N<7IBS^L%Qs6t6NYCK2?^}76I3?Gq;YLTj+xn42j}{n1N{;QDWnePq;a^^N{Y# zc@cV3{9u><%7qpX5RhSL0QUx=_3;Y7(qt$^;OPbuaOS%Ji_AI?+Mad>=6JZ1Lq%CImL#V4DHe5`> zrByN#u>rOV{6Py`cJi@c$J9zHeY>*HARE;Ud_^bH0bk( zODEcN2U=C6sl2T*JbCtPa^6RG0La3<`vr$OZTWp5nTpDFTx@&bG--JLcThlaKf|mE z?TGj?sD`e&Ah~d)Pz3OTl0Y%2;`Y}RC@3fwdy@#y_sh@nSt4-GIs1yalI;LwnGUtL zQqR(oe!%1U=o>E{F77H2<&0vm5zL`hW^D|AyDyR}QZ0!BV5y&!dyN0k6?#RWpCH}& z@xD74-ENb7mqd+|VamT`bCs~*CXU8+a>G5`uq;NJ`PvEQ#v?w^02)ZO|0E8->%Gs3 z18gZ8L+uC}HY8mW_HMlC9xiqI60{=GS3k0B@$KeTscXt~5>M&kx%W$XGf;`z2sxW% zG`u1bw2_g^u4|F_Vm6iQMi&pJU1}pcSqFR3WThSWnX9WaGY7TSY5Q~G&!xKElcA<7 zTX&dNBk6701iH@O`{VAP`haBeuTpIuji&JMa9%+C$$;YN7=@&Xf{Z6XH&J2OgFMR#0|T?r z?26-jw$Z-U6y09(8r(#`hBOUFci~K+2aK9Ts5kn3MozBb`KKQW)M_?yRRaYcS`JHD zpm%XYF)(OZ$J8H=^N;rD`o7D7C-LjUHiUvdTUg6&FsWlM-Xs0IBNRIYO1?Vjt}vR^ z>iRo;X3&xJ62|Dlc8_J}p=Q+y+OU`Zq(j^~q93s7mochnZ6A1BzQ$E611i^EY zQ$rtD?>tdXE^eh%np=n;Pp<{OuZ%U6?&-M6LGZuIv{>%DIcaBV(P)t{jp=^<9l6x6 z`P(7q$j$pPRB`rP!C*s@+MFc6Bc-ptZ`bj~oz*K0{FR9sz8t~>Brg|7i#&N-W}8Iy zdW2k`L7D{J;rQR<6C<1!c47Vw_Wj7$4U!jK{X5SZ(>_nGFodU0fwWkMw zx<;;OW()e)WDgG4#d|t|St=XvN5Ar$LoY=pnl;wc_g zNqlp515hsxCNX2SBBlGmu_d&P^>a(yE7S|Y8C6v$NcxOL35yL6jCWrawGG@8&e`Mn zOV@wQ{u0|#cd9$VlLf_dMIhV~Gqm{w{O1-44Gk0nH)5>we)VfFPHXqi&#QLI7NZ#0 zS1R(|_2k3aVIe;-#46wwh)7wpZ{U%Te$KoZIDL8)`_S@5ZA!p-ilv$7Xo2R8wC4wOqdYsjEgx;hs{g?j5JqF+g;hWhekjUHd&|oKP zL>*49@8UyglHIDcJ^fzThg~+h$Xp>ooE&u;A*nP)5L*`}J5ckhFF7=_^!}Z<@6e0} zckXl-m7L#>VSIo1Whk(#_-YJ-`TJJc+3Ab(%$i?CQM6f%eD>q06$5D0b~R!9kTbc` zu+1ioQS?S(5VD?2Wk^Tdqf%?O1Ei}&cI5nb{vB2&NHY^AUWbh3+^i%Q0tu56^F?*2)N<3sWlWj%})} zbPy7-x6BqyP~F`44aSL;6bePa!d*X$a#pMWW!r-$Fes;AL%l5zkZo|eFC6nzj+e(5PKd8dMX3}Wqa_) zlJci_{_^j7EDC$9b70wfVu5%TzIE%nm^-h9ngELD7H^(%+lWLP(F*KWF(S4vcuJ(qn!fCoQ-lGAfCF{eSj=@u~o(RNDx zQnQvl3LWGk;f4vlHyN2X{W@5LF(2rZIwNi&7mJ{<9r{4{Cnc040%4?mgXZsjCr(H6 z7}NoBM!dM#v@NzL((N-fECYVAm%Vb3m0anW%{Bo+nthjlz>V8vw4^~lV z*ntHt!%Cw;|NK1Ph_jMk$nL$a>!^bYc$rQG6j3nX#;+96dXg34r?w*+$$8k~`h(Wu z;Aw#>m1a?Cw*QEJ@@%F37x%b`hPXZKv58X-arIi|h``9H-(xAYgxG;BlNx8Ib*ciwXINa3Yw4n7r?Jt_>Rxfae&d$u@ERXmz)PMuq|=vVZSQ&UTo zKJ1F#VQC3K6g(2tauX2sWO(Af?#+vQ^UbCk*933sgkB0#Lg2$N10!R?+1c5u;)niV zFbG%@FOcFZd)C+!KPm}^psuc>FDktNz^JyD)JCGL;RCV4oo^hrTmoGa{w^B%n5cE3 z!@SZHOy4m7m+_}r9G=|IXA~H`W+>u!#wSK#}ShCjO04sQGDCN1mI$Jam(5%1o^K>=IhBNy3ao=eD zcnSN^T`!a|syr<~y_@AirK+7^c~lJ-dfipK$2&uxIj0`_W%XbX!oncmfQ~m^!r9Dx zl9`6D?Y&N|r=JraE-KH56b`*d4{P^z6P2yQ`sP!c#X5%{lW~{{VnUg?#NIxdrveAo z)(&0ov5EIfBhjbbaNPpg`3JSXE4clFkeJx_YqMX^Ne^3T9wE-PkEXwt8Kbs!+!m#W z?sm6%8e#0j=xT@29R~KQx6le2)t)~jJpc5y9mW6Aj;&3iFX)SG7V#mH)P*HN08gh? 
z@Z83M-0X9;aRJVsbvGj%l+q{q6i_x3OP|%=C51@lGW+T^Zj0}Z{}Ry%AaZruo%eWAGFr|9~$1YO?*v&|O$ zNl4oz6xr*3mzLrd?19v-b>o?~8(TB~oESXiP&l}n!6-yDs8WrlXs_XrX&|&?7FUp9Bd9Bb@y2iN$Bhfx3Zar zIHwK)W}mm5XD)1Iq4~QX2j}+Dj%8q+h~L?E8r}aABktRqgw*nuR|_!_1_Ose1%ZJ- z!%Ba(A1g0D$RCVmJ>OV6A|a_ncQ&+(KzM!OWb`=`+&?eUu`Ofz*BiqDNrXt?TlFg2 zeVdw@S8I1pF%gD!M-BUfG9JJxI(4cHO#^y3%~~o-mze=}EL;JPCg-6f7GZ#{pLVvz z-Ss-x;hUev50hYIktrg?Z`T^NYC6uMTO;XA4WGlVzyOXX^_0|9Q^^!;S{zMV59J8m^gZZ?3a51%Zlp~g zW#Wb78#`V$HYJncuWvLoG)&MMfxc}iZS8M;N&{Jj#7>xRs3c{DUXJ!P6!@cc%v1aR z?RGk`Uy%(Q6zXR|5=KGcrXH4ySNQuk(1ha|oQHT|W@TQ;0Uj5>#AIr8J#KS(FDH~t zz~Yd7#=zG0%UTnBd?4&1pfOxpuDGC@DUV&PR>hKCJcw^KT15KF<=V(7ZMVZiG0-+ujNd6I}~0d-<5h<;`sCVH3H zozs{m6GeSQ7eAlpb%Pm)#R;oJK42%bR;fE{U3G`wlp{5zo*)MlKLY&1Y8 znF`g-mfL&Wlj?~r`u=vYU#?F6fdxrmR$B3e!SMQ zuqcvFeLDmN?Q)QtsKdHkTkTATkdP1{m)LmRqNQ5;!JL18#&0{#b@*U^U6Gri$f}{N z8S~C99~yphbJGM~)m{1HT4(SU!IR+MO9ha8JuY}IEcVVhusLC7j+0cn^jDM|UgW&C2SYPNiJ)$z6Xqz}sjSiMw`OM$h-kOv8?eeGUx? zX?VuRbpGlN%uGTAF=dFirT}{eTwgCb2F`*IIh@9KU#Agn|MBzVQ!sTJSA+x(&ZOZn z&^Te!a{~bk3rYk7WS^q%k8T40c|+ui)huCkWc9jtz|T~0?@)ci`G6cS$8RB8+<5z- z!>WGYC$JQf8MQXfE)0ZfI7C|0=X2bK%b?n|H3PPF-Qv(BWOmPCF);9JC|JA&3#E^K ze;g8J?~ss?{GNAQVNejHN%3zORDf68&aCat=n<(rYDmW6Xgpt! z2CNUXX=*|UDe&IF!@Tw(+JF7X*_5x}pCrJ(w0NgX`u~31^{b{HeF$=doXjJo>wRuN zi~OhdXzGv?r!iD3%8-EH69FGL8hRbMm1SEa9@ye8i`SEGB$;rYi5IpMm0%a_6pD#W zXn#-78wvrc$p-0L!R^^(1Sh&1W#zF9Xi?bQP-MP+6WEB&sa&gn8-d{6mXn?&7N?oI z@20SvsVH$dB^E1$frAoHMp;Y%g#?#PY4}?rUIg$n6hyWpg~J(o!EA;3XU1NYFfW(y zPDgvkckf46To9B5{*F8e1URf725~LA;ul5Zkhxy-nQC-PQ9a zF8q4JU6}1s<65ftS-7K&bAmC7djej>`9sFKuxE82)7TI0g@Bo*XiOtUpdwTf)1Y&sp;}s; z^?mq+iuI*%^}4FqrDAROa7Eg>rh;6n)dl-a^=1L*Ut@bCOu`|x(OE>1bUi&*?&v#V zeea0N>6{jpe53Z1|LmB*naM{yz+dJ_8l^(jzKaqR#3UYEh;vufZx(oZqklKxUX=RQ@G zo2iL>{5*#L<0Kb~jatfVA(DdGgs5Ezg~ca;h{R3g-z8l}$B|DL*fdsaI$~ zWPZW@CZKKG6EFGRB<%-c2?I=y$1*aL`i+o~H#?I!mllMvln5&oD*eT;Jt!piwiAxO z3nN*9@%D1erpI86^}2@VNR;x$0Zn)|366gb}!BK`D&|GKi^}rV_mS zK*tcS>kO_~T}4=usCP8Gq=~>Osiz!VfD=h7YOUiFu)|Lv`fsIfHR+elKy}4HXYaJ?O8&j4{1~uMb-5=Ru}X`u-d;=Uo_2QM*4&q8n}kzR zYNYHIamK38<+(?A=8 z^m<_d0hwQL(=&dMHDbh>136Hmj|39ygRNS4;Hb+o6uKU+Cb?-wWmU)dGumgY=&V1EWC7;64HQyafNzk(W_E&>oQg_Toho zQB}M$NNG`E{$1@a6MAKMFaOih-xG=e3kd4}DfsW2d=X_}Nu2+uEh#7xq^3s0j~k5u zApyLAIE2ehj8dL-bV>@I`KM1vo?SeDuK+IE^2W*!@ZT|cG9Y4GptK7H7f&9#-6%> znK~|nKLhpOlpi)Tob~BGsSUq#TrH;;@P7}SU|*{@z*96Jila^8xago3Bh|6__e4n) zAohtG#8ekO%EAT-;`61Vm>}S=#AVEmjEG{}+^-j@b8~YQRaL2gFgf%M67^rdqU{8= z&V7?3CM|){)AN?&XLeg0y0;G0j`_|j@>i}< zO}*s=?m!+MT`0Mg27$$zkD*-KvR_~kOTqY%eD@c(C#wSWPuIGrK9A}I0v>n>a?F&P zjZFm*$6qEV{FBb858gF3s{8UlP<3JVL@oL*J_b)>wV;B)fwTr8O5l4hgrHC41DSJ#N2 z)VsGLL${545xkRLc|5LR-%e`{hu%Is$>w~|6v(6?NC&9DcZymM^E_{}7u;@9?s&r2 z|4Vh$;h6K>#I5@(CCT>zLM~NDM~TmXBOXxhy9O2dEEmCd{oQA@n4GBCh3*& zzmxqlk^j4A|0^1RzWo1DCz458j^J3vlYN|8ie^O2f0Bg?YWw63>YZOGC9D3IPW!E_ z_~1lxmA}Xt{`r7_Y2yb>#EBrelIVYFvn8O-mKWlo|E0|+z*lagy~=36FX%rX_<)_o z=NtPQBfJb=fdsxHW@lCT?~?^??)LYQf1}plmi7iPzU?sK(pdjxkIMnOQvQGV(w>Mp XMD`H~OEWsafFB7_S&>p9J)i#z(q7Vo literal 0 HcmV?d00001 diff --git a/images/vector-search/raw-vector-ingest.png b/images/vector-search/raw-vector-ingest.png new file mode 100644 index 0000000000000000000000000000000000000000..a1c0951bcc9c38188ec1e991b026a5673332fea3 GIT binary patch literal 26514 zcmeFXWmp|e@&^ip1PyXRe z1&^ns1M-a6{AeiXTcMSoz}%QpXyqV%ts)){lT$@Rf=_}g^I}FYh*N_qEiEjtF*ex( zBV#d5(T7^-oXi) zaORqHk!J&Cl^mZ(Ypr{(h`>V3M7<$yi`U7u8JUh?~je zT4Z|^1Jk51^b$DR$=PplLtuV>oqGN^+HTASUJEUz8*%}X&(zj#! 
z&hvn&oR2@<+(QVAeay@YAVD^jd!CHv#}t56$%LHsMW6i8ubjN}!Spvms~WvVc0~QvjBzTxPstBJzK@E)-ASDp0c&zYP2v9*QhM9{x5?K?GRHRoRn9^YR zEGwRplQ!jjV8;U02GNF~8$u_(kfl9|G08q9e87044*xO7kFkeCU*jjG46z9rIij*Z zr!QwzQ^?^!#MgEGDh;$!*TfhO+3(Jes1DW* z-VU|p0t!E=kUL|srf3QAO0@}k5y{7}C)_8Pj#R`~#PAdGQ}UM!C|4>$D@mwQs+cJ> zW?!fv7nl_ulnttms+OwpRB*@$Pu-ZMvmjWblzn%JWy=m!Wl}PrBBMd0#8Wy_!&X*P zsaK6s7*{G+;LH1>Dp-VBT&V&{y`mCT2w&*=D~B_fGnn0UBsHJY%W=JAT-8~9w`i|~ zt>mu6TPdf+y(F*jRKZf!BQr{kT~)8NUCFEBN%R)OF>#M;Vs~^b!^7xI_p?Ti3W!i5i*f);%~{G+Ng(qcVfC8LgaU7Hf6uc9*IelF+TA$(@B?N(P(RL~dLVZIm936h zmZ^-PP5)EMQp)An-Ot9kCid1!0+sRa`ah3@dTNZ+9g(~RU!78&lH%>ht@6i{$ zYwmrYy4o64jd_jr8l~E+hT$rUhHuu^7GqWAmF>pDRhLzowlgNz!}Fs*Oj{Q2>hZ>G zverym5L)Q(5bg|~$)6RWyn;D`rGf>&14w=<4mO^I*x%UcoV!h5=ES!tf1g1v;7=V( zQ%=Q6&7fn@>eN=K`MM~&5Oi9-A$la`8tM2trFxXV-S1N3;IUJ_Kkm%``z_GCTgC+6 zsQsq$2FU~W-tGKwr)mDW6k~W~fo7v*MSF^SwPp1=E4o}hTRvyvXGPk%#YT-cW(QaY zo_D|(S@dAl3G8dv3%z3>B41o2Yz6!a%1IUl+{Jbz;6aDoe)n@YJUR?U-6uanwaM{J zs4lTC8h;>m`qO`2q$=$5aS?Aq6-St6haO?<4fvzG7BnBi$B|PfG9C`j4zY771N|;gD-wmdwIv*eD_mi5mWx(jdUZxr|A-&Uh}~ zD&r(WEl1r^&9!t&v&MdGQfqJd^*Gb{@|bJ>xNJg~zq-p+cuj1pKOu6#@rz@a!@5n_ z>TG&Rato2`1NX)`_VvzHuX{jB8vFS({wweM^BFCh_He!D*#4d#ZRHnAst27xs@A7~ zImA4E&y`=5bsj0N^=&IvD?uwzE{&&!G)k1iblB@ncBaydZ5$mBr{%jX+>b z8EdF2MV5`1dh4Jl&OD-Q7d(A9f`UcxEJB-?Iu<&jo8_KeU^ip2vJAu_i_C#CM7+bj z;+mZ|TCeTU%xn71!10CijMz_bfiM;Ey(Q)P^UAm0cEvMaH)&9A8%ew`w=9A_qy@nR=)|K{_-S0h-_4otn zck3V93Z7S=#@B9|-4D@^=j+}?U)w%xerSL)gvx7ZYRFheUUhe7zdq}2`+U!Me@!sR zPw8HGS$U)$ua}Z{H?1rT6xNLvB3Q%OgXe#zeQlT^X^N(bmQFY0FY>W?z1b<8Fq^0$ zn-EJ6PnT~UeAeB(A5QA0kAB^J6JOAnT^yvjEU~SC+bC=+^0IucH{@}9*=(70MLECb zFZ3#W9(-7Qq`I1k4^7Ol&yeo;>1qCseR2ENFl)OZV#VlDj3z+zK~4hfw=*qRf*F_@ zFBnpi%>A75Cs`jj8RQ3fo;45oWL~hj0I&GgKlil4F>ih?mHM1=m`MbBAF2XsfA$6{P3SLIMJUE1(k&*B|%SR zLkDAH8%Hx+rw9*d2~bn><|-Oa8nQB6hPKvp`bM?}#&mAhc7I5~c-*)^MQdXxeL^>D zD;q~HH(ug@zTg6t|GcIrCj93UCre&p4Os<3VOs}dLN+=EItF4sSVBTV9tR^6E+rAM zf0Kj$<0Uq8a0}9e=Gk@$V2~Ur%tHZF^rw<@)yFY;!z~JzpfTQv`xk;NKU5 zK<5znQzu;v7|g#fUvdHc9)9Tmej)7J-LL;W4vz>NK={v|0zp!>O(6ma*Qfell<)wmFLjHH}gy66^Z&?501oYPq69SbDfjQv>`tJbXkQbr+VrSTvgNbVW|J|pR z$nP1(GFqGD?tX*7`I4_H9Y5wvB3NVp#isy1F@xbId5OkI5oO6&BeK7&+zEgNk1yuO z_IGd^L100VstnbDzwCGr3g{am<_O^Y9bl;th>99h6Xq{VydWV|1uJHL{@bz|NNk2^ z5v76uW>BINA0&vy9?Hp2e_Jm3Lj^0QCiI{A`ick=#)&?v*@wR^tAeOBK9bPSJaD5se1bPh%NTAmYp-syP|KcYF_+rKW+TqftHblK)uzVgBYkaa z1J5#hMW|+m*H6P!vWkc6qVuh>stNLr#Jp?z0{Q+$WO<{ah*(Z?a_n%Q2lUuj8V~ z@xI&twOgoDMjnPn4?}NDz%yOLPu}`Y#naY$d!5-yG|%o8E}eE)OhSvV2R zV<*{7E~2PZ&pWkb1!eQ%`_*8G-P6r>gL8Y-{X*ZpBOm)hY^!(GNaa$hV49H%@G4Z| z@X}5h64;a*9=b1G9?2I5C8e?EWBbvQ@su}&5SuYKX`H)Ok0On}32*3`>J{(N$ky|< zZ{Xmnv#lAv;~#Ov{t?Ht*|MS@0^VX|_3j6SCW|ptwa`xU*RN2xytM=XkL&ZhF^#Gn zsMhDpB)3LK#MX-wu-?A@Ev-q_o0Ca+R*Q`QEWP(!$nMWP?1jPQG#5w4gSGOHVTpBS zoQ<{^=;NoeC z9g%VYZsPM+HU4~4sP43`NH04{OxNZuCbHVSyz%4fls_jXhf^S35B(oKv?GO}i<-r{ z>+^4a@an+I+7j zFpTkxIpVK9Q7DwU^$@@6JOu{wLR^c?!{^@p-!ULo!sAw{HFiH{q)kAp2ks>zokTh%yyV4pT>TB zn-{)cTReqx0VVVvQrWiJXs~ zhb^g3iddp^no@&I@DOUW2N5DJS0&WF8eK$woz!OZT!)mJWAphNR9#^#PJtY*XkIbv z=mfsUmO1V4j@Totq*tt?JUzX2bSi~BR-%0R1V?mqG_%}PyP>PoIWN)&k9y&rqrSTP zL1oz{GWo5%xCAx+-S*%D4ADs6uf(7!m-u5*I7{o#Rm!+->yWwYt)=uMprd7RycgC1 z+ky{e3|`b##N3I6mv>a9+xADRGYa7-$dmx+m+h;L#EYWrYc84!`6H*ZQJA)?;Rk(J zSNg#5wdZWZ1t5pV5l3rl9HcKKx^&u=X0;19{?QBN+mm5klzMW^SjzNwV<%%Tf01pSmNq`6{kJ+C-_W1rVYj}S2AiK!e!Rz+Y6Asz|I3>T}#{2X{kxGKDq5Z8IG z`9c*AsrUZYBQuBh4U!oAsWE~an=5rdd`n`IfT@1%UW1+!;ibQA2g3q+dak+{;?x47 z^EVZb#_XIN)ErVeB5ELIj`uH`t$*w)2c_SQ4;gN4&xzU@@=s1%sv}mjh<07AY!^<% zT5}DUB$d>WD}LAFhm>p+VK!ThaBO2(gDn~ohP%k2!fZ9xIL?QDe6(v5=4GbV~3Uo^?nBt-Nv_+NO)X 
zz$sg0tK?)x4@J_*HG;yU-ASSmGqWG${$5_H_TJEMlp3YYU;WE-S zaNpr;kgOIM7wW`xOU&*pMglvT*lVYH zvCcwl-;0P-uN{{Hxf?L`8 z?+hFqoxKb8iW@vxtw?h}-H90K2 zeo_PJkdDqoN`G51Ot-1A$xZSkspO!bA!XO#J_u0sMa^8M`Xg9xHZR_CR#d8r`9Rc&Qs!@3Gc2ML|KKE%D8}{tO;J6sZ59*?)_C z1D=yx3c^L$S6R`88I>gv5;weag8#j=Z>*nRjZ3 z=bMI<*;p0tmx)FCyYtW1?`C&eWhtFR7u34l`{3r`vAr2g(0{hpayqpNZygpD)t}1g zh=hzBYt!D@Dd2Rn$k@Yki0(28h02b=Re6FAQd!WF0THz!o`wG~crv z1gErbH@WF!^_>%R0;M9v)!}j-%@2CLsC?NJgQM9ZuKUfN;Hi9BoDlW``|bY4YUA*x zAAR8%Ia+lVl$zC~Vv*QEg@u$>Yt5<)hDLEOydHP6NqSzlXh6?PQFvSqB;%1EbNga= zN)yA0G?4~Bxn1c%Tm(xcQtNjI!i0o}&)kN8-tfHsY%y1ym*Mq#yU;8Xjn8{iB^-Gn|E*vry5fKqpezk}Srm}Vto$j=MkwIUW z+w~ax?c>E}&*i+fZI);_x)yHe;g%{Ej~hd}=apm#W*k+%WIQ>D6r9`Tq%)YnJH&h@ z-@FTCj^xd|n7v=z){a9_D7ps*C_aMw`1ris%*g*_GL{N^Zk3Sq-6AJL1cKaJ4LQ(m zYBI4X+@fsD8$~C=69E=h zQ5z$%mBnnK-}U~4Wff7rWM!3%qGW1#Z{r{{$q;I6X?0w}s-xrCDkn)eUvp#bYA>)? z`bZ^L0yzSQUCtx2*O3%Ra*2;Mw6wIuvC9|?QqUbF4K-FksCjE{Eg+R8q2DMVq$Q{qvkr++t;|U#L@fJhknwgP0V&yOq6iF-whHbn^U8>I zuj8ffK!eceW-<@ER}%xWUqN7+JeH8~wxQ4{lElOyF7rPI%AiR8;&HD=r!^di64lW7 z=}ivA&;-!Y#FE~LDS=k7Q)vW`rc0Z#tZ)`w(+3rgsjS<1iNMNw46dO1a#9cw0E0!N zPRM>cUZ^N}-{i2=kA3M%q){hSmJ&OWxiU@WOzMCIY?E9vgEoRWdgM_V&dWtqIvoyX z&1+eFWQVM*Ec9D-wYDK@j&C+!x_@@S4bXHUd*g;SMD?KBL&7x3bvF6Z>&y+5WwOJf z-uPPz5;&jbM32)AKgS7E@zDXQ6p2C<2{^^XgNm*ddc|l2bNS@b6Js?Bv1QVa-g)s= z!S+edoe`1^ljg=X8euY`%&n`eZx%HoV$gOV!7smK;-}8uj=&bz#${yaX*Do=$$Ll8 zs1(K=zKDs1VFv^{N8hY!B$;{_(kiynOAT@a5s3M7k&Ebxf>%^i?&!ke8(Vg*mGGwO z&eq0$jte0-m3KwGX}x||zOaK@MWDBUdylL{G)?aTG6ccdqQ z3S_^MyL)9oJ7_(~))~SbxtGfBCQ3Foea=tOj=^*WKjYio+&m2ydb3W5p-QCSA~6`L zvGaYy{3QNeciv!M?LaT2l>?Dd<5Or*N>7p9jOXDPW24cYgnm4h)Tl57=~re5RCYFD z+l9{*pWZ(xUzqf+Ey6?_)G0K|e0D=SwOC*??c+c;zM^vQ&=p#|-EPFH#oOH9>&%nz zB7Le49BQyvD;{Ncr?R>lQ>e)yjd6tm>GoPxP5y2uvgvOpFT5?=w-4-y{HS!mkLz<{ zJwu#_X)5zpBkM<3)LNo2?5wP48|Ybr;t7MAem@;+e(N;C0K)16iWEM2FfWz4 zyg20GfI(RBu$0nChsxF}_82?Owi@aOp9|VGOupUclCK-NstE}5Gc7LSBB>R6y04*; zuwsLCIgQ?1x&pF_$uIaHb;4Kc5X2`sF>W}Yplf_ce+xju+cR4Us9U`lCVse$(3OlA z$o>}ou$zJ8`W4j^RxDd8N1sxy-o71;wFoP;kwz^)bZ<+SP|FbjM6!A0P+Z*E?lN7F zB%}w~xS{ZzAD>ey$^^UpV(p^&gQY&F2kHf(KN<{E;pDPlbij_WQfwyLeGUBfeE91` zWf#d91S$+LmtX*cdXSVhxUQ`zBy)(@QwzVb>p$;WxQHbepU!%iTVENLWOS2HAQEv4Ks9ogP-k|7o-)9S* z+E}y%q^^_<2{B(g*_*1nj@_Sh8HcpwvR&Vt#}_<1vTq+&HQ^BPft-;+Ch#SNS@euA zIKvQZWHE&eLW_abb6g6uI~NimW?z)cNbDR@0$WHZU~$Cpb?+koI148U0tPaKfN`J5 zY1QSUwYt+;nP4Vq7ed>UX7jRK&cE4cTARY7Q zNK#~^^}gXk-h4VbI}K7$G=sbL1oIyUjKZ(?Vw*t}SM;(v@?=H;szC}aCB#qyo_{Zf zH?PuuP~W)VpYWhJ%uk3UD%;Pl`SrWdFh0t^E9zN@2A1 z-|RO}^)Wmvhs*C5N%p?jh~m;9;L4VLg7{C5jDEe7ZFcu%rC(TYTO=hX-+OZV9vp0V zFqs97fG~brzeM=Y45EPRb$p`)p7Evf--KVS>QqzA;Lf9OerXMAxp19Lunm&E0)GX8 z$;kqFg4Wl&6-w>;FW4+*zSb*odp_IS+n3|q=b0eHa4=tnv+i-u+xq_Y-24DCqfSk6 zWYXDQ&v%P$9t};)hv!-2AvtuI$2PZK-E zswEdw;soaQ#QyuY3wi@@ug`S~fBX-d_14gjaOh@?Dj12c0^a1mwW|@R*RLUk{1LXdVs}HL8q+iEHQLU* zl^bF8K;psUZQ}R_RWse;e!bTPf`#_>_6KV%4RqAqVu2DBz_y#Z5Om2Fm(yv3-J8Ri zLWL{{e9sF(JpOBtEQvwr-_OsF>qoxnM0%lS4S5&_eH6&6Ij#obYrm9~6uy0oKa;cL z3Q^5fAhh<@=&g?mmKa!Q0+KxjZGHu)h?8v{?60=*eLjU!BDM9d18v#oHxyo|MS}tj%s45Z7ACN+xvEltZMvSAm zSOwkK=$@^z8>~=wDDj$}NJFu(IlBJX=x!V>d}e7BITin!9vE#00Dgo;TX+e~xfrz# z?cSp!S#Nf>98KfKVA`#9+?T^+ClMu07n^>0yx!c}@&`FVnTk1L%a%1(OHA8=|9`Oo()bGtD1{JK53R(nO2M` zfzJ!05%%!Nh;-87MhCyAm)BIQyW?!RE)E7gFtDS;$3*5UK41{tuPG~SzZQlDLpkn{qHjzF)B7X`n4S5d-?$8&foYj>WNif1Pk zoncd|S412#D<=A=;mD)2EnrhC22Kfn8MIp^(CYy!tN=(`ikv=g9qy~oXDt&iG6v>i z5tx4Re2NIdA)9!`21ff99N!pGb=TLRFBw#Cu6uLA z?|)C37&GHWY7gg&Q@jUtAV6BQh&vZWu|LmF-HyCg4ss;W0-+a#b2iWRB7E|lCT~Ui zd$YRZM{W{yBdhrqS(5A#BdWh_}O-NziRl|NyxGr0i@0SPV+?E{QaD-YX 
zm5cwjHO}xsc6DqsPYc=41I1^|7Av!akYWD-=3(tluzyo%+7Gk^U#f1m7RK7UqhRnMPYt9ezTAZlnI5sr%~K~5Ta|L zye_L8`oe(Bvz6%n_PBqmWCKIa#>1i%4Eo)5=9bHg#JnK#m+hEk_m8Zhb#&Yk9;tqT zXUCxM0&2p*O8Rvva(Gk0Taxqn9xS@i5|iJRggf-{TnOm$M|jne??65G*}58Z&eB3Z z7a{J5yY$watyQqN zUFL#+J@I+)Q1WbuSs(y%A}sN3#eggPd{{_nt2XwLf7x@_886 zmr2No#|lKOAJk6^^OKm~g2kFY@76$}vbkc}Y2ML-_u{bI%&sTVYS$ke=C`1Zs7I%z z$(Sh@T?1PQBVoN#9E72m`5wrR9VJk0<$U8W60jQEBb=R6Z#j}sTH(ZnbOwR zzj{9%+KNwgIEz{bxP_IWhgDJMcxBw)-U&95GX|QNcGoHkUr(EYz(5XKyX-^-q%Nd# zpb`d-yK*QD+6OZSC5Qpx@YLKKF4x=Hd!yv0`1qRP?0l&?@r;zXaoMBxnbYUcVsCR{ zf7e|eM#Xa5jAPSFYII7W1`G@jn#00nbjBQO37N^$re=(Jd~#&-px0wdK@w{Q<7nTp zK@f6~hT4`ADEHjib?rA@tkhpRmPJU2d$%8Piue3kEiTZ?A?BdEpM_kdm0Ih4ipZnW zfx5@*LvTmQc(s}&$v=K&_|03+)wAjd6aCHW(~LTfsfDGzZ zycIQyKG`3rit*MjPiikmHeG zOFa1bprPxCl}z_6Ubi1tiifd#Yp>?xZA2{-7lQXFL7kaDm+K{YIM;Bs5pE)%xFbEz z$VCU7yChhseWEZ{qDUTZvGy;gmy;#K8ak{$O)eXuO!6$AYaAefS^NNw40V>CcSBLf z3kr=$LXfcGUN!`M4~aCZON#pvFGs5&QfqeUpj|oGq<`;*b|Mv532E&vK~zN-!tc%FgSU(id0 z^SLyTNnxX1Nk>k;Pl|>lXIe_{yp+!3aJ)s&oMjU6?xoMk8|=aD=EkDe(f9HNjWD_@@>?3TuDG)OTht#f2hyCO z(Lx^^A~$tPGnO zwmGHsj4_Kx7qZ>gxY-hSclh)-ew}u#P&jIDDpHhFQzJd7K8N-1%5KG8#xlTE7TQEb z@9pFlUPl*~dBIBnf4>NXdysDk>?5kcdSuTKO{wF`xX$aYUX{T|eD@t>nPeu>@RXf2 z>*ZRi9cIr5J6a1+%Habtr*5;8WIC<#{?_NG*ZU2kO9pqczJWf2OT>+AEH+- zjcch4cy3V!6%U)ByC@dbD&n~jw_|a@;_}%T|6GTiFD%)c$AW(d0gN|oA%v}Ag6USe zZ?_h_B+;coD~~HXY|#Vur5jI)zi+r(bCz~|HT1=`(582ILSYa(1xy1k8Ph4r3~QvG zR!(0OH2&pJiMUR!WTmUa76~|#%R-%B&y+fA@7LcCB8J*Y#f+zZI8rhjg;w{7t~45X zmKbqP^CdQCs4?;iZ#NufZc`+N#3TT{@T8t{Y)7ul_yrl2jL9yRE{dIORJ6j29fXsP zOow&I3%M54Dmo6w@ci?Zyq1P_1~j*-bcv`0s)We}wC-5x z@qgS%UKEC+F|7EP9ZoA_BO_9g(m{Nw>VfIC>8st5%WgRO1FvMG^Ze3W`i>W-37C|E zb#1>2z4Nd!kMXR)j91zZ6-WC9K&l^!RWXM|FhaTy299`U+QoLmRSD))nujX0G=!ior392F(bH#X%;Bvgy#Q5-+y@M9HvqL`t)U(IpCvUkV7R7V?VsVKg zOrj)$V^{1#%W^dC)b@%(WyNu%mvmcr07a8tusl`rN8ZB)&N;PC>j~*A)5AH6OQC6b zWV@pHMOq*UGTXdFx5QSI5n3{&!@f!!#!VtkQf;x}cq*rwM^a=j8eR(^N!g?ZNP`rz zGKa{jMS9w28g!%)6a{hX|8X>W$OWk-xVQgHS z{MAV9+*({?&&Ic7+BPd~mnF74$2f;zL;|rCHtQQU7?8K~9r7JpGQZWRsNu^5KC4B7 zcj)?jnYQvG<)d*2h2;OFxfISJ=>BH8if^S~|0l`y?VZY9GGmcY`^#XQz55-=(>-P? 
z*>5k37Jhc_jyx9BXt2P-_qh>V)ox+{;&VbbbPJ6+@Asj=EXvYai9W3*Q@kGs!x7=xQ!LkOT)!wwuo6S0-iTzm+eCR0{q9(Qe;N zED$h74AFIQhKY}}v?p+9&E<3udbS$>iERVP8`*nhhHKxG@8uJ=yagqxEA*wfXFx54Vqb%WVcu&VcE-d zP=@#u*)Tb+VySvZ6#zoA{N`9ted@?B_;T(=4;e3!U5~)qRsi|1dMy4+;txdSWk{pG zL`YU<5bH4lK#CX|WyOAnvy;v^vfo?PZ$w-D$6v&2HVp=&hdcTu~#-qzey7`dylRdJ*Rx{sE z=Se&BC66;HSVK{`~hCOasi$vWJz3 zPRL};oItWHV3DS)5n$!fK#HFnPa)sEvB57Y6PW;~Mw4R1MM+2q76hB;`IzQrX=EX> zWgJ&Pqh4;tXw*AS>hmMnG5IbIlOQysNUN?ms&1%o_1_7=UDoCzbs@?1aEx(cHQ8`g z=s^t~W0i`!bmMzowt0(;+ZX{8=s|wK1AfYAOtmh4`#`_T$&AGk@`hnNO-VLnzEvzC zP0miAN3wfT+!p5V${0%t@gt?XwZRT;>2>+3hE;6157DfbL7tIHXn0plbqFDb;BjT5hzv(N!sd- zm*NFi==!?)2~nMdsCseYI)Sr>CHa*s|D<=0W10*5!FDd6_{3T9=j>4@GR)4e%Gd^| z@J2~n>7X4u<7SB;HsOs!N7}*h3q6hi2Iw0g&&LkDcXd5BN>vyi-pg@apNo#wCUp9@ zH$1?cPf#M}^5c&9#r4ZXO~Y-sVr?2f^8JHq?|fH$ME;Ha%hNfd*RxdXbSw9IYX^Jw z><=v3z4rW|kXiKt5qB#*CYW)HI&C`@4zY@~fIRVfn#-*yxQPk2g}S#p$zQKLR%{0_tV!wHrHhWJ{0ko9XVCqUPL=D>9Jqamt<>GoP>BaL}oo_j5)}8CkeReGkQ~beXyfvc!s|xZomDE zrc=eSKt|2(9jM$e$sb%14<$oJD{>{{M8OTAcPaJLI;m(djS<>7UGpUw4ybm&@X%N{ z<++1O?@L{iWD6SpJ70iC~z=5cj)>1wgY?}@Cs zX2@G3wBqqVBCaKsU4DW4l~7(hGE1rUX`=jB-uZPnq{A=YkY7LU6M8J^;R8~cV|{6% zcZw@x5r&y8mok;|b4VH7dP9&eE-oeqKnG(Ep09MS7-M}|p-0zmp?KC{x2l$DE6-+u zy1V|bKBgMCzff9;lzB6FSRlFS(k5}-!UUM6;W{Qn!7?G2f>7N8VBy{3wkM*3) zXojCr2ik^PX4nKihLjXhHc=eTq4Od=OgFe?3Z0^=Nujay5QT>b5aMoRq%j%`PYxV?Qjds1X02SD1o98iNAat31;- zfuiv?B1CJq?qvx7QBKa+Bn=BN=&Wz3aSb}+a@>~m_R4PlO(3e8reW2T7~B%TLGbM} zV5a)$fzFE06|*iZsc-HgB58u4#a`0Ga5cenXYLj*;h->Jurc=@2d~2(OYi*cL-@B= zx8P*GmzGca!WD)tNv6s?B}H3bf_lAXJPH^!AL5!F8TYGtCnX9wn)gc@6 zW94rf_u|CISS(YCf;-!ve4Ab=)>E_Rs-UWnYdPQOZb?DU(L4RISDKQbfOGxa;3nv9 zdz4t3c^)o)kjTGpxMv+p#!hC&)sfd!lYjjx&xU=2U3YYNZy!{wnjZIq9vI?Y!!LTj z{^4?+&?{VzG{z9pYOiXj3cqSg2F1;1YteI)oGz~w6L3$g%$bwo)UC?eraa4Q&JE{R zxBG(pg|k{#{u%QE0yb%OMjk3)d^-t9e_TX(xgJ8uStJr0k_u4%v>l_BNP5YHhtVFM zF*h3*Qizc-8Dr|e=e*+3T3`UAe)`dQTeYE|3ZROg`(6Nu&{?KAMRWiD*`Ojf>z=1D z`V``5Q%53(FR9`bBRXdW@qF{Lx$Gh;tbuVFV0>Orbe>}xX@a@d7~ig2Ai}xL82!h# z{GNn7sIouU9BXtOLHCK~)RXJ!ED+l&+BC#-A?xvNzBg?8HSb<#EMLmo1jRIV)BoBw zYWGC{3h8HIAq5ehq0UGZ$E9wbS&)ri?nG}CS}y}WcU_WM5TUjK^dfys2R`OC@B34o zLJ2~usIrzB<iuZf>&b*t?ynpMzJW?Xp1ugUwd&Yp?c>8KMr+A)JKo zb{4d7`);_y%O*n_6AxKRZe;}Z=XjaPikkbmv`btqC+O_l3F_FrF?sttRzLqcsS&=; zYHt*^9cbVoKXxrLH{#5{jf#2>&c?)w@JKL4IRFRiz<{<(aOW?tHFtY%g~v;2@mnTZ ziSP84Ya?U26$(uLA@Teuv5J!J-Xat3Hy2p(bul{TY zxE8h8VAJX2!@nc?LtkHiZ%o@+&^y)_4QBMgj#y`H^=3rWG0t9y-DN3k<0oUO>E}?$l*fVoy}} zEv4gHJ>`FRn6idK4~G@TJ{jlzKkb}lR1|OA_oW03kXGq#5LCK*>5xXcr3IwBS-KnP zl5UoeZX_j_?(UFy22uZSp4a!eU)<-+*%!0Bvpd(!bnl2+UZ7EoWiK710ZFC({LT9P7m9ov*ZzX4wleDzcF|M${ zW5vfMUW_Og>EK%%yKwBOVJu%08cPB~WZKcU%Uzt=TI@U)n8vp)jJ(#`S`Wf!JbyrH zwhT6ib!I+{K|^|u+^GlW&(y-$ccStjjBX(SRM3;lc`K1pb51BN3r3w*JX~=O6|a5= z%$~`B06OXzyj$(7OyQn{g)|w60C~Eh6bi&}AtUAd~gA>bJ)we0@LDOyGnw>`9Y^itrru(*Bg0@zCy&FoB zt4%qnU*AjC`C^gC$jFEo81BsFlgPzX`}q=M$;PbFd~N? 
z3QYjJ-*N|}BsIzC*w=Jveqh86`JR}amQ(9q6TGqUl1`=av8!{nDDk=1m%h}s-rn`7vBr>JFd9qAopF8`ka?E7)FeScn z6X%0$gNuZ?EbhnGn~GI6quHfNY!^GW?-c&9Ix{vVyg z7FIT&8YT$}ST6pIasR5df5#H{o)^*wauCAwLvhFb{{8Nzu|Xn&lK_Wq!LM0q$jd!{ z=?m9#$n3phy8$wln1aUUcEc?4ZY#Z!ij}nOw0MTuH%e|xzUnKqyhYK`K_79Cjoeh9 zk&@I->jZ`%e?rAPbkl6M)zzSIaiyDa?_fvFPm$98Yni)2cat=tr3}7n#V?u9Qww=k zRW;F1X=vo4ox-Tg6z#EC;iE98ZElgSXiKCkj|A!pY|P=dw(8qIAM^QgTtQr0xYZm$ zniS~WpWxM?Ul_$P88dvrI=>R_)K5)*hMq6?j&Emm>0@;!I zPF<2KQ9foHox^$R6f_8 zIdeyi-v$ls#m&cicKJb`P~#G&c|c_#UZ}jFqo%~C$L*4UHW}kzrVfNdT;%O2NGS|cX*U8ExM)q-8DJ>yK!gGxa z>zki6a@hQYgww@`>A1PcIc?JF9mkW`PoF=PyR2BuJG&ICA~s6DY?_2xFE&tJ z4P%&c>`J+6afvcxz5lZRxHA4%q8p3u>CZK0bS;JTJ|?PPJqlEto0&o%MUUU60tF1> zg{37h{|)DISI4ITio0?bZYSPp6dq{m#4)JHzu^bJp`g9p^*%7 zckSNX`Q?PK+=trRo$(FYzDG=(wOm+8*a>lON7kHWPz(wO@q>&*v!GEVpFa+dIgIV%HqKwD`e5hL2hXlCP?S~B|=k67a`{IKmmvs+;-tKwM?pRB_?!en)pjU49?Y8dIqSeddIH zR$2uqgn8^IES>-~;=?@)KuN*#d*cG{E~tO5f}a@wKE<9bQ3E`CP`7rOu3DefPjnz3)S4Zes;6a|+*gRDbgX!r9 z))-5xMwTm0FPrzl4Qleq9KBTm*%Mf%I>X<;y_|<2QkY$%F{ivM)%=8~q@Y`_CZVgR z|70^wqKg=c4cZ>s5#3!{m2GW9`B7R*H~%>0$Wk`j6-=S)Zrex)5il7+=9DSOAuNs8XCE&hdo`AR*{ z6G0%@$43i~Py7kJ6+m3GP`Qk$mgHiOT)V49&rID6w!^K?{9Lkbt^JF{)+LXfCzVMH z5l*>84Q2?O>Ju?^;u)n7$$FNfhBaPfJu+_)m>DsNP0g5Z?X7k(&-?47j$DhI!12W~ zcCk>dzZww-hmv!0BI`GQ@D9rJr(N;NIl7C<=T<9|uc}PS63d&KiGwiDs@pOB=_PcQ zO9lHv4g5UX<7QnPc|xi7#onI^GFnshL>*ehubfMVCFaK(&9_tqycwfc<+;qnT>oAG zU7}U3izG(fV-16o2Tb|2XQyrOXND2y0q1GO`K_c~Yn}YVX|9iDDT)Nn*k5=tm$( z6ZKIPvG3c;pPH3-07?Jww@~R0`H)YXm(#C>kzc(A`_Z)W# zFv$peKfbGjGuI2*Rl{dDClxvUWWouzH)5m6i?bWqD8a%r*aolZdaAhEdH%iDdiltV zaYb}>GNu91eSHdr!(-99;p(eMaNK7I?wzgz8$wE@%l*I_)ZiI zW;hwr&HBi6RaWPrP@8=Y3%TC50y;zCnE|*IKq4~pBL1C8r1@*ZLb7l;c&;aPnf3$G zP|3`M3}F}WC}!6RteB9D>0%T~1p>~}O}-MjxuvCuz4{ORCYr>N{nWmXI83CPiI>Og z+oT^C>bM@6h5pHR@W37eJ_A7iglJS&%QXh`fc(C?d2hG)vxVcsvLpnd!$eJtMFrpn z)}S#;yM@;D4DyvfyO)ms-n22oLIPf+G39pzYxKEFgAT(e{=Ua&kF-)>iU#zU3;`X7 z$u#ZxDtm`iqJzVO;u%P|h~QxBXw2lh#~Aln>D?2q!e2a>6)=3?2sQdR{whBFOIl|u+v+$qDE48IPW*SmXM6ZBBe|10TQw_Hsx$68} zN(~4TDbt0)^s=0`M$r?=lixaeDN&yfbLs^DK=UL6wM)OR3a;6y8p{`lYvjD}{jq&D z6?-iDqj~TqK&io|fyOd)XNGJu2VG=|xZq<(G89wc%$4J7PCS|G#3IPHctmy|;k00S zQQiLh_Iw01#)##iYAV>5v8QRgqNQpHzWh1mT@Q9K|FDibww3_5RmnUb6}2-No&l?Q zf3x!uP=seH$cbo#WGjG3EH2!VxUO^iJ9E-d8~t>BA!QGBWED#)$M+#@aVv%dKppuh zT7s#Q-3=_4?AK6qbi{tuc8RykX)(1go#1w9=e&cO$Sa8=Ow#L|S*gN)IHf4tZ(%Po zXUHM;&!6L5=kg%ukE-r=(~nB!kIwXxl4gkdNFI?{v*?aqU~`|Cqp97*O13?fY9OKE z=}cA>ipXY(r(OO{H(23^W#206F$Ko?IDJaBmH^|-Vo_uM>owfqQZ0c%x%&6jS*qjnyH zuq2L|yvE;+&T`|*P8SBa(TyrZ5){tK|0LGTK&@f$w&X{}r>)4j!$mrv4@9k|8^2un z)LXq8U*t0|D=K2TAAZQEBsczR3jtG^fRYqRHI<(8D$Js$J^K5FZr5x2BAR}OP7+)& zizAsQ9=W!S(w$iM1tB*3G!HE+#*gLn&bDFsP$XQ5Wz!Mweut!!?gKXw_{U?7TD)!# zwO~=1P1{1~q()ZrlNS6^TMjp_+9{Wk3Utb)_l~tAjkVZlLB5DAUsJUZZmylN(nt#< zgf3h@b0XErqw(GpW%)C@@l-0xNVrxVf2qx5O9rZ_rZORPo7iDNc#L!PI+}AQJ!kU= zYkq`>2DwHDVpW~>MIR_Gw_MmCYa+~^no^m_KzJjS&Yki5euZ`oachlO^Puk^14+h7 z#m*!V0xCWSeAL$Q#&D(`_ce)UGSR}ymTztRL2_QAoneFr*ch zdTppqzR@c{|H`ijY^u&gEv9mGa&akm;dW5##v(#LhwXXdSMcd_7(b7VG#@3Q^$Mal zGEwwb6)vy;193MYfAou9NtCVN;vknCbmuCuQL% z2d1VrVWn&oL}6Xpd4ZfIs6CRE#Y&cY9A5)!p@C0^Y!fey2o#QWh|`h2us$YB7N=;e zn=Mb=DXbjlVu@qa0(6YCk|U{=eGhoBa8aaMEZ?3+JOS7{=P7-Hve~cwn77h<*)Z=3 zLSu{8$`an_2ozUDoX>Ro2R_)NP|>KSu=sJ-^4XHq9J`IXnBQ--GLT*Ix;bz9=$#Ij z65w{Vwn87Gk`+vtsGy3#FVub#UCQepFp5~=U|YF|tKR72;W5cgvMuG6%iM=J+UxL= z*-qP=c^e0OoHb)*{TNW@=P?^`mHZ`orwsY5fD4VSUN> zfvuepqH(T$cCdg5Q?MtJ{B*rN5Tg#tgG=TK5o!5wt16Tfw6yaz#&O_;%b)~sCgmIr ztx`8s=+XH~+2@r#14cgGVy^ybpfMsMA|OO+nwTr42qFbbW|hRJTetpV_ro(Ilxwfa zjW;%BBruDPXDzzzY*_rdb&ET=PP zHwcoakGU=8*b+{ks4Sttw#qql(iBt0Rm#laC*U1CzC+#d@a|=J(BzPXZ~PQ&gp`ml 
z5ZpcC=_vTBQkP?8O&^WZpsSFytV?ROd(GklkMEg3`&R748e)#Y3@aHXC1QI4ez=;- z$nj%gr3xX`ND@>*7@SQ43zzOTRW}f5`~kX!&Y>`7SN@CCOZs8GsCkj(8}fVSoipXa z5^}T~thDe9@yeDi{C<4Ml0i~NsTRI-&8*?s9Hg9(zgI%pNw?w`K=Z!kaJg%7anTcSLFF1BkL4>MJ$)(&IJ6-Uh&)%6qI6(jV0gNAR8REM zD@MSPWE1NFa!c+DgSEi#tfCFJUhaj7rkZ)ADO|W_*5;UZR=V94t^A1+JvsU%9WU%b z(PH=vnZAJ-m07t!u;VeEXPZB2rcRrmF%-fMSVWRD2m#Co7XXLwwA-HeYCcm@^Nvy!Xh&foTXF%_iULbqQE8eX ziNGM~$+R4n;Ac!}-(He3hcEF@SO?C%I$sh$!S#j@yNvaSQjpJ66(L9fr>?47?XV%Q zFvDCP`nhD3EGZpe`=ngs=CrFg;1r_FFDc5VhcEAbFeeHPtqomBUl#v?$Dh%oh@4>f zM$s9MN;cA~)5G=`;Fx}b#E2i|u>;GfAa0Tka5A#()aNBiB%Kh>_UFSw@i|-P=J1P) zi#H9HU4g6h6bZP%?BYC6qOQpN()a?<*}wn7X&acsNo_Ka?d~JogJv(T>=^RXR?{cG zAoTA35gdTjkR>$h*e-<7)>D($+TC*usJG8B6^gO< zNWOAuzJ156Gj8U5?d8evx!PB5@ZZS}3=lugC{ZZoDAZ zj}Aa?EI^PlYgc4*eoQo+`C9gxhwXB$=MXN&wrK>7+8wS!)E$N$uW(G zi(S^+6HGpc1pKY}%?m3lV6Rj+zzvzr!($Z_hR$hYVbgn=*4FOBw(5v1XQ`}2$X+00 z`{8AV#YcVt0j^{&TGBFEyL3rPHs=q+?bQ0^w}_1EqRiVYW$=GeB^AjQvL(K!KGNZ0 z_3e2Q0bs7MfOw*Kfs#iwnh3-E0bxy#jg)0-F%QOdy9Nf}1_uXCX(T$z+3L13qc{@B zs%Ad};z0%$jYh5?g2<6<0N8+piCBp+Dy^%li#R7* z@HccxDEU~Wnkw6zW~vmi-eIZtZ{EiP^^pkm#gD$O7!NUef)iAoe!F(+?-)4v0Rgqz z@w%DXL7}kd8RMe6TG?SX4ClJxdAiz?=xxhnXyUYX^?ze2gkiNZW)Vy(B@5!bMI*T!;hw!Uv&lE5Yhl_L>8 zy;oU3j1vZMKF+V7PyYx1L<9B&AC$fMHwfGSH>fOkIGz6AIb+}N`M<;5E7kvRZ2sI8 z+W+hC8>=!(y`u*Dx7ZfF`$&`Se`j=n2elk|u&K7rzoi1b6ytuL{Ki}TEpx?U0JnTH z@u!Dn0C)vHgUMpg{NkPWzlm=H?1_D={~vwV{+`e){n*R!VX^_Qe|bs&QFEmMLh57> z-;Zbie!lOX*ESgb{@<7Wz_E}LAB_6F$|s z?1B@2&wxyu5?Gja^ofig|C`#Yz#clI?$`gmwBrM;n<>xW2mU+HWMI$gGu8iCzySNJ j^nv~L_t5{p!MQ^;=DC|!V04qcXNH4>rG<(Gw7vcx#nM1} literal 0 HcmV?d00001 diff --git a/images/vector-search/raw-vector-search.png b/images/vector-search/raw-vector-search.png new file mode 100644 index 0000000000000000000000000000000000000000..873eb2f012a3c5fb51b8d95f02d346f0fc329ddd GIT binary patch literal 25427 zcmd42WmFwa*EWbHKp<#vcXubayUW3YdvJGm3l`jhySuvucXyYAJKss3=f2-}&CH)! 
zYkmx?d!4T8+Eu-G$-cG_rXVN&`4jdhFfg#sk`f|HU|C-G?C&8D!QbUZRvc=(BRNT1i+s^YN*UQRe$TPLF5cvdtH7!=Zb&KNc;NU z>@YFLbq_9}XdEE|0l@cF(GTYATL{2M!xJ8if`Cx~gFOWEVyDpQgGWGs3LPKwX>@^< zU^UK9seSf$3q^-*h4MvH%<@%kQ0&UB1$aNe!j0H?!j>x+`5i3{nTnHc;Ng7(^ z_ye`@@r-m(o(Y>D4HbPmtm-3}D{~s19F(tB?j6tdQg?6g#|Xo z7JFb+942y4{8QvZ=OO8vEQ@ZzunEP8>l3YV@cnjS1k|bOyD8UOWBw;n77t9KfOOJ3 zI6)JxUnZUTzdj5BFgq#Df*)6lAE@MhJOoWJQmnC<1kZfPDZ|>YX@@Z4_80YIN$%U|H=(CJT-y>w`+i8YcZ9r91I0P?kFX56>Z87_x#E{KQ$bE`P-;(j}Bi8s%l>=VO zl*!x)Kq3ALCx3Xt)LI!+hEROu!}T?bOe-z!%RWUZqzAiCW-M1}3O?Cr)adn9W)y64 ze*`m+kw+L*ca3Xim-@c^1bkxoxB&gZ>GC&?&+$ud-7N6_er&d2l3lndSYSq%HO%CQ zTPft4Wcw84#*-;S(mPWcLec#^E%o{Jcwq4t&QF?cuqo~_u_H$sdYYf@Gd759j0ejY zIxv0bdB9XIem~vZLkNt2$j%QSK{l0po{ZjzPVcM7rhjiBhG(U6ZTVp`SQ76Ff117hk#|atOB?SRHE>6j{BkM zA(SIR3&gFEOE!_&7x&Lz{)&uPYW{5c!mfN{zA3g~xE~pUP`A)*+0)@{f~2>o9|%_= zvH~i*U#q@&BgEu^g$F7L9%YXy!Y5Er!=MF734|%0C_EOSsGya=&P5-Ktcge}GAIyC z0a!oFil^meOnD#LvO>2*v?J(*(u*(TXiZ{Fa!d&yGTo@df5`J=>gCi2{G^m2HX$QN zRQBic<%(_&JsOPswys|dKr2I@2R{jk8Xz}RH-xM9s2X3aUDQ2gbjA(oE#4ZlrEem{ z#Yy%X?M>W5*#5owWJ}V7=!&9?V-t1XJ-8Y4fa?j{0q%pn5#r;YML2{C4y2$5VB zUKOPvY(}w47>Pt4`bmT&7mg|Nr_8DZEg3s91Ih*JMyv#>&JPlErZD+s0ctWlavk!| zaeP0}h6{}3?P)cH)}<=QvdA`wI&%~#gTJ^)!IK5lBr3^s$-7BB5{k#y#u^UW?ahy> z4%H9c4!8e8Df*~F?u5mbrYXcL)h_5oBp=I>aGzj0S{Yv%%TL5l$zLI$T%`n~B%w;F zVy4iPd#Qq4XjXJsKBPLPTBgQR$tfc|bz_#vieQae9_$>)o*SsjtYkn%MuSF)r*y1_ zt*oZfpc<|4Td6{UFP~Obuo$zXN(G8~MJ2ijzQ{8*k1K>Lgu`?+y@1QhVZHRXs+0O& z@qQ_L>0PO}QeLTBX@1d}f~BfQcC;FYs$N-#l2_%E=q-jr;y(Ap-q?7ShtavtXF#tC ziHKpnSiM;CKvO|dL1lqNt;}`XJ{%nyolCh{OcRYP| z@-)Nbpd+i}iz6xbvr~%`xnq=LuakpgniIhx@-3!2tb4yZp1a}G@)O+c!Xd};1{)zJ zBm56|XB`7iMdeY1nXHd$Bmx`rzUMZO(PFUu=9cXKMZOHe+_jbL}D?yDyHB8-5wOPF@m!6OJ z*bCnE*8`vWy4o**dBA$DQr%VKNVP?yyS25&cy&cphp})quv)`r#^icrek{qfb>Xf7 zZ~S}Cnn^1{E5jYaoxwBtvm&%t2xo{?h+r@Z$xp?hrqfWn8(ZxQ*J)s0e7kb+400iV z`gn$NI!<~PJ)>rqmO|~fMbU+zvziUjV=0#?hu109K5+Q(sqGSD$lYD&%wJ^Co^)W?Wco)Ourf zf_36~2hhr*hp0|qU%OoDo%j&>;v!)y;9pWsvMS&%b|C2NBBDTIKypm7 zF77UhE-IC`FJ2zif?ke49`ik-yI-UaHxx^6yWMEvM;xGav2D>WUrf3-NmrwwL#FC< z*8bT3?kdmjXVXwqaMQ#op);BXPbQik7XJWn_jI4W+Ko?Y?k~TdWIF**xaUvGCv^C0x?P0V#I^?#q81!z9m4I` zzlX2RW|pS361hBl-MGNM-o5H`3rNf0xOm2Y<$ZrXr(@R|Y499B*w>@0qNSvI&>o^{ zdkUCC%;)!9Nv*2)NPBH)U#VURT7heUT?NFm1b(^?0h(@*lVpwSWG%y zkzJ)*qNDpvYfUr5u%dg{%v0Rf*^`&stdwUjz4y$!%IoE^cF%q`v#MToF%$DbU~_T5 zwxMJUP*sX5|6S&-jjA~Fh`Lkw1bGbo3Bj`%ZC>g`=$L-?>+AxD8LO3LAP!kn9<(9i z9qtwP?7Y!>U8hEV^ClxFE!WHGK~olVG!*U;=LqMrOni2sy=ZdbJANsL zi(qingKu(u{4tS#oiX;CC0U0)t8LqHuTOPgZr<{4L9^wwC0ehs)w^w_qjfL1H>v@D zF!OHxLwn)#>eKJFn-;et^ppAeH__L2$Suf5XhZ1y#^%PXb>vkyCywj$zV^@eO!wCW zL;RF(MZl_K^?1Fs{JUvoVO?RJ7$Jf+oPBuyce>Zc36ka*su<}^GyY;9i`Sdoq6xE! 
zTCxeT%!o|+wxMU8t^1MW0fv~@tvB%n!0h4>4Y1Uv5^kfYz1Yk0xxtXf^<}Gd)&=$A zn!m`a=y~X2@sa9kCO#}N%Pvd0^QWizJND)6TjQ+FhKLoDM+uq$5u}_1*rpR5Sb`at z87~-8vdsOQ@<&-8I2q&zd7d>7`4nETxd5=h@{fd<++fCgV3@>Dvj8yv;?NDOE_U`B zrWr{2+mm>crNA4eP7>SCFF0xEd+$H)`Ar#%E#Tg`-eD!oU=D>I?dCz)Rm~V6X(B5N zMh$v~0Rs;)2ZI7VfrD;r&ap8r%c5EK4w;%LcB43JeI6t=N9CS<2)q-P}N`$R}c$YXD0!mT7C z_Fr;PiI>>S(b1Nhfx*Sah2DjQ-p1aPfr*QYi-D1ufti^O)Pl~z?YpDCE8TYolK*t_ zPd_5Y4ulI{+E!4;ScBkOA`MP^WR#KocTWSF#J1be4jk~HsLkDoRv=zv>pLb2z9ZVG{$* z#GT{FkeBYQ8BJK8?=s)9CY(Z2m{`u+9S+tv-1sMWRx~;fI(YftulWy}ewBiKL?HzK z?^VSNu1<3>aVqHxCGnq205$t?6VdOVVi!3SCAXD-N>D5q?0+r=$4d?Y2-5#t-%ugG z;IVg)3H_Ja7u5Oz=YP6~1rJ9^`({G)-{u0K)-Rv`?P@U)Of*E6u@>py4+=+k`}l8H zRzkkHz2rYNVE#LDESM;?59GgHu@ebQ>7$O(2LAJ{zPSON|1d@fo`#ftSOsf>F7(g0 z3QTox{KG`7uPls$<02Fb9ML}%AhvD(mBRmPwpYWlZBa3wx|e&_z~(dmeoELJ$%3{>03(*I$iix9+B>}JZpGq~ zt(A+&NrZ|oHnTMT5Y3z^C@2s@_|Ka5)1nZVvO9s#pblwTq;Y**5?5?(V>q|gz>&2K z%+|R+UaWSCpW{sb>LHb^!7xiuj;+?mOJ}(?^DbMh>9&vM!+L8Si35y$%;V9SNaaH& z$@HL=H;){WlnevB+-mm?4r0apWynuR);Bj#!6nO@CMz>DdeCDdBroL7@bNf<)0Q4< z1yNh5@Yy5BrEz|-*0NQV`etg@B{ubk|G3BRZ-d>o@K|@$>mDBP^Y6ek)sb4Gf*x8r zIyya#fx&)*u__8=RJ4@J8yS&5vvVf~67#xyU-ZfKK`?*Sb>+D0eIA3I)|tDlot>b- zH^sJ>mq-1W_BZ$m&-ZbR7M{nV&H`F2VhcXc3t7C2j@D!S zw*!KdE5-@EW8>MSFM>L|3A)>1zK2UdXKhX&H44;-xK$kMFQZXaEv5RDCf=6_B4s@H z4*}fg*?bvJJlF0&8SYb2)KpwMag{$hhtubR0; ztunDVn3!+5?j#WM`A)!-x4~vD6ivEgVC7<5Xpo*4IL-1J`y)M48l!izbOx_TqY&K0 z^RE2KRd1jc(*3xNJ}@Hy-dLqfMHMc*z178F3#0a?)funVV4!v97HPCbRBvlGEbs8^ zCKjuOd&nCn|5|739i5xczIu^zB~y!wnZKDaUTjMn`-{2b`5Tf610ZKK>l!#4c{Mwz zHv;$7N37ikbA#?hJE9%w|U^v9^Q zyrZ-7%b6M*;C(b55|LtwBUhx?Go&>c6~2;k}QIvta__8e1b7e-k!2TPs(#Mr^dSgq|&i7mxk_# zVS=s9--u??g89^JRUQlM8M<#!7z{amLov2aPW`PmQ9aqFxj}l@+!1A%Gta~_cYA* zN}dt{g4C+T6~V^rTf4wKPM>pDi-bFYTcvYNTZvzxqgcYUXhJM}HCJQ&gka2Cq9)Np z%@bA+q9 zt*mOVR{VUL& z)ya8v<;YyciiEn^O48>PYVzD~y{Q}ZiQTTq9%N#A;Wj-8WSkHX0d?=|M3 zayc8cdX*xQYm5(~u`>ueq-z9hx=R?-`PPn;5#yd>%7i?~u%FSO2& zk3SkJ5TWP(}_xcevWwR9v5_nkA@%)|~1jGn^ z+tWMi`ymG zvEIPFXt(=W^Ee05?g5@!4R?l5BXroNBX!83oWAFj z^>{Wa@qf_LZgCZS_H}i0Gj&g1i;|J#EiXfoJl@gDU$!RKgD2?#)i_;oN+r<;2(Tp> zLOr@=@pwb+CBzqpru{smbai!KSD+y2nAO9T;pO3p5_jX?plwfJ5G;60g-)3x7M5U! 
zM^nm8c%{ z`^RLjlp3@ePX-~8=r$;1c;*cDRzkVFIh#_iUd!STx0VzBy9J8aCALPC?@4^ESG>IK z2|ru*hb(lVg>8pwPnXT{vljjg^{o=BVk!BoKDbOhhnO%rWBbd|UdC5Au^bCr8tK2V z%~l%9?5K&%Q^u1QyxN!Nc6I24;QBwv(-%rW8rlcyUtIP7E2RAYse#b7MxI|4Dx9G2B)>S;d?GvLoQ`yK}-I!FiM7`z*EXBN_WZm|O|>ome%q1TBHJyFPM zys~5l0?8|X;4$5Wkk)dGljSGtH4R`oY*SOyF0P-=AN<=T1a&y0PZT>A{0}B4G=->h z+#L(~`x>;Fg9J1oVv|B|W<|WjG^ghr2E1C_DK55-KVUcwJuYvh09VId_pKSq?MvJh z96428YODWF@gS6l0;w+84|-x9JcR{%N&VwD|7nzD-y7DZCO2xqEpTR+`Otg+!NV9izX<=w?^}q9#`PaqOfh)SKpf^R4Mqr-fX(1dP!Cv-CKKVjZc3zxBNpP88b}Zk_4jcYx<_X*FPbzG!x@ z&0LVD0M6@SFgf^bT%W0E#L$E8XIswk@= z8&l~Ip@N4;^K2RHzMzDLg>@gz6e43|eLolsg){71tJx}0=kz@uN_W?N23W6qz#r+^c z9Y-W2tib^xWRYtY<*~V=*AmMy`GF7I_!e24J&y{=bb#b`gk*rwnOBD2E#+GW72W{G z0|DozRj%$Omr{?fRXFju+U#I9TNKyuy-c?qdw6)5)BT2$hlgkKmn5oovwi;&7BUzt z=O@3L(KO$@__!7qN>3D*JjedvqID}qDfO07a47Nlc2Xg!<-!$>>bUQXb$Xm;_!<-r z$Aw?;I^gO0FmQ`${1?ZlFi!%J#i_iPfE(lQ)L<~V;NsD>Ma)r4yW>%jE0buQ`cHu zPVN;IkM*?SK@*`y7K`73Z~h$I&0>Kjl|UJS-Pq}CKX(zNr(j%~WNDKk?UBDW?Y*^$ zRTBNB^63~}LfqmuwMU*c;mPE&c}h%%|Huy1c~fCeip z9$sE#ObOINSXu^&t{$lAym!CR$9`KES>fT5a>ZX}MM2V(=}XN;=B<8Y7AGX^SDZa^ zZ^5&di4qKRaP;7}aHe^*08347T9|kZV{#;P^rW*8eo9K1ZS7R$hliaQCbLOGcs%Ye zXDf|^55S%JxjqRJ1EbX@yTj@H2y~oMweso8j@LV_GjW5y9grgIvWJ@9i3T5B4;oEr zrc`V|r(W=g^(Vn;Bkl`~u|?7Ck-Z)z?_l96$nV8+Z}wfLjLzL2jZ@`xJVbK8J9X=& z!Qx};@q>h}GU)#_mclaV`l~<4Zl=x6!HbHI`1R?w_iArcu~L`!bfpmyG&dZ%K#;}% zjR?Y=V(uM-q>-@c?rCX?(+ne2QBtMrfj~F2z#lX-I2onCz{f1A-gU+hY`|FdN(hp6%OAAi7t6z3u2L0h%iR!vj zN2KL4%{sOF%L5MkIEi5dLm-065Vhqx^8jXJBX0aZc$ExhXjUE96HQc`M4|jBUdooW zL8L)0S=@GsJTSBhPCTSqHeg}M^Q55BpYFHSJ(IPiJ7!yshVNfsU^+%K6qV~+70v*}PHY-i#-QC^G zxpw`Ge0rWemuRx}eg>%16=BV$y!u#Kd6=Z;4 zDtb704I+-3^YkVQd#5~_d{-z~r0>qx;OwXqw~&F?l#025&M=9IlA^Rjwxf^N2e-E2 zW86$aQ@Nt`#=CuCsLN7H?e0#PLL|JrYoR9G&reU=Q{n`3+XFYT7@(0KiL*Zp4$CVD zdSQA&((KY8l8>YaAor_2WUg|+RY)w*r0u8YUUudmQoeWrC^ZR_K5<1(HrnXzC}l|} zQYRj*HzCI%mn6wePz|xJP$?If(G300`05Zo;sWZtnJ^aL2c>w>G}j;_Bw=%Nq`O%l z>4K8@kb3qw4E6nYV490Y(*tKi*UqI>hYz)k83UBIj?UKJUJ%|_XET0ek7A4yL5QG4 z-_c@ugA&aek36b9?LiU)!|q2gjBfZ`usU6hj@<@0aVwr z*+yn?zk*-O*hkbs%Atri? zMMF-7sB1}xiHW~2BQswMIdjWONHF#%ejX(Ca&%;k#^uVbjH)M&l*sbBOIZibHXZy2 zF~Yma-8Ulqs89B+47@-Zi9?%{{;cy%u)ex^5wg2^&J(Fyr)S|u#=G>_>QIy4H9YZIZ4W_@z zPublB>}nJYF|=DqYEqB?@}8qjIacG6FBAh9!baUQHSX(YI&0PCXDeA zG*V_g%Mnb?`Bx+%g8T$hHPiAW1m;inz63vgRl>_BnBI}+REn3e&N)<|?87GVaHiF) zgNY}X^*_~2RgI;aC2&6^YP;-#(`cWc(Dk}i?d$IsY|vq_(myN70)i|n5U{VgD#HD< zWg5tA&&Q}dfk!uK7^%3$xDpYoJotuy4Bs1Jcl(67QL0b~O71VW)rNyYo(%I9I;F`+ z=mascY3xTk8$R#jvkVuV?|j}*M-=VI8U+~YHZLzPm!0pgh$Lc>zK_wx8vXT+rm(O; z%r`3^$M)8^xxSG0`jlHDaB+&wDI`}%TB6oc%t`$U>D)M%MqeD{1?0u^+ubGibmn&@ zjHk4;-4(dqqu@0k2PHuW2O;0d_-R;DGPK?PgL;L?4#>cstTTa?!R8lwbvX~G61Dd? 
zOudo)OiPLpy^4ryq|Vh&= zqoHYWzuRi*L-b}FuMfuh&NEi3E<@^{`1tnW+g>ZI$ILQ=7aB+iUpI^Va)ocp%Ib-F z$x$if`iA02rfN;)0|Nth7Z0pg8s_HXg5WSraOBgKMf(8UFCaAalfx#HPe=$S+wT5% zk!mB6R5D%6wDD}t@9BaUC_7TLlVBqDzGI>VICNTpO5OIWc(R!<5D;FijpHxf$^=ZW zek|{dC5XBM;+MzJi=l>h-Ka!%Ufho`BSPX872j>09D>r)=g7UbEx)B#`V?HKYG(}a zWE`c8h!=89^bTFOn5xf6>tdN#?0c{=a473APp52am+Oh?Ft0badT3h)6x08xR8-GH zSel(^qc>UT5nY&gNhwj^Z_!roEVUayNuC-wIoS@TynWwN`%GieO~yZH&`o4(=hR)x z5ZRq4&tv)&B&M?%kv$zHbKgdZ2{DZrjh(LP2=(!!33e|oo5h#iA!?)gjzG`Qy;#Du zXq`^-5P-B?MhjbMfSZn7I%iJd?ntxG``cozDdM`8J-qy#xiS7(QiZ9Umv{s`!3MEZ zS3++@-g@7)JdaGbowUjsfAx*SK4Lo%Bp>ct#F8VDB7WCnL(8Xcdaskki1pqkcfbV1 zng}>?d$=q0%enNJ&9j||FUEMcitjoCJ&*=~5OwuGCYPc?x&zffm zFnWGOA(H|SYwha`Lde=QrrXwktN)q!0j5tktot-(J*cn{Be;zfeWK0mZ6jvvLqgdq zf+^x#B(bRQ*8X|MBiTin93dMSxW4$iK#E0%t^I|I8bc-Fr|d&Ym}56uhpXM;*y0w4 z{ju)X`;Dx@+2)w@v$KGEQ|}MbVW0DiQeYDj^M_m~Kv5dsTbLoCpkA@|Ai;@_vJgvq zYRNvaZ(*989^XEUEF^8szI>k9SNXVMEKaE7fbk~kPi>4*Jjq{pSanUG&_Ck+{_0}2 z)}s2gO|!PedU?Zi{4|9Fr1HR#V5rd}3xlJ_oQBDSRyJQi@U;HkBcXIIK-jREJo!EJ z=%|>p-G+*%Er9R^kIOcDF51Hd1e9Ds+rK_Y*kqCMud9y^CZS|n*9$CSd$EE6nZRKl zU6k7ztLJ{P!Y#GY*5yXwTM`1+PBQRSr`0(}5Rwft&(4rHpl{0WpuuZvYs>YxxbSkA zA`7tA6Su<6gb(8P8Yvd>o6#bE`rWN~>2rs2( z&3P0u?q%4x=pYC%-b!Tvfj~{OA*pQU4=PZhF)@gI51WvV7wc=_iU;MAH3}LR>+M{Y zq&AC~X&?yc549_NxTE(8L*6P-W`|ZchwLSwRHF*&V2qml1&Kam^;a2=U(swff19U+ zZf7%ZR$xu5_GOsl0`t6K-Ik=A6QkhO` z13G_=SV&nr(NozXd%p?NY4a0%v?qWeTL}>E zcS>(|I5>8>I*4HC=mHU1v`idrUr+E4_D5ha>GuSt=K}Hmga|-d(-UcQ&Qkz38okNY zo+SF6g&^+WUGY6C=~f-nURuCaG;*VF|h@g~=^h;R;TVl%H8ZFc!_6Ck6UE!VT`rL2ki zFCz-7y}lfmR@OxH9Dr~D={E<6x9+*b#60c_ZN9Q#owxp=9ZTdQS|U;D*&ub~gU4id z-EN8jU6I`&-cTiB)eb*2g*I)62ei`E1uI@jJncb-3x*jNZ8mVqJg2X$OHq+ zs0Q`Ln)M|OzM=IZ1>qyd**NT0ldTi`37u`I-BZQN6m{k^`zDHyf8BFdasqrm?McZ* zjuDH?D7MN)#04a*>pR3n&s9p@Ny#M7*3jG_rZDb_1;Y@q3O=d!Qf68>hu1=)B; z-#=lD;LkX@_Io1_d&zn?N)vOX>f-J0w~kMzZ!e&QvE1VDaJhAw%Q=3nVjRxnLxy7f z8nIVAPLWHWYw&H~$BC>ozwN!%6`M_Xq+7T=A!UWoi4)5SHZgp?>E}}Q5FF(h29!-b zZG&4Z_h2EZaf;a03ZAo2C(W@=9}iZ|D1$QTI>?=me5q2aB}CmYEj}CM6$;B=gx;W8 zY^opnK#2lC?l?Y$FP$2hluSp#s#ZK!I4~j`@>G2m4Na`n8I5)?-z+t7nj#WL-Dag0 zrf`Erc?WIxSsIOz4$35in)F)uFib&Dwy}UyCVePm9OKcXAlfu~!R_#_*eaV1K9w>6 z^7H-0!u`HapFOb}{*oRs#~ZG}p6m5Ehhm8e)wNanx%+(d#^u$8UdbBwH1h6F2%daf zgp#+Y!!9AgHT$)$u#gn-Cq@`b0p`k@=yOf`qRA(Jamydx{TYEdHN{Al3b z;FuTk#;N}(z>bMtQ!k?E;pL6!5Ky1f;bYosAhXycvLK9a<>rZUoLKhL(3_j+Jyw8- zkPuFDn23n}sqP6Zt1o{>3#s$MgN{eU*Oewjg<)gc_WqI%if*JioNCxkBC<6wkmjI% z2&sDuRY47oPGfG4qw8v;EZtJYbzO5UP9<2%h}+8B|5yCYJdDe#UC3QG(gqwbFILAe zN+Cz+Luf!Ld>+WAM&y6V)(d+Nh5~{PWtTya9kZc0d)z1$ z2;WP!4vrjjD9tQ4}0$K%Hso~ux2^u#9SUJ;WntJNmO?<^mH(sBA|)v>c6FWgM) ziOoP1PGK^4=Ua2_+PnLM&5*eJa(j&7@EaWx^3{QATr6sY@YX?H6Vj-th-Vr4M^i)k zo#uiJnc0z)76K~ue zFzpuWIXkjSLVGhAG(_flR_h<6@!J}X4I8m#nLG}tGfeust=&=UI97Q4^N{tq^2*vAn)eRU}g+TOwA%OS|Zw zCHt4%m*20;S}?%*40Gc?!h+1InIs5H7{qFo(2Y>f)A+gH5zK-1&u5J%7UB8Aw1i6D zDa^)oBt?ooDfBeCB4vckyC`20wlGPbBSgeaf6hlW+237X_p(gz7``0_TVg38m&$=U zwQ)VLLPXHzgTwj=R6!4C@}cco%W+C{+TOrr3LGp=5k2;~l@7{XX^fw&u-7T$Zozq` z&k)06$%ho5wl5NbDoE@Z>$IUiowpe0qLvr3$|!kY_=fuT;IjROx<# z?^y;hB~)RUK6Iw&!5){}k+PLi?pE8AnUtr>j5#&AJXmgo!eAVjE*cWIKZ8eOrvV%L zxJHpXJ1Zz5CCOC1Vtd68|Lj$P&D>0ID;=DgIojV;ztJ^14(WAl1gPtqzS`+NgP}qx z+6ni4+*!o8-a;aXMy5H)*&jO~O8Npz%seLCTDdNeYcY5gB7QPhn9E=uF&<5^p(^)_ zO!26<5O}^%z*u_yZI_(?y3I`%B3ep-@3Q%IC=Cv#IjLQm8A>d;Y0^Lh4i%Y9y+5pD zqRCk$I4DO)?fxis3s%GYW{|*V3>7hqp%zfKC6cNt3JMNX(h+F1k4~^u{|YQIuiuW} zK1pleNq~t4?hK}xwC@(uic2YT75#b>xKLFKyqL2M0)Y1&>jy$^nch~7c4Ek3$H?@O zj?OU-xU`FHUhcscZc_I%)?P1MY)-kC1Cy>Un)o~$ZCJR~TlZa9eeSu>zO`Hr z-`5ErX9u5E34G3xaBYwjPj}T%LZ{So_R-}pMfQ|Uch-DpNX@aw^`Y88FbtL&CKjFR 
z%xqJrb>-sKc1R2FJeqye-YX;lg7QcN$RUWSS6AEX@`coHZ*Hgx3eJ)gN(+;wEcJ&p z&i9(zFd~Bck{#hcNOv9Q3a3)^Vr~=%8+!>^B8MxDmH6xNIedT9%P3EwtcahD%0Fvh^kP~Jo3|q%= zRncn1aM?!aoJ?l%ru}M1qJQE2`)mkLtGH~w&fO%9eb{hXQm&_foeGVb=Ei6ixdpmZ z$jI%K$FRz4S!f8yy7l|dj4f_&(fnCOO>G$T(k7<3YNhc*Idt!%P~Np&x=+ zm^%9VR&0O2k%TeG%cOR)*ppc;ql!il+6GlkJXEJH{Q;R?qg;%d1@OHAprPhOS%A(x zN^O+90otikHOA7sGRdmke1Dw5em!DcCb_*}BrEfyL^O%v`~ljd+ZwW|tix09-4Yit zrXXR(SDcEC3P3C&$MSmCXugxRK6w63yu6d^+6R#m*D6J(P!aQ_lY#6zwTrxm|H&Zr6@>-JI0EX_ z5Ry_(XP8|tzqM;<+uBwI)JKp1HyhS(6}jr?;ZWt=Rusqjq({tdMN1Gv5=^vUz{evU zbK_U!e^Q8T)gW$;sthJ70o4%WX&i&?_v;agd14yXq=TcH==FacN0|!9HjlsRA`^V;Yc{&A?j#>BbSFtS=1twO`$!QOf|5=oAzWua#LDj(d zvTIN{gURd35fpDs)|x*+zA3}&+}hjdoL!lNF8)yO%mX}^#n>s~^kSqhQ57N^60^)I zHJ1uj2ON9@^w>DbZV{3u82dmycA?KyWucO zjx$LwAGbapuO!nb6U4#cBI&2Mp#2Oc;JJ4JYun#EXmMfz>5vZ|f0%GRDQ{E&1&IE{ zcC{M6=Wh;?=v5=N)g05T>b1kp^WLH7q8I>YV+b}_lLst16V5D57kVeWppaoI%ig34 zQ`#NK`mU-Tfv}u6tM9gNQc!%0KdN`^f_#x^k8f+xQLSm_x@YM|Z`p5!)7A?EhaG2# zXu=vO^?FsHm$ZMob~;TO&0NFcbc$cs%-Ox=bGft4nR>s?-pJ8E(Pk7R3vz6@ej6iz zvt=h2I#{TAvHsa{OA$CYXe2d=^JkISegP!~KORahiSO?Y$Gg`{q{J~Bk9-07!62PZ z=0yPjOmtd6Hb+w_pxS`X-sPOM6aP)?0_yA8`e60hsXng^QETlwEv=jFFI|ZCNFOF+ z9Wx7TlQ4x`(K3V@`S3PWLHma*u8g&JZeBn7xzs~bL(RL{y^)GK6!9efWD;^F-3$2V z0UgA&62cydzKAxCB;n*W0}@q@UMfA$Cto zwStm$C^_e>M8<=Qxu-GyKkkD~GpKX}*M4lA!9)f<{C#l{v;&?sZXADZ!3Bk9NAGak zJk=tgI*l^SAe%mvy6$2gP5Z= z3!LN%2m%**D=r75G`>O*yHd^A9X$BvoI&_`^IJ;AA4^M_*s?s3jg1QS( z`rqfdH*$uTCoU7~cf1`feh7Yfo4QJE#Jt(6--9bY{l$m28Kh)%XcMo@IQUZ8p`AmA z{QEc>N43@QXTkCB`X||vW?ya6@%@Ltg#FE;0^w86wrxVZBg*k2Toon6;vt3Jdy63v zlf2NN9vJAJ2?LPfqVb#asP5AGR1H12Gr=Z=GhXUvjay0w`g^S2b4T!qsIU?4zsthcV5pO}sAGl{153XVjuvQd+mbGBFQIEkib8RyUH%`|x6A0yg* zt__lb^`xw}q%_zm3GYD~u|C1ohtTBvv+9HrYzDB-n<42lvZ`>Z^Q<=^a9EPE(o+oc zugsA_2_c*YmFU~-PQ~*8pV_Z!=AqIS>?1us9NBMkIVk4yP4#-9tkomTyXRGz^3$tZ zC@U^6q_mH6zMSslG;jfdE8)qabyuTJNI3Zurj^>k)%|Uu;dYiC-DSV!Pc+EIut^N7 z4OkTsN(}lL{b69LeM3b}9!3~$W2ItJ$8Rm|0OeRpa$Mc)bGPUOKfoCDUZzdU8ge}s zEIGML@51JGPVjpjQ`vLsw{qV&G#?Y%I__|EF5YfeF#B_CHacRrx*ZOfUle2*Tm}Cs z{@AX1A0hOfk=l`Hae;Ks!3K9EoAP%#E!Oa&=Ovv?CN>qmNtC|jZ}>*8BK|TlY(lGt zmB7>y%SS)!B32TT7!*+m;FOpA5#_~uv$htsk#i)Iut|YCN4B@tSv)d2PFPIk0Na)4 z)&kdIUSI62aNp3+uO7!o4OiZESArUDy961907kXDQ9wbb_pky9mpAWK;~hJokY`U}HoZ%@zegLaU= z)f-(h?@d6xYlnQP)^CbJ_>yG%MmK^mvaLU`#D1vwd$SisL;zWSL3WeLL>z6SN8NFu*CYCD3L0XbGE+)QN_B=;Rru z^3{$FW0kSU5=%?aY!|p1Ll$REbFVf#Uz#{H&8|QG1~h>aAXpmK+nXO%VQZRr(m?I@ zX3UB6cDju)hU5#$-IgG0lk4^W)Z&PQk0#)cPNLH?*p={xfhYg-j~!pWgHGC@)^~sh zG`T{(g=#x`BqT?i>?oNZZ)=0$bJAx{yVFo^gq zW7|nd4G;(1DN%?Qh0KH_AmcD}JP?m0(uBBO?{f+1)qDmW>j(nh_9jLdZ7(A(fz_+5 zHd^{_CW|7h-{&rZw_EPLvG52m-FvUoctPnSJqLPp;@X})Ys%~E)P4|Xcs6aokE?5IM5bCF9IAm!rdfh&?k}K>A8S>f(P}Wi zYjxsVrCqmSsU)FJTpuz9Ny4=1;dO=u=wWH+_;Kp=&=wet!%-9>+47Y}`7=S1(QXz_ zL_gHQn8PKipz3}^-bwJU?@KWxgK}vl1ZZ9#_+RP}yzr0B+>=%!7Mq~pv9HL9Neu1N zc7t8lbu$6?qlJ{psnJawT~xl%AIQ1Y$^d@y{I6epUT>56M%&o}LZd*AceBv}P;kLX zvAVkYsYAU|cb+!YO)FGD+n)G)8rPRZe$S7B%1;|rhL+C#JarFV38_zHfZFZ$L~5SHxJ{AnZbIhzTNJzt2WI^H>rFaWSRof$>f7VHOTIw8Ol^rM3- z@vBO%Yq!}9Hkralpu&Kdtr%3XPqoYQNw2F(I!6{?_Q{mNIp*u3$_?3r{BZ<%R+LE4 zQGnuJuPHKq*8BS<_THt{JCs}r8>f+!l|FpACaXwTtbwKx=Iy)Fo{87eZAYsuA1y8- zB4*SN)$?xNwcyPQ9|X*)D%T~8S+iN;P7m+CRtX6Sp$TuS6>P`jM;?0J@Gc+d=p5^V zZWu}jjMVz07+I~isUxy1-I(aU)|rDG?}+sRo;_kNt`CMAPngLJ;yaai9!`%oa;Z^Q zY!@?XDRe7$hY!r@%--9DEeJ}D{GWEN z`YFojUkieO#7cJ~A|Of#E8U&a5`v`CAq@+NbgF=oQqn9f2uLkRh=@uGEK7HH-m|E4 z|AFt!ojdo=yu%O7?)#oS=ks~b=RBY1dFuU67se&9NY~e+=oezAc1()z7Xx^7xsk28 zdyQeR|C-S9m5SW2$SQ`r6O-g9*3Ti$k5c?rQ=;d>xv)aawBX4upv<*3DkS<)Wb&+o zg$1=%>+gKrF$&$A&gWR73?C?_TwLhP6j#Y#${VQ~^O`ch8Eq*+ZJ`nf(ApY>udl;> 
zti)@JP24(^cg9kpk?;h@{#YeRii;wHLgg&`w1$RMQ{&atj^=mE7iZfNo0@S%xCEOr z!(m~}EG#tm;=Z&ov_rBvdj7=J9=E!^VC+oJYz#u`T|{(9l(a1-<1G74 zQk{`CHZ?hG958QcEo3%l2KoL*2C0jMPPcXR){)SLZNLt%X8P9?$mlZ6OL;_2mLEAG zZm}^53sXcTl_1K*aCklEX`kWt;dN`z@qOhqiZX{i3SnF>{ zoi(7qcr7M}C6+4X%*2kn{O3D%j)gI?S%j}a7@}`w`jG(oT60-!+MH>I5m%bHKTVUb z4ad~fl$nXDt1DlTKD_Kg=B@;{3L`Hs@8NpG#wK_@W7^$&K=5d<;`flji2bqPF z9aOq4x*i?2gwuG$gMyR*Bf+HIX2Up7$a=+OLA^vYWec8-t*oiYQS1>^s#6^zE^GPg z`fd1ub%KfI&FUh}pVe`Z_UG&ZHp>{B8^7X{Mv8;V0;YGQfXK2mb0#azY8v|lC5-;v$HDcj4 zbzUo6Pe5sNPzymuiouum=w)oh*?V|s5rrfO^a z;R0y!O@l6IU^v7ADtD@6*}DmPphiV+RgZvrAj@NIY;m$lBox9}b_c-c$6R0=4jn2r zviK59|89DR-3C(&&!vFEn5O^yw2pWMfoT9(>R{gPw@A}j01&0gCu zAk|NnKuIL?UT29`h=f};#!lx#PzU}<3Q|#WYQc4S=I7$nAmY3`LOl_!{QT3iFY3cB zAI6jcBSj%DQ|e53P_>UtYx^+AMD4)UWqQ{8*Qe6TZ~b0c8hLmP@#ghOu0?u0PK`bL z-0czpa(;U!S6~tdZt)21@u(0qr!z$~iTWDTv`2Fd3Hhg4mde19&tE~G{P#~R#KeK_E zu5sUgDc*cb@2kL(7}Yt%HHn#+x)a@9#J`vb%=Bq43T=NM#^w5~dX<~Yyk#4*Wt}?t zC@_~}CBz%Rg|tm#{+Yt1yA~Cur(Vt?(veuwwRy=7t5zr8J4~M$VRXoxut$~#2weJN z0a<{dC30*digFtEW)=e8mENI%e+=jX^vDAP0qg_4r-AZ$T&WrF9`BZTivUde!>Q(v z^7+FWUh5rbBRhsK>!v{_kI#sC1kIzRVsXUeWJY{kWynq#Ou>Fc-rwFx2qM-n5i_Q& znkMYF#(3k2p~0soY9FizONb-2^ZV_Pc;3uOOA{9i(p8<$`8pa#WAgKTleA=8(jIuW z?ygUvULT`pHbi5AXFyu99|uQZaIY_2QJ5UXQPA?~+hn1SGIyGZU+g{G?xey$`vIF0Zg5G&Mku@c4LESD6P4O2-=ciNeAKvdL-pwe$57^YzCSNG zSK%k%Rg691b^8tXnO9qk&fohU{nb4|Fyg)ivg7C2v@5dhqndZD{Qx}|1NDoctePzV zRjT%TOJVM-O8P+&Qai;?0iEq)>jS#v_~9Y&&qsLcj*bRZ0U{~2>+lkK;<}B=q(d3P z*WnhnhjSdFXKh$rC6xNZ1X7Gy)^I94+^yg7d;9?=d->nshBITwe#I~!54$jLeAo|d zyMuGjU$X?M+D%qT-^#VLx4NVCc^!Yf_S(-=sw?r+qpG}R#y`L;-8LS_+Q3PDPEUfQ zkV1R`EIFsBY)6in(vOpEcJQW9*_uL9nQKAY7#%JO^)y0sa!pb1V~#f?2hFRu^Sn$9 zZy{I)ngeJ_KYi}essiW2L+cIspH%^S>pzC!waYJ&v8!#Zy*2}9wbmaFqApIf@d@0x zWG6_-_skW(rC+&1L5}tUIChAD5#xA2Wl6Yehf4`> z4Aa6G_Hjd_jtfTMl(?QOZc%ZXFz~1)ypC8R2JTDn)5iPY>;>y?!*)<>PjfEYWV6kc zb373um*7Mu8Ch7SNGMZBizpTr)_eIFn)x^{HB1jF$8+{kL+f>{ zxs!YJrX3rS$vBUH1<=>n>4o}@%;mrq#9nBMR@3(+-40UlnT*N~I=A)q_5bzsJU*j| z<(yQb8H&-tS>56POuo0eyM3?{-_eJprH_V(6x;QB9LM7~ST#I~HP;}E+qvf~KCVRv z{CCx5Q}tqWO*UE!%&-0`kBM_tP0!HgRloQq_pSip)LkTXDu947-)(1S5vLJyB(OX3 zGN@_w?lQKn?YgJ&(Q#rse)-X_EXI@u;MRb3$G{9s6))c3%tzMCL6G_&X( zs|#<>C=dPFN{trdh26I=iPoYyr}tUV;N4h}`E0;Ou#lRzX)#W6tKfZFS@Bbnl%m^( zmFt~B%#1Vxd-b5jSAGnRO^U=2C#(9!xG!&7h+#1+`DNc4H*R;8D-T|X6+>@V!H+Wy ztU5Wf_^S=AHTDN!U%%WE3HUNcyu+7xOXwjbM99;g0g6%OS%!#e?i+MrYX+QyL``oc zNKIG!n>S=#tHtQE;Y_C;i^{F88OEcyt=1jtc1>^9o<$0Frr6vwpXx6_bbBXlr?0Aa z#L8o~k9G11feVkA$7u-(1v4%uP5f=43fH>&`ezm9=PWFIV`Tc)4PBb@bIMkzl^F-X zrh|6`vkc)I{*>{P{VJorCm3f_xN;$=9^{)7ZRnQN!!LC7_0M_sb!x#;qy?a1NXuB~ z*&{#Gj}jJJ>bgkW>zFR|}1h7;VNj0lJ zhZ{BnvU5mgemz%O0!ET|s8_$8nNCZ#AubBG3_mQBKYq=A;Jn1$Q^?Hrj@*J__Cfc-7rU{{yQiaZ@>yBj8D{2|B;RWy0Ll@D* zsTV$kZ&mn5Cl#6VqtVb_%}3G{SEC!Uu25Ib?o0hhz_HMPPad(xd+ag2BSO|}D| zYi?!KEXaLnnBl6>@+{lp^l)noGNtFw$JSJrt{6~{S6EmmRKm?kIL7%Y=L= zUxsO=rBeDlK!tsH%Ns4H*4ESp5-820|A_lEZSf4`+|eh@F-q2kJ?OTw%65XxcArCh zqGWqp+iu146}{Do2L1kwZinOXPEsi{Mn-rJpr+C0$j>?Gw_PXaoUP^PyyUNgu7$K( z%XGHPDHP$NBO9D|dXM0Os9S%#Uk&&Ab?FF_o4Z<7)@q{rBNjfHW`WO{B;T+ zV#AxyKV=XyZ6xuSMAka|mM&qf3{U_hbxRAV;jdnf6L9!GKA-N0lJ|gmtWuf!ZOJ*P z@#=}Di+WdP{4n_J=iPX=|J#H2DniHH6wUGjS6~dDfwB8|5u<{~T7NN2!O}8!(pW8| z70{EIqFkt}%mR+)C<*AGH#9`ILMRk6VdoFnRo}j!IRGFtXd-hbaT^B1&W~`=?05<` zK*i!O&1e2eK4-e`$Qw*hp%}p8-Wg4OwW3TlU6{P`a2RN{qJTVMQ9$1BpP5C=D6WBy zyFm|lwX1hU!NLq^rKP2-aqCvf8mbf9-zIT3H1&*8$%GNfo$RmGG>3=3s;h&VR1@@$ z2b`?=m?Qw5gN=T=YQMj~zgE6F{o&D37HD|@?F$=^aozi!H=n{c!VE+&5ks_k(D1U` zG6tST)LjY`GwxjqkFi1wZl0cGJ9_xZ}pViAyl)UAwHUeA`jV`eG|H2uOV5Y!6zUHJNNrd`HkdK9NCR) zAnV>0_CV;Q$RM&5g*4~EbxiVH`jNMH&5lGPR0mo2)*+0 
z>CaAqtKFXgzJD1?7R2RUk-2=c<}1$ptuANt;y|Gjf4j8uRnDP@{bVC`<-KGqL-j{k zFA3o8t0P+dwgBPFmWz6_K3?xbWeo^#RAN5W;tc+~TH0=Xy-!E(Sy<$Z)!1182IZr! za9uep6+n?gi(a&aO-FDrJUtJ%!@jTPc$8qHLm5he5fx74D?R<8R`uI6NJGHcu@n(& z$)lRgJAfEp&eqm8X@E=bt50*( zI5fDYIZe)4adUe+xP-V3bT$55R}2vOFPQ##w_F#((1*Zs**YlJ<)Mmed5pOdbi9`^ zwy;zFLnrXip}mBRP+f?=QWt1NDBN>b99D_A2~B;|K7DC4f=vT3`IB>4>GDM={8 zpikZ}IwMS<2MTtR>=Duk)S2r=a8I-M+w`S8m*KLa_VK<|+{y<=JsTtW$@fN?@qFyC zcaC-X$PWn5x?JgNZ;S=y0j(DLmT5P3GjS2cCo~70qoOqx@kV+ZUuU*buoS1{F?r5w zYObEeI2|H}uq;_*KEddXDD7Tvrwt|iVs|%G{ETNTQS!u+jXylvP_~nQhTAS@_I_6= ztg=?wF}b1N?mSup^JS={Mr-vhZ4&f6@#oHn78JR{s%~ur8syxTYN@fL zG=`peia+Fw$iu~jC6kzNC#Fx#0gE3`leX&syabeJVqC|#+z<|gJj31h*P)&jZ6=2~ zUN~XcQUj=RlitwF4~gTxSq*Yi7-A7yn{Rl!!0O4~9U8bP?&(gb`GeR0LA15@e|zTy zs#J&!evR;f`Y%F$lrv+X(%1DLZ#h}DKXiCZCKtB$u1fY2=5@lHW6-yoHE!*A4IA?m zqtErA)Ns^m9Lx@lY{1ypp?_Ew4H^y0(4rA||&F4@zFTBPxaePG7so;v#69TdB)Wf1% z#=%C#a^KR0K*Bg7tLD)86@M3V8xAGk-)#9nT-*wGORVt3VnR^;7N9FowKp< z!QJe;tHDNvj?Sg`#)wnjEeLA4&r(y9{z+$B%$p7EYIo8DZrb0@zBniOETyX{kSmNS z?Whe;-$L#z^(-vZ_lT~7+->(qZfa@k9 z2R`L++r?;t|HJ9*ekS&9f$Y15qoR)IlA8E+gLJrF*F^GSZhe$EZbP2bJKUZ@$2qW} z2@Uk~QW=L7u!^livsndM=MOz{jSaExUI6BTCXM*H)f?Q~S3NcMT5EUWUyR?n^W5(!=PkmT9f}c+Jr3d3{JRFE=s9qPVjjQn+sj$a$cTJ1 zBJ)hE1%8Dtd(O}I${$2kS6i|bm(I<_-LYG-P35-*`Q zWv4A-ucB~*a-$Sg2=tNePB$fkr%>bt2*oRney54L2ndV3@r3BFdup9WV`nj${M6T! zRMbMl@gC$q7VB*)xt>xWXsDtbF7KL+8-tsh&)zbq($}&@prxRcdm^AEBj1FY*ESUz zqBgO3-hStBq0_X|Z2s$Is|75UE6Y^2_@e7u9AZ|*SV`q+xtl~hOs4ca#OBMRPdVhW zW3g`Dc=aY+@yCIyy1pJ6<(xiCzsdM7Loqkv1lG^&Wu{|=+8JEBSwv1PIUGO4l({f; z7&$1i8Im>ogTEF_smQa_B-LjvmJ=w<|FKUqQ^HifQfEu7Db{P;u^ivM9iQQ$8xKUN zFJF>S@;unsO}Oo!l((6c2YN>aDI7b03eR#e3}Pxraq6%&6`P29VBD0TqWO)%0dV@i z#RjhZF$$}b1Ff!)xZkN?Azf_jqP^D$ZETCQYqgHi65T*7d$i3M#T$HY>c>2$CS67? z%YS^&My>{>HQBFbg_Zyr@!!%?2Dq9zhdWxbg72ZPmUA$g5g21d=-+*D>ISe*qsyip z#l1`*&>4e)hwmd5N|^uN1d0VHHerkMf|o8W;3lh?z)S7yfY?s)@8(K`9uV`VgcuNh zm)8lf95cW0N6`GE*Zl7u2KKl@R#q0&|5$*8loY>?o%^4qNQnyg;h+565`71YbX`F8 zWZYrJ_IDmRO2EXfzF$E9AvM7M^WS2$dynvRDkW3)43NSJ#dT5rT?fr@^ukCjJYO3l z2ZXGg$6@rl*u|2V*h+z)O|X?Rt$qiCS--=9%#8-ej%@9Gb+5IWVMmhJaIhoMy=(dK zoNHjZ%p-POwzjVueXDNBRG$zM0Z{s2P)Uy{#TXERF&L2m{Z4zy(rag%mHk@oB&&o# zw57ucndkn?L~cV8aNoh9SZ0~fDWH=Fq!u*EC5)xqdQM|;qUQdm-NhJn6Gr!f+6rDa zGxe5<)6ijT6|?W}rMzYn6sk81^h#;25l~%m;Y-W`8pi@)&(NwDp9jKoHdXS`Bbsa$ z9#+0>zU*D%pgXBUPtKMfyUZyIWO#W%8R!MrkVpK?cx2Zwd6qcILL-HZ=r4{}?v^}m zHiaXZ?Yz2NrV)5H?JfJS0^4r@i!3>_fhWtjeCf0Tj)}3xsluuvqw#HOW}R(> zht3AH#rK1gh>EkCYNAu`+g*F$ND&%PPSJnx^m&!7pT^(N#a0g#r^lYeF8Px$poGN% z(Y~-~Q~T;NUqBZ(4YYKFJTbWbm6G4k(f;80T`79AM(@6uAlkL2Oj@r0OFB1n(!~Z? zbNwkd72mgjDJJuxEdN)GPSJtbNXy9_RsIteBpl8l>HdEh|2Jo&j?#RE2b@XG)}-G`{Ck~W!E^R{`9J38`5$#D0y*@bvoYO-#U Date: Tue, 28 Jan 2025 10:03:29 -0500 Subject: [PATCH 07/32] Update settings Signed-off-by: Fanit Kolchina --- _vector-search/settings.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/_vector-search/settings.md b/_vector-search/settings.md index b03a2a051a..5d2317af52 100644 --- a/_vector-search/settings.md +++ b/_vector-search/settings.md @@ -32,19 +32,19 @@ Setting | Static/Dynamic | Default | Description ## Index settings -The following table lists all available index-level k-NN settings. All settings are static. For information about updating static index-level settings, see [Updating a static index setting]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index-settings/#updating-a-static-index-setting). +The following table lists all available index-level k-NN settings. 
 
 At the moment, several parameters defined in the settings are in the deprecation process. These parameters should be set in the mapping instead of in the index settings. Parameters set in the mapping will override the parameters set in the index settings. Setting the parameters in the mapping allows an index to have multiple `knn_vector` fields with different parameters.
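To illustrate the mapping-based configuration that this paragraph recommends, here is a minimal sketch of an index that supplies the method parameters on a `knn_vector` field rather than in the index settings. The index name, field name, dimension, engine, and parameter values below are hypothetical placeholders, not values taken from this patch:

```json
PUT /my-vector-index
{
  "settings": {
    "index.knn": true
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 3,
        "method": {
          "name": "hnsw",
          "space_type": "l2",
          "engine": "faiss",
          "parameters": {
            "ef_construction": 128,
            "m": 24
          }
        }
      }
    }
  }
}
```

Because `ef_construction`, `m`, and the space type are defined per field here, a second `knn_vector` field in the same index could use different values, which the deprecated index-level settings cannot express.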
 
-Setting | Default | Updatable | Description
+Setting | Static/Dynamic | Default | Description
 :--- | :--- | :--- | :---
-`index.knn` | `false` | No | Whether the index should build native library indexes for the `knn_vector` fields. If set to `false`, the `knn_vector` fields will be stored in doc values, but approximate k-NN search functionality will be disabled.
-`index.knn.algo_param.ef_search` | `100` | Yes | `ef` (or `efSearch`) represents the size of the dynamic list for the nearest neighbors used during a search. Higher `ef` values lead to a more accurate but slower search. `ef` cannot be set to a value lower than the number of queried nearest neighbors, `k`. `ef` can take any value between `k` and the size of the dataset.
-`index.knn.advanced.approximate_threshold` | `15000` | Yes | The number of vectors a segment must have before creating specialized data structures for approximate search. Set to `-1` to disable building vector data structures and `0` to always build them.
-`index.knn.algo_param.ef_construction` | `100` | No | Deprecated in 1.0.0. Instead, use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value.
-`index.knn.advanced.filtered_exact_search_threshold`| `null` | The filtered ID threshold value used to switch to exact search during filtered ANN search. If the number of filtered IDs in a segment is lower than this setting's value, then exact search will be performed on the filtered IDs.
-`index.knn.algo_param.m` | `16` | No | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead.
-`index.knn.space_type` | `l2` | No | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead.
+`index.knn` | Static | `false` | Whether the index should build native library indexes for the `knn_vector` fields. If set to `false`, the `knn_vector` fields will be stored in doc values, but approximate k-NN search functionality will be disabled.
+`index.knn.algo_param.ef_search` | Dynamic | `100` | `ef` (or `efSearch`) represents the size of the dynamic list for the nearest neighbors used during a search. Higher `ef` values lead to a more accurate but slower search. `ef` cannot be set to a value lower than the number of queried nearest neighbors, `k`. `ef` can take any value between `k` and the size of the dataset.
+`index.knn.advanced.approximate_threshold` | Dynamic | `15000` | The number of vectors a segment must have before creating specialized data structures for approximate search. Set to `-1` to disable building vector data structures and `0` to always build them.
+`index.knn.advanced.filtered_exact_search_threshold` | Dynamic | None | The filtered ID threshold value used to switch to exact search during filtered ANN search. If the number of filtered IDs in a segment is lower than this setting's value, then exact search will be performed on the filtered IDs.
+`index.knn.algo_param.ef_construction` | Static | `100` | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead.
+`index.knn.algo_param.m` | Static | `16` | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead.
+`index.knn.space_type` | Static | `l2` | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead.
 
 An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` and `ef_search` values (`512`).
 {: .note}
\ No newline at end of file

From f009e688e62a2a698bab1e43d067d2121ac9b331 Mon Sep 17 00:00:00 2001
From: Fanit Kolchina
Date: Tue, 28 Jan 2025 16:42:24 -0500
Subject: [PATCH 08/32] Formatting update

Signed-off-by: Fanit Kolchina
---
 _vector-search/vector-search-techniques/index.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/_vector-search/vector-search-techniques/index.md b/_vector-search/vector-search-techniques/index.md
index 377bdc911c..52db1d3274 100644
--- a/_vector-search/vector-search-techniques/index.md
+++ b/_vector-search/vector-search-techniques/index.md
@@ -14,8 +14,6 @@ redirect_from:
 
 OpenSearch implements vector search as *k-nearest neighbors*, or *k-NN*, search. k-NN search finds the k neighbors closest to a query point across an index of vectors. To determine the neighbors, you can specify the space (the distance function) you want to use to measure the distance between points.
 
-Use cases include recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about vector search, see [Nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search).
-
 OpenSearch supports three different methods for obtaining the k-nearest neighbors from an index of vectors:
 
 - [Approximate search](#approximate-search) (approximate k-NN, or ANN): Returns approximate nearest neighbors to the query vector. Usually, approximate search algorithms sacrifice indexing speed and search accuracy in exchange for performance benefits such as lower latency, smaller memory footprints, and more scalable search. For most use cases, approximate search is the best option.
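As a concrete companion to the settings table and the approximate search description above, the following minimal sketch shows a dynamic settings update followed by an approximate k-NN query. The index name, field name, and vector values are hypothetical placeholders, and the index is assumed to have been created with `index.knn: true` and a `knn_vector` field:

```json
PUT /my-vector-index/_settings
{
  "index": {
    "knn.algo_param.ef_search": 200
  }
}
```

```json
GET /my-vector-index/_search
{
  "size": 2,
  "query": {
    "knn": {
      "my_vector": {
        "vector": [2.0, 3.0, 5.0],
        "k": 2
      }
    }
  }
}
```

Because `index.knn.algo_param.ef_search` is dynamic, it can be raised on a live index to trade query latency for recall; static settings such as `index.knn` can only be set at index creation time.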
From 6cdb7b07e3d8d41f504cfea273d5b6ecb24aa659 Mon Sep 17 00:00:00 2001
From: Fanit Kolchina
Date: Mon, 10 Feb 2025 13:42:57 -0500
Subject: [PATCH 09/32] Add more explanations to getting started

Signed-off-by: Fanit Kolchina
---
 _vector-search/getting-started/index.md     | 41 +++++++++++---------
 _vector-search/index.md                     | 13 ++++---
 images/vector-search/auto-vector-ingest.png | Bin 39123 -> 35304 bytes
 images/vector-search/auto-vector-search.png | Bin 33116 -> 30589 bytes
 images/vector-search/embeddings.png         | Bin 0 -> 16138 bytes
 images/vector-search/vector-similarity.jpg  | Bin 0 -> 47530 bytes
 6 files changed, 29 insertions(+), 25 deletions(-)
 create mode 100644 images/vector-search/embeddings.png
 create mode 100644 images/vector-search/vector-similarity.jpg

diff --git a/_vector-search/getting-started/index.md b/_vector-search/getting-started/index.md
index 3645cceecb..141aa5edcb 100644
--- a/_vector-search/getting-started/index.md
+++ b/_vector-search/getting-started/index.md
@@ -10,10 +10,10 @@ quickstart_cards:
 - heading: "Pre-generated embeddings quickstart"
   description: "Use embeddings generated outside of OpenSearch"
   link: "/vector-search/getting-started/pre-generated-embeddings/"
+tutorial_cards:
 - heading: "Auto-generated embeddings quickstart"
   description: "Use embeddings automatically generated within OpenSearch"
   link: "/vector-search/getting-started/auto-generated-embeddings/"
-tutorial_cards:
 - heading: "Semantic and hybrid search tutorial"
   description: "Learn how to implement semantic and hybrid search"
   link: "/vector-search/getting-started/neural-search-tutorial/"
@@ -46,9 +46,23 @@ auto_items:
 
 # Getting started with vector search
 
-Vector search, also known as similarity search or nearest neighbor search, is a powerful technique for finding items that are most similar to a given input. Unlike traditional search methods that rely on exact keyword matches, vector search uses _vector embeddings_—--numerical representations of data such as text, images, or audio. These embeddings are transformed into multi-dimensional vectors, capturing deeper patterns and similarities in meaning, context, or structure.
+Vector search, also known as similarity search or nearest neighbor search, is a powerful technique for finding items that are most similar to a given input. Use cases include semantic search to understand user intent, recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about vector search, see [Nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search).
+
+## Vector embeddings
+
+Unlike traditional search methods that rely on exact keyword matches, vector search uses _vector embeddings_---numerical representations of data such as text, images, or audio. These embeddings are stored as multi-dimensional vectors, capturing deeper patterns and similarities in meaning, context, or structure. For example, a large language model (LLM) can create vector embeddings from input text, as shown in the following image.
+
+![Generating embeddings from text]({{site.url}}{{site.baseurl}}/images/vector-search/embeddings.png)
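To make the embedding step above concrete, a deployed text embedding model can be called directly through the ML Commons predict API. This is a minimal sketch; the model ID is a hypothetical placeholder and assumes a text embedding model has already been registered and deployed:

```json
POST /_plugins/_ml/_predict/text_embedding/aVeif4oB5Vm0Tdw8zYO2
{
  "text_docs": ["wild west"],
  "return_number": true,
  "target_response": ["sentence_embedding"]
}
```

The response contains a `sentence_embedding` array whose length matches the model's dimension; this is the kind of vector pictured in the image above.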
+
+## Similarity search
 
-Use cases include semantic search to understand user intent, recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about vector search, see [Nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search).
+A vector embedding is a vector in a high-dimensional space. Its position and orientation capture meaningful relationships between objects. Vector search finds the most similar results by comparing a query vector to stored vectors and returning the closest matches. OpenSearch uses the k-nearest neighbors (k-NN) algorithm to efficiently identify the most similar vectors. Unlike keyword search, which relies on exact word matches, vector search measures similarity based on distance in this high-dimensional space.
+
+In the following image, the vectors for `Wild West` and `Broncos` are closer to each other, while both are far from `Basketball`, reflecting their semantic differences.
+
+![Similarity search]({{site.url}}{{site.baseurl}}/images/vector-search/vector-similarity.jpg){: width="450px"}
+
+To learn more about the types of k-NN search that OpenSearch supports, see [Vector search techniques]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/).
 
 ## Vector search options
 
@@ -70,30 +84,19 @@ OpenSearch offers two options for implementing vector search:
 
 ![Auto-generated embeddings search]({{site.url}}{{site.baseurl}}/images/vector-search/auto-vector-search.png)
 
----
-
-## Quickstart
+## Option 1: Pre-generated embeddings
 
 {% include cards.html cards=page.quickstart_cards %}
 
----
-
-## Option 1: Pre-generated embeddings
-
-Work with embeddings generated outside of OpenSearch:
+Working with embeddings generated outside of OpenSearch involves the following steps:
 
 {% include list.html list_items=page.pre_items%}
 
 ## Option 2: Auto-generated embeddings
 
-Work with text that is automatically converted to embeddings within OpenSearch:
+{% include cards.html cards=page.tutorial_cards %}
 
-{% include list.html list_items=page.auto_items%}
-
----
+Working with text that is automatically converted to embeddings within OpenSearch involves the following steps:
 
-## Tutorial
-
-For a more in-depth look into vector search using auto-generated embeddings, follow a comprehensive tutorial.
+{% include list.html list_items=page.auto_items%}
 
-{% include cards.html cards=page.tutorial_cards documentation_link=false %}
\ No newline at end of file
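To make the auto-generated option concrete, the following minimal sketch pairs an ingest pipeline that embeds a text field at indexing time with a `neural` query that embeds the query text using the same model at search time. The pipeline name, index name, model ID, and field names are hypothetical placeholders:

```json
PUT /_ingest/pipeline/nlp-ingest-pipeline
{
  "description": "Generates an embedding from the text field at ingestion time",
  "processors": [
    {
      "text_embedding": {
        "model_id": "aVeif4oB5Vm0Tdw8zYO2",
        "field_map": {
          "text": "passage_embedding"
        }
      }
    }
  ]
}
```

```json
GET /my-nlp-index/_search
{
  "query": {
    "neural": {
      "passage_embedding": {
        "query_text": "wild west",
        "model_id": "aVeif4oB5Vm0Tdw8zYO2",
        "k": 5
      }
    }
  }
}
```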
diff --git a/_vector-search/index.md b/_vector-search/index.md
index 4e2a9ef676..c07f9915f5 100644
--- a/_vector-search/index.md
+++ b/_vector-search/index.md
@@ -52,17 +52,18 @@ OpenSearch provides an integrated  vector database that can support AI systems
 
 ---
 
+## Explore vector search documentation
 
-## Bring your own vectors
+Choose one of the following learning paths to explore vector search documentation.
 
-If you’ve already generated your own vector embeddings, OpenSearch makes it easy to ingest and search them. Follow this documentation sequence to get started:
+### Learning path: Bring your own vectors
 
-{% include list.html list_items=page.raw_steps%}
+If you’ve already generated your own vector embeddings, OpenSearch makes it easy to ingest and search them. Follow this documentation sequence to learn more:
 
----
+{% include list.html list_items=page.raw_steps%}
 
-## Seamless text-to-embedding search
+### Learning path: Seamless text-to-embedding search
 
-Simplify your search process by letting OpenSearch handle embedding generation. Follow this documentation sequence to get started:
+Simplify your search process by letting OpenSearch handle embedding generation. Follow this documentation sequence to learn more:
 
 {% include list.html list_items=page.ml_steps%}
\ No newline at end of file

diff --git a/images/vector-search/auto-vector-ingest.png b/images/vector-search/auto-vector-ingest.png
index 59344fb5a3bb98bde131a786e843eccf8572659f..07550a3a95ef6d52eea4e313146b1447fec0d619 100644
GIT binary patch
literal 35304
(binary image data omitted)
zaO34AMjuqcM%HpC7^aKIjD8O)*ykMo{96+b44}5jD79iM8ka)d`gH+3BN8i6_U=AWB^*C-y8#zl=g zn%;`L?axpwi8)B^mZX781ZoRk{*XhM03}IPeIP!v7QLYzs6h)k+sEN%v>5)O_;G3Z zOph$1gVEoh*By%J`|j%7xiJ(J>JC^$3=_1yUAYyzj(_gdoL>*Fa`Qd0Bw0{PD0M@ofzMYTTLzOR$ z<^Z-BpS>jfE)NPe*rA2LXa z<@#@#^J?mwb}1gQYXS|_pbxc!40Z`5FL>Sj&5g@_ZiBt4X((ko&+%1O`meV4Z|+of z2gZvHC{M|j7^cZO6w0k_jyRtucn^kFoW$i6Gd&$!x)xow-LzdF#%Ge&oo1H>ck!>Z zeIloqur42_mYiy>QHnhH8u07K=9Fo4PQ%!PqyyTY!_=EGY_djH66>YO|XVLtU6r!l79ArK)UuY(Z|vIo zF3{+=ir(V0G_S0s!uY8eWV;gwcJu!xb6rns|M?UyA$d+^Bj4Dx>CH+h3{G@P?!3x1 zzInVHX}Rt^-SVSKWD6S*bM_;*s)=y22C}@#Sdet zz{H{pU|yzNlgpwq3-N9(dXKW70qd+(jvAYgFMG4Ef?rK`D>441QF6OG6X5r8Q>pyN z{WO5St$M!-Z@7D3N9lBUfZ}yCb@y5NbE}@}ve+%NWMTv@nQ@69QoWZYuJ;J8t0nymfv{maZTj&MN@^QTW6~=_@fh{F>mD8*2 zS}C3HUFcHwp6R4L00JUrz_i~;8^g!$(U_{k>-q7``cI8B(HP)A26E5oQuJfpRL*R+ zve&d(WSwR&UIgW|%N1XfFd2|?eImr*%=+xrQO{lJ?o->IoyeHnFpGv7CO<$xMQt9w zwXwg-60vCgp%C2BE>K&tm-qEx7DN+;JF8Y}WZ9-#h(e2dU*^t(Q;y@$Pl*yT zjKT!{EY)3nAR7`ilYFM$KOg#qV4&^2z|9A9y9KCh{F{wtrwhV;n^5k9VzIY>U@e`{ z>z(C}bzYf$Ec@M@4xc+)HY#ed-&jjP(SONgr)=3Mp&{9zW953|4rA{i9)_LY?H5~2 zbSXuUdfV+TUUzn!w#fQ5fz2}Cqia7sGR^+*SA;2ryc><8_x=^CYBLy#$G;Nj%1Pi4 zs-BED)_~Ebqk#A#z5jH~mxylM&GAOY4zmS`^C79sV}~A8schf`WO$U4M1%jH$aACD z743c}?Tegu(-;xoCzP0!fpUzMBxK*0Fg66Xi)9yFNs`FhGjND(==G6Ftl7_!0hL9r zIf{4;J1;fL(e_5G=aP%0oO(G1-kp~MO-Itbqd4c?cdMz@G5n+Y6(Rec zo1~iL5v(okjHWEnoQnR3shJG{##GCMnmu({lB*;^Rc47cqXj4$>- z-?L79!lm`Mh_Sl6qxm$GdY!jlJbZnc+a>CfR^ab_S8#Jg%yWD^4PbYjIFyx>yHZeA zPBSp0Xx$H^b*@tNs~1B*UVPVapSz$?q%5xkf8bk5qs;%D+4@jbitOfgWM>ag>5c{- z#H_=>t;+=%>^ym5%U3LX0~)VZaBmCs_JlNbw4ER1a=t#nt0xN-Ms2?8+FTG*x60Al zA*-4rmC9}_b{_q>KU+Px59!_yDkx$TG%-wkd}3~uhem+*SV@4T&U-FD5{2za0KK(l z=06fbP^oEYrD|(y;q^M0Qcp{v%&NH3$1%CGtLt@CthOGhiyz?ts7a(_DN9EheIE|c z7V$`5You^9ZqpH$eN196lLT(4ho^?C-U zmA&7x9?Oj- zH5EUBSQgXj@XX;#TK++IKd{bB_20)^?Me^+=#riM6-EVszL}~x{kgWZ^uwKQz*73! 
z|0TY##fx9DWX!P7G)~o7D3(>3{|F!BbIP9kDK77X&AR5Ttah8SMW#*b*=7)bThE%1 zA*j2df*8@inbYvhx08>Kd4VA$%uViY9fOL}s`6HOJUN`AB+~$qSw7s3|Euh_Na9V-IlSf|| zOGab;jqI1+(hLCm(~k|&Xv=gOi@WkZ6n z>GGFwu=xPblot^#_zZF@miKl9%@;RU+H;<%jHg!2=e|i=&n(b`GJgb~x{StAE`i`8 zk_$PD#_V2b(Rk!b+YvaZ3sY;eNYR5)5$z~Q0~e?^5dN|; zTmuWFVrAXj8ez2>@?Up@-V4{_ znL5~o{k3rXQB=KDfGI#gHVd{<^OuT%T_I=%m@*EmEusIT?_gJ!vCjszy_0wl|LZPN z?K6x%bhc(4yIruy($wpPz<-JomPU!I3^COdZBvGAhKRNW&HTGWd?c1vQ!hjck^l8( z@-;!Y1kRhjB)osiAApJ=4b;UH&-kx4Il&>wgmSN6m#NUQ|KB$4sz_5x=0lNf0KSA(PD!s=|fR?78X zb=HB^c^O>f`d6OdwrqWz zEC&3Q?01GW0+?>>{}LJb@BG3%$gssmrQV9+?;>Goc_d-FmGxVG{=f5M!l+4X)?eE$ z{MV~uU~8n!J>-_f5)qrD^qWu+}A4d;V>;zR5auyq^dA>7=t)7sLIm9 z0vltKJuoT`6RA7lDe|H7fb>n4MYm|sgklKrM5`2hzx6W${6yv5lMNoU~?hyiuXPD-=j$CZ)?s&DZRLF0@Rt1KqL(-3*Tu=Z-&!Hl^5#r#+@ zf8WRTU9hO@vy4dJBV_5@Yld2_gI7~H1}|aJ2+-)t&I7FP;%(Q#gjv(l@W)!M^X04gWV@Pma8lcpKLg4`1&e4 z3Oc1Pf|=LIBMiK&29W(*ZBK3-E~$J}kbeJU>AMDe!s1&O3!J|nn;nScZ`?F25TlD4 zW^%;MG;&R{JqmK;iL?Rf?MV&c=sw<-`oelVkc4xWC(SnKH20X;p+gNlO?Zc_bs}5i z{$C6on7(toAj;=sPdE2qf};?*g#jc;rr(|?5(F>>!Ig8tXME8oKK)TjTl{3YfzakB z00pK`h)w$mmvmp5@H0&pb`{c&A6OOM3E1v;g5uB48(%oVG`l425VXPO1rZM*?|o4t zefPh^wS7YQ0=`KIBN{M6XgvVJ9Po?i^A3!gh@U(ZDtw<%SnQX0LCaiOc~E8WB$4Ml z_XE=da3_Qouv=l*Tp}}6cX%&<1x74Ye>Qy)0RM<@njHx4XNDk@O*A|9Ojz3>=}qbf z!WFQbfXc4dDpYTTm;#XSKt-X$+z|!1L<(vsv;Zl=Fok3J$6{nblZ`qk=azmVoYk3*vVkQ=HQ!d81!jV{zK=$+Y>wE` zHxc6Er1%Z@ByA#Wjcq*Hku)I!kaclvqwc%np)yv0NPtqHLQttn5lT@) zg;Lo}zA67g8R@54@j>~3%81G@Ro+TY8Ij2wvusubYn1X}mpJzPKow?111d5aG)g?h zBUNlARpkbiX!$Y43VHrQS{0!Z%+e}laO!2{=wi5H&-4PW5Uvmo)8Wh_E-%NmvN08B zwcV1vGWN2&GH=C#GPknA;!}A`6_4C#RSp%sUmc2Gl}}=~7>-GM+~d0=qd6W%XFBle zJ<24ahV@_Tzoz_YDoQS@ERv{|xo+Eor9-20EjKDRD4*6WSYowSv+i)IzQZ~-bu#8o zpwCU2VwmW6Vs(0PBISN|ZgD1eigN04c67>cCOAO4#dL>u?{mjAsN z<2~|%cg?f!Q(sq$sy?T_R;yTd)i_jb(dce%Z82J1QPp8AQhiyiVLNSdJv29xY}z`1 z*MK)_lecQpiqOh%hj3@`O#Z9@=@r5mA{8POj7*ZMFwk@oYJX#=eGZtqEJ$cq3Z6## zDUdmurId-2nM2R0`CCiA_WOd^e9&ply4aDFYn0>bq{>mz_8*rr2albK{V`{OjkiGa zZW$ANqmG-Zn=c-?_ki=mo#wggUl>Ef^EB&a%UY8>E3GTfc`+4o`Emv0sg+sh7VEX% zn4KVhh3xv;MdR)Xg)qpy>+2&!LRVEbZxS(Mp1`M z)ya&*k;C0pfqiPzKvQtj_z9s4ng?$-njV(GpUa(-J>LBm$B5hwK-Jw=$#79k0Q2C4WT=r4DFfh|!XVnWh;OVKM#i3*mHCO;ydx z0hvUK_zFACYRkS@Qaxx7 zP_;b;%pw*FcrK?`)q7;THncBSF9$6{x-^{@(EpH8o+Rh>`Ayb#!) 
z->R9-Qeuigep2LjQ$}$j#EUEy~ z5b+N8ihE|xXsxbOqp*2{k&~9|GB1=*jQy)-BPiv zj;^KrKJYy(+GYQ4yHou|(L-_HsE=w_Y55z9+R@|I9uFrbg;qRI5 zuL%YODBX%LtB%wX^fC(Xrj$f3086T-~`_3UK__rnq#P9q_fQgN_;F{Z+42u z&BklV#=mAqWXrV;JnL-U52gHJhce z?Cdp6)1TySj}uH518Bzwh~|R*yY!Q8?x%m>gK~eoD5NAJ zDGB^mGITIDwsADGby|nRcLFvwXRfU7q%JGNZD?yvuWw{)U`!9Nw)?08!VBOA-dY!yW-|dJRI~qEe+c}xr+7N!Ut8ZZI?8HY*{L#_>`unGx#sKsG^kn1suVDcP$na6Z zz(mi;@W0vyR^|P8$}MjWFt*YVHMa(;2iONc3ll5vKi>ZzCI9L1FE!QwQ|7*_pVFsXZ ztwBHpK_o?mlmVb8S&#r_F^u3&C$PW?kMnjC9l>y4UndAuDj12RXgo9_B%zI;l#1Y> zV&H|8xu~E>V&OMy9=1e$kK<`ZDY6nIi||zIZrP)2?XD(gBdr&`cnfiiD{P%?` zS)5>lmZfjA@J#M8YJEL4u-0W z@OflU^1lZ!@jD4h@v|Rj0()VkpB?IdRg3L~lEKp!=;F)2kYeoqG5!1Zm~+B6(*>0O z(y0U}coFTAL5c$7%mh8=BGlitC@SaMP2nX%AQ?3jmYmxFt?H$LK>ttfz9HoDweY3Lv z8JC43rsv|VvbW4z&{rUtJ{wkV9rChtT9j=f&2 zRWL?JMnp;hI`Pl&Ms^5{{3SwQ-~CJ%VGK$>ItoQ!b9pdW(Lv7soq|48Fh; zAmnDDEK6%+FUoRn;fak#zE0HKXf*kgE>_#6=QBOvJ)uP6JgEz})8&?V?O+yKv+@~M z*4}O%|M<~6rDchLuPB>Bx_B#QeUxQiN&mV6P^Y{gn{|;#HUDyW_24qbYKhZuYS~Ha z)qoxwm3m~!g{E+tKKFZe7(h>)3(UrgCXD8Q$*@k3G-MHxu5Log-Zr0!*{>#6rX;GbF{E0S%Q& z!#=HVT`OsN-(M1&F(2Q4rm|W4Q!RUJP%61>RNY4nMA~E`;;-~CsSH;cZQ?qeF0RjP z68zO9^ds9jTU}f6v7t0MZ+~Vs8Nww4GsVDI>2#fUKUs~TC@@GmDpoC^SAD3vDOM@hg?tU^uxvm)S*07~iB3Ot6?g+H?CoiAxpj!d zTv$CR)2T!vAJq=M-XFt^tFiC5-x*Z0x8q-0VslXPb#y`3Lyj z*`of@aSp9UZ3LkfgsG{i=iMabji(pX5r>6MbeZ_kRNu9UR=bg;?m(P=M$=(Dh+M>2YM?bL4 ztxMpq8yJC1FeXLhvgquOUhIgI$!arVjXlM71uS}f|%{H{tect)@ORQY=_%QgZ z(_wT%e@w2#rQ)1nmhPdoXu%n!JfjtDOV--(@Qrw*HC@qD!!GS|SC>$-EUWq2XKQX3 zvH80wg?c0QZ7*pCqNIi*UHd6)C)-X31nSoAEq zYHm55x(M1Kqas(@ZkF%EvPN^~gmI0tr&f|NbqDVltKo9@5iCRN!Rk zG9|6jY4Zh(n8I-%xUsMP41$>!V z`R@yR!+HyW;NF*q&vWvBw#5+*S^*0~iIuc5RYu4H?-z8+geCjsr?s~&oP-j6hh?8^`HfO& z1je$*ARa_5x7nBMtibNW&04Gpj$xcV8-B}U{HBI}Jke;Q9iiwp{r#r+4fE}c6E)#i zpz{&O*L(he$wKE(2kF|d`(upPb=vutl!q%cY8)IR4<^mfbicG|YoT#YCfXXHKh2P0 zrZJoLVd&9!|0IZJwT!rRFF_0LVEdCPmta!tJ9*Lml}&2kF(cKbLyvJ7O2B&EpnS=h zJp+4a{<3-LfQITA79HWbS7S&$+xl4JPE1V?q9}RZH0#sdW4~>Iu)b}=Lyw}`v)jpq z(Dq`PA}_44ut@Hy`5y_i*6~@InBTs>X8niY6w*qb~@)BCQ#Qv5!uEh7> zKV&f%nkzny$5s{1MmX=yKuem%5v*YmkgrTsamj{N=m*}Pt;UfeEyj{ZnFb~tzLXu$ zSEO_4?qujhf1pent?1;+NVx5|vvhF3w-t0)h6$XYhvl>&E-I|3BpG`gTX;{AI5HDVrT5(7lHI zWODsiqy3Nj&A#!!yVm~~TKs>27XMe>|Gy%+|0C-5EelvSy`~ENJp)ZplsjohsepvZ zc+y_%;z$4;D+ojM`}`91>MQvcBS1$&bb(hT9EjDDw$=@v;pG_?3dCa?^a8A?B#|cW z?^X^CG2c5OBun*9;GkoDU!g8MR=jDIM5Np3e+0Xx8Oq3jeakZla}&-UW&-~#vlQr; zbTsU_|aHFDU#n2R#G0~T=*ZpkOU^EE(Y?Sn0m>7 zO^lxz^aU{v9X9@l-SJbF^qmh_X{?XV^XtXm|u)pe|az z2-y4*1G-cHU<-Psd@&Z)k*fb;m3~kIhw`JeF(aTJ1!%!|{ES#gW2b;!WK6B=N5^~| z?f5akQN;8|6#g#}I8ipl*vE?1V?T-&A%GIbPcP%b1U`TYxqdnFLCfpdAF1tNepJ^?`0_XOAMjin28M#H-y-EKRXBekc3*C%&b);TmS~W4b~TAZ<*mkrU528 zNK-_&Q_v=3(P9sT-+VloE485bo8$D^E_hNFW|7@AY;D7Gc9Fv=4C2cT*5!N`S69O2 zn)Qq}E1b%u%7ZNeTviXy&xV`bL4eCXj5v1dCCXOM+andH<2iWg6#A$JnMAaN#c#Ss z35z&?&f*2ZfJ>1V^g>ZVMHd72#U$i2(aoDe;1g~@!nrv~m9Vl|n7V^(0+x+=+vZsu zDE?vl#-4%%KG5kL786S@gQD_6S3rvh3gv`_M?hsl5B%b$T8)0-BZu$0aqfW$+W0P% zyKW8sxC(<}Pu(jT8|022sMKO$C#8pLm){nOz%nQ*9{hYNOvuVQV(qmXzT$Y777kd zCX$|S((!`$S@iOHhcwm<{DCjqtT^E{44_lBt%V7okuiG21rxP`SQUUqtbCEkAi|h3 z8K)O6OAV#5ZEE>hyDix_Xw0i?V^?j?Ci|4hUWsq>8R7v`2>rkn67ekOPr$HG*ZZ1A zQxOB)51Qx+C$cEa0Ii~sB*jPceL&$AqM!l1K&z+ZL4it{s(Q5ngvCs;UlgZlH(s;v%Iy`Z?R$%GLQz7FIeT{PX&Fc)5mWDCpX8I|WzYHy0|&?+Nh z)g<}85W zOeT$fvev%5KrWYU#j8NQmhExI_40IFd9c>r zYTl|`qG+<**9XxP4Br!t$78oYh9{H7jV7H;+htkV9-N}<4sF} z?%t~dSv(va?{6L_%Z-S;!znR#w1u4;cw>og2#%hlU)VssW5h>;Jb*@G>x~M^%MN?2 zv5|OZ&Lzz@2?47XP5kL)bTjUo@I!na7gsA`xhwzmKstvN$Q%E>-TR%!O2-SG|JdVi zG9~yxqhFtX)y;*?{qY=*{K9Wp+z$r@F)|Uy^OYRe zCCa?;n2f&z?`1RC;Xy%}g;>$cbXwW@A2vRbq~0J{0~gF_8?6_=+tm-~Pnk@H!O%D} 
z4LEETMnlIhbP=POT+>k9k_qH#IzQ!dzRs4ajJn(&&2FC2Cwwzk1#9au7wdvYmSFqX zFBWS@0QkiDAj~QVZODoW!GkyjJMLGmHf>uHpF>qj#cGtAosxJfF*PR=Riu*bsY|q2 zd-vuwrtP^)8c50*Wdl^*lAWk;7Cp+K%7s#BnOu&4My{uR;GiqT#En5uGh@S;F`-HD zx!+W^)$v<~=9Z(;Y0f+`8~%w@MNAZgnpW6)=Ht*shOK*6l0U_ zoM91eU1*Tf0S%sMw153*lk>|VSPUk9o@tTL;}55T3#r3%J7nCP%F4=1lah-m#!kom zF`>TL^lw{ht*({OR5M(v?jr`+V8x5|Ggf?hrsLW5s7&J(6-XpzD(YelBWWy#I|Ffl zj*GH6Zd*$3M-8xDBs@I9+6MUfS>_1+b|ie)w+AnEJea-qoQ0YopMM{2c?RO*hshjj z%h&9I>`=`?9WtW>m~EwyxyqsiT&w>Ci~n(|2#%TVvxTc9of2zx4d@;|lUV$&3_jEI z15q|`vdP}?hg=SRW!n{u#oRCRa7!@QTkUd<+COb`;25JB9R0!YI5Jr{95%UfIs7JP zD%relBQKS}g6*f<iC z1VgSajVxDX8o2YB+T1UVJb&r=i7C_@>b0ZU1UT-gWb(DpL0Ye>#L4;eM@HMVwuvgy z(bQr|F8KQv>YLW*Z;$T03dBn2)?G7QwF7`o0v5pQ*?~oCwT2X`PIVMlN59f4-q?eGc-|UM&(rr_UmK>xs`otD{qc?Y zBx!=OP3w&L0w9YWqtf4S@j>a&<#2C`MB+XeG1N48ZM&>|_z*UkiXJ72o;r+!MA7XO zn-24&0eYt472w_rDs|`;3t|ANIg!nDs(A)5Q>er6<1Wr5MPp2f0GCC01a43A2tF|LLHdomRWY;hE&?&BY1{?SUaBdp;XmerkQJN&Wg=F6he!^0{Or|TZ+=e8xLZbn<}PK%|q z`&lE_5}hSp_P2(mL@w7SQWM$d5v%buCZwbKqvi5)bFFr@`}3fnYTZ_MEZdtSqdQlh zJSv2)9zoNAX@X6R3MBCSPi7d`o0CfXm76+~E5tzmHvdP`874;NhnjOwuE z%aN;)Xcwo^sM6pswIriFitT9QL8aM?AUoBy!CA_-_+y@aZ=eE5V&my+vI6%gxVST#bcB>$nq2&z&5w9E?JVz{A1){@+9d zJoedLUTa4a1stCOk#y^i*}C_qF;ck&yg9}JXE(Ui4^BRdS1J=N_9{yAccLlunF-!_ zVxfTNyVFNC@Kx}(+s*xAg;xGG{H0~6n$$nW%jk{3pk6$^AF90$6tw6;iR3_XBXiA8 zMXHnjPv2~+^|rix+%1V_jyHoez6Wt!~>Pl#Ug zGyGMqjYyO!3|Sa*v*xr~&y7Yf&f?M9ZjlR9!ez-mA(_orTUksAGGJ-H7HhT_myC0> zDaswMpV-o94)sOnl-Q2bP7TP6JB(XyD3vM{6WcxSw+D;W3%N^Yiz8+Qykb^)8U++9 zYy!v+rrIkP1$IVba@Sg@r(Dq#aa?i5vppdcYR_2ITLAQLOEsVxKlCOSL$-h7OsbSv zq~0F`InRYQee1gIp=3T&Cs}&|AUZ)Ni9M-=-8bo`G2}NIL%Y2(kP%BsFaOR> zL*z?60_5NWOzM;-3tf));BW9*k@2&$%@jJpuxBa#L?G{ePqUPB;ZNuI2Sud!q$d{1 z$8BvLK3&IciAe`$i6a&dJb5e=aC#gyQO)OSDokUXCImBRc9Rj9lX{9*thkM;w%q9M zas3tmVTz(@acRl{mmoApmZ+EyinX-d#I-|ej+l{7|E^?Knb;`p8|bR5m2mP^Vl(Mv zjoBp?JYK+W$30FKX2;n)XrM@%Iul(11sB%}VG$+`4w+lM!a(K4^{B$_gxCGk2chm? 
zk{bB1+O=}--B$#pugm8JJs;)>GdW!-9j;^A?wWnZPo~MRT*42AHnI-mnD5Wms?T^_ z0G-e}b&1Oj=l=BqM}-~@Vp^7q#q9amjxyWfrSNAYK!m#l9>41>g#I|HvrsMlNP6me zI(SR3w)qpu-MBB|Sde#oM7{^$Z^(u$5eEJz3Ma*^hg!9c(Ob7VUO&WzP`Aj_xENEj zRFX3LJk&gazoDTtFKl}E_U+vT8}x%OSLby7+;GMf!NBi^K_-l*H*6XRh_;!e_1!t7u zc)z^im>uY+UFqj>e$ebK9qb@JZI-mLWqzP<_@XkJ zjWi+x-uA7|$giw)j;^>+l&-ty4CkL}8pB1Rekklr(pXF#V0Z`tH`M_6*BJb)0aI{p z7c)dY1`PFR{Pu*<+*BE{c=F{`R!1TeDfS-Mkk076a_&^>3S#%88QaJ+ug?Y2$Dd7d z$VVq?sEdI@ebHHF^EaVVKAvcXluAh$9NxSxPX|)g&OZ38{Z{W*t4(u8-)1H{H_xL* zs*wu`kaQOvc7y{;_4VVhpEyrWs|OOfXgVGokk$@Xv2wgQr&zge%9)BfDz=R+nFbb3KPZ=-wwp%J!6bgL~%?J-oW>bNZT?=l}TA2iA~4p z{zcN@9iPoT61_C3hh8u0g5QYWOb!d(S8g&p# zkn?gETtKYGD9rwkR1dji!6zr(2hSCr2d~j_VC*${5OtwL$+hfYYOO?KO58~LJV~e< zepqbH`e-Sfei`hkfU+jFhR!w|8oh^Dv7V=cz1$5J>iqigLaFRK`MOHq|ramDq%6#A3pJGC9apeT2S?8GlSw5KL zt29B^)FJ>k`mJJH0{Yd(IpLn@!ledFYNWOyVU%%eev-v>EEPg52A2!D^YfO%@cKG| z}C)kuUy1-6nRj&U~oX@1da9w`9^4I9z4fs)2hHSEOQ*@_`G$&GPL<3 z3cP%IyxJ8>qE@N$dUjC~#UOhHB1rjsQRq6GRcr#E7bgD49TdlttKH!uT_Ec)@k0{j zWQ`8xrTcVRPJqq1uARy7Pimy^2X`Qz!Dbn3IbWd>ewFs0F-Q=meSx}>=GRR&@@Lgu zz45Ul(Y?)9CWEQ8y8?Q&eeq<{Ri@+kBUwBc+1c5cOvG07Jq(>M{^R`5BcT*%M(1tU z2zRH;rh&?3s^xaab9A)YO%tK@LrF9^wsY$~?@>UqWGlnAgUPu+h9Cxy$7N%bW1ZD> zrk<&-y*+2ZjQravOE&=irfcsU~%N57*wAWUbQ)Yvs zvdRu-=ReVS5hshahObYzoE~?y6}s&fdk*!s>-bu`5mJ15KthPkxlFAR0hpOrJNoZ{ zcq}UP$~~-T<)M$^wO<*h-Ce^XzvHdG`nX<5Wx=J#@#)-c@lr%NVvNTXp`M=A*%}_l z4z|zv3{~x^$L`QJd&0&Q|NhwKRWjWIzmu8&at&+MH=EDT>uu(4X&d{+OV7tq9Ck9^ zG$pS@xC}3MTK?k+xHwIYw}K8QF5$=5t44SHzK%Oc>P1g+q9*13YZzPS$j$=sy!0)yE4$H~WSH{{fOnX`gJ*{_U&!pEVMU zGvOGII<3g~S5<>WJ$-@bKXiP)idRbvjOyLDeoF;&*bM0op(X99oI!zssTl=2Eu@~O zEfcwfR&J+ko)jF)7<*&Z4Rrf!6w{Ej&nYUu%-U4ktsGgj_x_65ImqkcWoTeAj=D!+M03Gf zk+~v>EEoGbNeL%bXDwvM2vye}(0DXo83SYpHxmO%Pga_zN0z57xO1efyZj;Sk|1O_ zY*s=oW=pXu=j8-m1A%xyZJWB6l_?}AhhT=LsOY-;-wB7YiO>MI7X4L7_)Dw8m11Si z+C}>Gzi%z{d895BNJXCZ%|I$Cyy5b>vv$KP4!;#=>r5Ak)v{T}>IlEFpwgH$ zOz#dFhG#!Mnrc71y4J0C5@dz@TYI@GSK6%q9sQ3tw63)5Twj3PZzWSC=0 zW57>PiR=B2hd{tCrulXWxkSJl>KWfFeR?xh*hHj6lAY4LzqCi6#%(VhZAz{5IY`eF zsZ%;|tWY+iO1wx~86%I9h!?n8o|u2OJ9T;&ofa{pk#s^0&zPk-sQ5=@P*h~EC1QuX z_xAr<(b4A1`&ifWJ2M&{_p_26+FJ(caq_iz=s;FvWRqI2qb)5biOo`Vd(d5>qbV4X zz$SglP1ks5SSCg`m9Z-j%iYaQ==fl@DH4k%-{o|PQmcWMejvr!ruoxXpzryQ7|IWc z-+hbF@I$**E~75XEi6;P?!Oxdni0;OY>K%&;e~qx@}Ze$j&Fstz#a^(3{eknS>{eV z0^i96LT=0r-l&kZ2yTM+3Z+xFcx{&?hfJY6?MrnUSI4yT{4eRP4_9#>GdRS%E@XN& z>7!)CIbU{Dny&V9E?VtB`Nby=8|ZEk1pwB-&Q^rDFBKscXt5H+D&CVF&6;74=kgAu z88k}yudE~+s!4dU=09OAGa>O5KHYi!u+nzHeZJG0ULW1$2&j+FE+{p!tRU>vQC=uN zUqF(JppXeNvr1O5A=RRd08A*>?$WQ}?jyWWZXF{UF{m@J6-dlf)7Y+iq{LP_sTW;{ z^0|S@p-HYz6*>?fDMdjftU>5zR%?rtBi5byEVK<~V48x_)4S z@Rp7DOW8w<(}~&+?b9z#I4qev0P40bH!CZ6N=&7g@49u5&LaiK_D>Ro^Hi9cBH4^B zALRF68t`n3!4*Bm)lOFsMfkr4gr&qZOB4%dP3w4~#73y?-GGbte)Fq#^Qmb` zc=1v#ex!-WKENDYI*Z#mp9<-IH643ZC*Cp%hZ)?tx3#d6ZMO}E!KYcjG|x>J`PIQ? zwv>~@T_-RzJpK7fYpVgOp^d$hK>6UXjT!#6RvigiW9Ct<>+MZ~MuEv@ir}`Y&!Ss! 
zcx|DTLwGVD8Qsmi=X28lGuM)C@kPy#@pmm=-7(_N8Pe=~Sq0+T>A%b^>dk;M?FSR( z3cSH49)nYua%3)D!`tY=42R0{-jUfoU*Y$tanYRbP_`sW^e!{s87CTO9#hf-WUHzr z4RTc&U#avzqsOR)W06AsJ0&+pQm|cxP1;24Eqi#AzWZq6CV3!-4UasmQCckNK03C!XcQ2F(iC9Hne~f`nl?!!|s+RLV2h59Egfv;dRB-{_vpyqgJx(VL2X z*a#OLM)+12S_}b9)NL?^EP(C)ylmXj=+3fNyG+55~NuwJ^}4l7i>T z1nsz}yG&=KpvYAWe!Xw_YH}$r5jadH>v5bFY-rl)s3Isf2cC>t(~Ls!?)MSAu7omZ#j{z=`0_tVrYoZ9 z?Di&A68|ccG4s+^vR&dH#Qkwy9Ut*1o!TWlsQp6&d76I@!^r`QS>#)>erg72-tlczM#K@llP z<%3Ly`r2zTqG-AuTOGjlQ(-?#o$O3dryWNuCiD0;3cE|`Ot8>t8Y0zWD5Z}4D)%E+ z#NMB>0=1&?NOxwH%sLT;O|ow|axP)BZRE54l~DY_bmOYSi}P)l;=;QHx=rhA4l%Dy zd5y9Z$%alQestnp_8(JT>bjdes)_SZj{TYJN0T=-?sG7S6sD8Soy=l^!EF|})>&wH zX#eq37X26=)7ksxv=C5)bsCB4WV7b2>6Fz29bX(f*7LcB-@yA(Nz-DvQJla zBN~YTWMS8VsP2Uh7DDp`7&|tb$OUB{Af5eb1S%>Utl4e|5yNN#k zmscv?v9Xyy0^i;3yj})(%S26j^ke!DR~`BYI?bo@{Z`9blTfOG50n!oywSDhW9QSa zs(iW@kWZr!N|BRL3^hL<yS4TY1}fWzNoSKUx@jAv!nGI;BBOK^Z_x;n|A@wBgRwShhlgKR zA}Z34=p2AK1_EBT?yvI;E!1WGt_KsKN<}h4Od70qu|~9t3W;Jhk0;5qTXaQU4Q(BO zOJedNZ2a@~d;Fuha#&fgc%2$h*=$~{&eyvPB-?RGmwY{+*Vz%B=?1pKwlTx$H^hWU z5gw!6XCCH3=cAdD3GYsFOTRYK2&xV$jR;o9t%Jz|B+3%Y976g0v_C7m4K5(0jhb5(bHF#{LvGcs{bxppqT7$j@ z-=kPSQHfO52z*yA+&4V-CM_a0KK8ev$Sb9n$GI9 zIKailekCd=_PY-XIwq=}z?Ne!BABObI)6)3VSRYtOF`r&S;;X4PSoOPm4`Tjv{Zd6Ryd;MqBb>{s#N2y#B^-#YM7er4cbWld(fQZJpynx8}2v0hJJPKN^4mb1!A|f-NztrA%EE9{w_!R+OQn{6kW=${o zmXj7;Y2in5Gf~-$oX2$iDNG-xI!)d6p(L2j`8jM5p`dR?BYmvIX1*XCJv^{ z<=Ex(-1FEsXb*S70#wZuS#JIx77_gG(25kIF3_)`5Ri{wk>#mNiD62e)*ho6BM2ul zyL*A*4@di3RW)NXNtzCT~*pt_n3Kuh2XVVEfX6~>XD56*$i0lVVehe_h_r0D* zB85(CD?8CtIvM=NF+V3I(MYH(FZKBuTD$2HgyS4C_0*%okIFn08b5*_na`(Z5WK$j zx>xq4=Gq|9`sL-`?S#jXr@LAJQKx5Jyi$DOzKH?%cFh7jhMab~zf`3Rzfp@eM-YrU z+KE2Zy=KaNU*t(9Xd|f*li7&In3<;3RBB(t9P0brOZ{kV$J}eWo5HZUTlvr1dOYix zd>TJn8Ne;@XnlXZ&p{%JnRW)|c`!6Eie(uVVtsPXR#~huRw&nGvoGf!UV-BP9wuli zM?s3!x4;DW)Sx7lh2rniZhAmN9t1;gEiQrrOlF95zdg#c+w3kHO?|>$+y>$UtgmZr zZh87)PrC(@Tu#wyPDReFRtozu=I=lb)_iKna!#cDebse+v%GOFyn_4wU%w3WUHyMy z-DdmG_9e&QZFoH4ZAYTlql|}}FsyjP?lE3X-LN(v%Wdz!56-b5-FaPYgUY--$_IMI zJa;Rcmz$)`Wz0X{pL0x;;y3Ei%7Ia(3FZf3TbN~$cTS9WrH?XH6z$gQTIIPlJM1!p zo%V*I0Mm>bozD`p#W9S|rV1mVNtB68!sZTblFt1s}z%GVbczt210ZFtq(?|(M^+SNxgOJ+`XZ>VgvS+(d!W=qI3VO1Va zF)>#bN>jG(uf}O|SVp8P-R*j6PcgRFFy+-DH_)yrc(ys`U*Y$}eB-bs4jDmfqsLdH zc=R+XQJSf12jS( z$>{vxd+?>^ud@0%=%?%9--fGR&Z>Qmt>VDrDf~oHbi>6HM2a~zstv>foon(%e(pNi z%=2sHGSe5m@3bVVDi`Gssj4p~Pu^TAq`7Ay8$1OmRekU3w-FXg>4O2n_eGhSargaG z0%$igg>t&j!#V;Ca%wM+gSB&S-ql--^y_zh6go5-_hd!|*ZW$Nc`l%JB}mD+qRZhQ z#Aye~|3lVSMzz&--L{mr&_XFtN{bh5vEUSUcXxMpcPSKi0>LG?y99T43-0a&mz(GP zp7+YQe=-*_OEYhVO<@FxB)AUT{^=#ut+;d&&MtgkV=u1N|l8>#Z-H%SJ#DL)6_47@yr zFgu1-lE#vXk;iYSuruxhFrmOQ|_4Zz8SltB=XEbjvaN&*l z#@&J2NPqC?!k@-Nnf5*a(#rl4oKVHhwR7OpwM9zz@(DoBnsTp|FzO1TkC03=7x3v> znbKQ#ep{8_b>M=VscH>C3=YiR-$U&s$=@9~NPG7L!s_$=N@ly&sa1nljeL+r70<}k z+p36eWx)-x7KE`Z0l91OZ{9!8`^;Q<=nloYp8m1V^IG;9V@0~F0{28TKc6|ebns-b zGLFC_^{lmGp5*9Z`8!CZQ7Uyo+AQisZGOTECfc0I5E|&74Z;;O!awofy$00jKP=7YVZC zHM^qmv?!tRv}vJzWJ&XX?pN{1^OVL*Tll~&p)Cl!|8q9k%`@W7#cedBSK?mJ{pfQP zcnX)IS=>JEl+O~dW*<&MoEzG<-a@GS`}8Sut!WN?Ej~g?v~X0e9#?gC#+$f+6|iw7 z%lig;V_3N#Ch%na0H5bSOzJAW_nFI@8IcN|9fjkx3%uFE zhCZ5QoBORzz#;X(CY+HK?jONb{AdOylAk7Ocd@^P z%lL5q1-L8s=xnBJ^m6%FxOhJREG0E~%=`;mibLi-e&?6*);Tt?x3!>yv5i*JdTNGy zQd(WNlRJMiw2(Ouq*m58(8078pzbp5^Zkdzec_SS2uJ+30F&UJJ$AE{#&1grANl=MfGNmJ>bA!x68$Sq zg^lC;h&EON6mD(RwowbS?soKA5RI0s`|lD3TG5{;u^6yzvI1+o0S>p;fhN6I3FFgP zvBN?)fW&Qty4GiQ|Lx$L2riEb$<8lMF8~G?+rr@=k7~2#m(9+od{r!qXp`kIe2xtb zwZ9J(jn%Fy7~S2*H)IOC=8qf$JsyIXA0W->Mg?%}B;sqdxvPpv;K*35oq&GKB$sQhkruU(kEHq%2K9kRz- zUh#gPY(6Wzo;M;_CZ!>nbR*?|mYOg=MdS3fZ25)@jcAaDWf+= 
zAs=(okyC^TQ8P7dJX7yxjm@&8z}y^XJBGf4zrj>QKM|#R78#AU(G>yDbT~pn zokJI1I&Noxo!*pCl-fVU*Mr!@fgrMK&`%_q{A*sp$!n6iN}V0>3|{LyLYf5=h|SRR z2Tbui*)KlELr%gi>SJCF=z#E5eQ`-GEEX{DqOwg&bsXm4&J7CziAKkj@dLxp7>WSB zO2LmT%W=OIsjw{lEJ&&i0=> zO2xBuBd>m@a|E1En%&KCjOJJbbmpBNv{b8Fo|a1YjU(#ecnQL(lnOLIK~&}%$41Fy zhH*AU9r1>^Jcl`0J(EKg`S^GsHe5>2f4b5_({q>FjU4U+#;^|4mDga(t<#k3q|Lso z&Wq!S$Di=J&>25NlIZ`NI3@d6jEAr{)a4+@^0f_<>w`<WMmz^#vKaHoW)xl@(0v6^tV@B-;n8e&3q}% zI?NyVGpoP&9!*&6&NDGa@rC<>WLk}1b4JrpdsU%IlJ66`5v8eOs@c!<`r*?{Y#9ZO zyONimLj+8*;~iRr-hU;09{oH=K;M6C{a6Pd8^xTqk>=XArIsgB@8HKB$8Nu!98$MI z1wRd8d}!Tq2Q{MU9@O_5y{PcpR4^zwB}^}KaiX^Ve)XfWOh#h$_8wprpTHWe zQJ9mvoGvj(jaR69EGgZi+&R-}bSqC0Y$y`EX{KM71dM1p4%p4Y^bWs06p52tNA zNB55jOMLlz?g%!*2-0e#KDvA1juop| z^7f;8RN?J!()0zbSW_}y`A3hSvA2#uw?OWV5GZBFA!;l>qaZ|!a(H@D6l6w|-}y6u z)K!e)`2CW154JjU*rk^C>uHp=xWntV6agp@z=N)<<^FQ%F8n@ zysC4fd!t8{f|YAJC!e5#2p&7<2k|`WLR@h2R2BGr4H#W)Yz$*?f8KxO?MoA@a?E4V zXxvAOrEj`1$=EJ_e5wbd5Ai~9jDUvSGAhr6>T%Lw<{bEIZsbNR-W(X%n6+($DA z&19H~UuMzAyPa1P+Xf8vKF?#>o0xKqZEC&yD&4=UMAP(~6tM2Yoy7fy@>y6i^)IT= zOTe8yw&vK>LjYRV%CFjUhn;NshsfS8){V=MiyS7wooB?%(IGAR)BKE)N_m=$zNKxR zKlL9rkrksdBVCr}sz;WvjB;3Rr_S@Dg17Vdp^QYJGHiCNS?b%u;;1aMK8=VgvvCUoz_W_FR)Trz>(GmJ+S9>gpqtq$_qEX?0cF`7$5{ z(THu;R+!I-y`D*#I&IwOCrz)9U??BfPfMr$yG&sN>cdIiHAEhoYLrgcZmO|dWZN3u z2q>h(g4=e3$`x>6kz(YTf=3!h z0z^ADL=UL}rP1vqh@PDkKM$8)dmW|DG zU0MF*_Xs@1Q=^{o@_$reHT7j&x9G)>+roVr^j{DXbY#B0)*zYXN9dwWI(Vu~G8UZ{ zDbSpzHfAf+X2i0LkplRBonjH_A(53z6f0=ceT~zf4YeXmr5E7@M0Q}D6!=%Hu&yRu z;|$$9K+`RtH9PU%8#Ia^AueM`oR|JS`lRDk-dvtcmlBxP4Yo7pBuVZrN4_{%gSdJV zClMVDPN%f3cdjUuLwKz?Y0YbOXDt-^F8elZdZ#ftNn|VCOlF*CJJ<^0zK1J$%vEBE zGw56%YX=c21&c5&#h-F`VSPUS8pNp_DBq3m9mOD}2X<5-o(y!4=1=&&KptvlgaWmU z)}OS<9^N2^@7%54x0WZ6Zx03>qF7OXwu1PgSgeNK0NTygqYmc>%#NOQA?QvD*fhPt zPY-&%&8NEOI~`ny2r?Q6GT zl>XDG=4h=9kfMOLKZ}(t)L;A?7XK2FkuS|>fV^wPtoy1=Y*vnN^}TAV_nV#PE6n@W zTmSWXb!!~F&wgy){hux4z@j)U(2vK4T0JpbO$Jvr`wh0+=HZXTQA3HZ>j}v+wG%PC zRI-nJm5iTd2O!^vmwre+j^DPx`j?DC?mBD=3s?8I-0D{X}9^)OkN5abj<#E136~^oo&(6r$olC@%_!(oHnhvD=uE$YUtA>IeW}zRcIQ zlAl%#DdR%b@T|0c&E)38bpcbQMNotouM;qjd`=}Xz}Y%&#B&lZWz0&RgwRWwZooQk zR4Mds;jrDNE_iIw2v)an%?SChVy*M_nIT_d@j+^*yU?whnAd7?9bRS*FC1-vmj9}Y z$3ymHaR1$O^F17o&4HujfozMv1T&NQstZG!zCcOLlV>A5q-%~N+Z|hx!t79RFWDk; zeb;AHZ!UMIfFe05Xm}wcgsYQ*)rCE&fPYwG~MUb zi0K0>ZIW!j@Ix#>*e7hMww)iuPKGQ(!1tnMV@GP=;etD5MRpMvZqV5 z4{%M9teCN_f_Ft;0-G88oJ7}W+`-9rB1ZdvqyTLwSjLI2UjgSUY1n9Kp?qPk;iYJW z>$lzLT~ey+GKZ|$yB?=0KUhoT@IUrO^q8ql$@KKZC{fAseq(hl!r6I(AmF|%=s^tB zU9Uw5iCU0WGuMb5zq5&xpXby6aM>rDkOjw*(K}5LZwtYb{4 zD%H{8X0*9+mtyTnasJk4X#Ysa@yx7mG}(`y*TT8rkYwAsY|q*BTw`_{ZYJ>Z1)$n~ zRhaAQTO9~!$>KlTEsWcKpTjs!1?$?ylr7a%LuXZyB~p<-;&2~EA}f92xqDHVP}1tO znt5;%{+-(5xNx=aSJE+GCnpe3sfn~edc+cEh^|yjskcu#sz`ktGpK!=>CZR?YDIqT*!kFOpP4mUao}Mu<2SF+6yccW0pu z{TS<1ULYv1+PR_Vh!_1?5H?>xAyJL2F3F{r(#KL6^awsaOJjD>D~(-{Y-o0GszF8d z!R}M68AzU#Q0_j8#X3K$NhDwR1QNzE07N zIygXagxOD}s(BBbSXriCEEcz|^{H&w%3xJPu!3Aqm3>8ww0aph_(vi+JHw3{bH))^Rw z0)hd)q7?_&+v}-v+<$lIlcBJ&K+1*|I2(=RnnXE=O*9E>$6-;`u>y)h`YUHbPHuy+ zwRtm#u@um^eq}itk%r5a29knRNgV2!DuX~lKYM3J%QVe)WXi=X?fa}n9Ovb$IBL9-_@7mx@qkG*~T2}L9^#`!~(2cVi*_r zm)@IXtji=<=Q#}X3%YJ&(B~0UtadkN`2$p*?)$H&704TVeHfxjJNsku_8)1~*D{Ai zG^n+|*bH>e>klRN=Sw=3GebsI0tE@hlCsgTuyEYxUpAx8_bty8L)(?9CbE?fS#UlT z!1>l39e8Cx5V^T=Tfg(gk{gLIXR+YpYv(6r)m!s*PqbZKqi7bdCsIx&&YGSNTde+q zi&RMztlT)`tDp*!qGv&Fo2@z2Vjd$w;K>}fMS~wDAxeVi=$g7GO%LFXAR=iiGs-`w~b})h~?7HubrS(<8n^JM+s?mI( z!C1ow#awM9_8TtGqFwUfBPPjf*otbb`ux`#nB>BVN@Z^0S`LHMS{5>8HiL$qiHBx6 zO+u()XGgV5ChI+MaI*zDZ`g8diYNG9=rOVuZ|ZOur*WsCP`3&zJduH(>#)I62xzsi 
z2b~Heok+H7%b~&qKD8Y|{#wV?^R>j_bl!@0b=nnvJ!NQ$+{bFXlB6o2p}Xt)#e_T-<&W8INc-t>={9s=wb~nn*eQeKi#Z-j>d?LIZ{uSnxAA zMkG8|(S~l1=5JePC(eAvSe$Drcf&gG_I&ZT?p+WT+d%S~?}84b;_5UOH2lpUST-K1 zb&R!b$BQFuNN3v;$s=PClrBOLDGgiNR`F2BP%7Bxq^#YE{Y59}{kJGOER{&0Cziao zf;o*9JU~-J=$_u-+Dr{Q97!=*sNu#oUtMu+CJ=pDfgU@M83#QSYbDJ}QeX*Y5^-{PoZg*CRLfa$&mi>0EF!Na^+i(K^yaw68@rrGM8=ga&MkHuq z7tx}X_E37<6QbuvqdJ&*9GBR?u& zmmq&U)^Ux3fbbPEhpmxpb>6(M9*EAEl3~5!6k@SRPy8 z>rFesITFc&0~6 zUq_!~U{mOO3hLLwj`{Y^_Pt)oQTYV8sASV;?OMdGumWQ`fSdE4TfUqvkD$|bs- z+zyE}tlGcR-q%UY>P>MM_JDbyI%zUr6YzOlzAA|W8L}k=?wE$hoUQd0os=z?0##OV ztv;El=7jP}_3ByRfC}=E*&L4!>z<6XRcLT`1{eSYmolEWTsyzyh3vHEBy1wN%%=V% zJ}?6aiuc?npFHJv=RW&HeA_1}JY;r^fv3CAwvf8LEnow%krNZ;u_X6x`JyKq(MOR) z@(7B@M^C#4W!2CXB_6NfZHb-Dz3t3f)f*NB%b9oWua{3PKJf8N}kXoAnmPk69WC@xJ`~nX9 z(z|^oNG_LFtv`Ta>;k^VudumeKyfb&6T{2mV1`(CG(<&2hZU7cLtTJ~fQ&(TB(FZt zaKBA%dV*>-!f=+$+U!G_tvZ~!qF+Da3`X#%lNm^|PuE+Ty<-*grv9SeI?qrVjwB%i z#J&}UHXAz~3R?F<4Zs| z?swY{r^%J-gFl!SVL&{5N0~mA za4H8us+N~Tyz+Sw`|v0(#P~xtHUI_hn!p|P+qtNU%(YF_J#jX{+%!iHGU?+RgsgRY z_P+jE&u$N}*@lA&F#chEfYdTdq_))Rz>{XklOheRh2~`G=xVhfqU*1iPknhT5ERn?aTA(_({BG2L07Nw6mq-RVk8xf z&6@@qRBH+)1eyD$&$z%$tu(o!(VV%k`GfNY10*~sJ3(0jHrqd*EBXIp8AWl6hwg(+ z+Q7yVy@XVv1|zZ7Zu@`@tO#?cPCTwK8dLLRH4TwBJs)xkBcWgG>qi-J-H{ln^7yr z1L#MuV2D6vLjR*%6CDE92P5BLBm|czrZYM;w96yfhfeKau4aikY8sOTY-R3}qpth) zaxlKbQv6MY83%+0p0HfpUn)f-kM!t!BwT9qLCcWXTL&s|cl6)%<=9ovdWMM-siE&N zntu5@niko9)@Z*v4D$bOuaMx9M&@|tV)ne58; zj9oiwjwDe!0qMdK6@CVJ@LyHtqgfq#vc`CxVZC*cNoHlH$Uj}DEl zsw*#DT(?TP-r^jMwHuq6LFMD~YjE3aazpWYGEyv%$0yth^=e6(zD>6qmczWgUem)f zy9`$&((2k3;1@8|h?D-4WJuB^gDrzr$vtFu=sZuaATwx+Uy7TBYN591+y$+VS3)Z$ zs~i}?os$%4OXI@#a5?_lG_@pwK>FFwS8cQA2Dd+kN+$5hlMexz4DJIY>puW_PRZes z*dfuqQ9yVtGgdR+7e@y}Q3S+c!s~sF4lf7SB8ZYNmEfg+j56;ej5$NB$%Y&b@R(48 zB}bIGHI$h=449ZC3W7AvS;Q&Ctg}Nz6+iEDbM4?gKUPTQdgWQFE+bi~9xm45&bFrS zMp8$v6d0=#fAu0{Fo_|fH1t{JvsvF(d4TI|!Gg_)fbTp;Zhph*XxM;AtJDr+OTcPd z1gExM*g&5$cirY3VW3yAq11zGi2Elko+uOoGm~xZ!{S zw@4){tdB5*H;`HS;Zx2l56{37bX33iJ5;<8-NuxE(!#`;kA|? 
z!x(d8Qogxgwkyo8OpV!#kJ-=L?eD5O9qDvI#LI|pHxr`tu}B$R%6i6~R(U=D^poma z?G|VJFu4z2Ikwu3%AD|@C`hXQT<1iHyZ+?q&{d!Gcw-)3wEXUrYZ={p<5xN3sTE0l zO^f})M_rAmN?Y_wuf?(V0kfkW7hOJK4Hw;atj21KP+NCGDDC$s-?fUP4ibynjt3aK z{^WIntI4MR6f4%A%q}28hdr3=_HH#&=e&b(wTohWHQY`wh4sucB)`PUzKa_%TYFOx zQB~T#{Yc<9tgdqlr|_m_w_l8C26LR+c8Ve*!B)@*GMxipIc5JdphHs^TBeBB|N0eE zL|M@I#F0$9$j;AE&~-Ah(Gi7%vkP8p;)c+i4so=i zPxZVrW!?pbvUBw5Fd(!HOT?MTgWaYecw|;vgY8WZ!*1vHk7~Pu(=StmTqooMWxY&$ zfhk?Po0q-I75l@l3qC92Hp4^|#}t;Pe3^JR0@XQI{4 zU{Am2R3^pvo)yj#!S!hs!^}85CbX^3=OT!R@3IuC^KN-(@h{^ADc(nBw2zOGuXWB) z8B5V>84T%jTobkzi0P~qq~=tXq%iBv%Niz5UoiEdBSlv%bmVaJkF^R!yq~i(5of0v zgTgo_oAdedtDMf3Ht0@aon9&D=3wCoXZw-!<#E|oK>D$tQE+|AoipVcHH+agr)z(z z3>HgF=}4?hM6g-UMDXQyBHmIE!pefI7{tJQfpl8-rbW4tex+#YlTQ6nm^X2>a*)S% zO?bLa7an%a_do^W9m=hrrqFSNr()(Q3WJtnrjEa>y9NV(JaV$5Sp+qDwdt%`Apu48 zbpGc-r(A~Tl8qL8qYvf`y{)X%{-kmlIU1W+YXE%AT0%~>!5_R)DuZqSHVn8iLLX@~ zql)$>x~NsQ1ie<1GwMHF9lzVf+VPw=<6;2!-Ivqr{Qz|=D*gW3io1-}$16s1@5>0a zN4N7^nX}E)HUmiD$0B&M-MJft%k==3$IGaNe4nyGsJ>HktN#bGP=UjxCJ$`u79dr7PY(M27-sE-MrtRV8~SYR~Sp$*Q&@ z?IUN!(XDY3=PlPo3OG*j4b#R^!>(;Ane@EUYY$nIo4@*>U}eO}_Dk9(+-X0Qr!)7x zG}}Wt3ESKj)Vay5O$$oOU`g{){z9Q$V`FnfGtAO6>NVaI)Qtw(XK2XX0k62U&wYC- z;mH`%Xbhz35>>uHWgEu$oBT1}(@)1e4l{^CMtnOq-r9ccnMA{Ct$0ryPW%~>dM>W3 z`!gR6TJJw-sdJiqiTr_#UNIbXP^VAHU9yU7A$`=Fra}J^JuMELw^DrI7Mi6tAwKWw-nMY_a((iq zHI&k7?JPywH*g#|ZM^vlA0MI$uVA-7kv?{eAEzK;9cIg-2da_#~*yh&x-9cEM+Nl_5q)zp%EK`_JpZp6dE z!;~~hbPT3CjgR&vGO70>bdb|=Ga2D8ggx4F=ty`}JCY49O(u4E9Isazr_Xogsg(3J zZ*H&hcvA;xIG(3o!%?Bgn1@8Krep0f`qZ1{9QA>K)v;)m7bFCy3|dP5i!$C>lhnLq zHrJM6Mro07w|T}sD(2p1m>lR7>9gzIXv>Z`I`1pLDa5PSW745WR8%y$%r%4o0SrEs z(|SQEF2}L&t=pCdj0vOhT3&pqwHw88W(%zXXu0-Mix)YZPtiv(p(p%Ldt$-*o1BZ5 zL~E#iqEjDhsEt#O_nXQ$mB?SI+d?av$(mV{2`YZ?D^`b?oQD@&C>@`5!rr?KmBo|~ zY#LX7IDVS+HETwOUX%BVCy2pMzS)Y~L3>Z!vief5IhUJ*TGVSwWT=gE#&t7=*BqMv zPCEG1M?_nPUPk==+o7C~JaklFjIX1&93$A7Y-V2 zthEP}FASG0sZ7l;s*9VjnD2uH4m0G*EHx$ai)_QqK*Us&)K5%fzwzmzh?>+?r(|lO zKh8Ed;OD9#-4?VbOSB#Ozc$HZkxGHGX^uXZwmn(zSZZ0mp<8xNjE-7qTOA-q*VCJ9 zbz@9srE%D3yH2&DyX;_+CHb)?<(E}TirvjyTBo?OplcOenB)U@S?{>b12ydNGH z&~i11PsRin%5*t97wDDWTL}Vv=YDvX7SGfQAhbOjz<5Vo=78nNUVaRPyENIawMiH) z$Hi29=;X?nJLXGE0o7r~mW%>loVaH(!?@$;a|Q1-9<-&EJ(jrZM-auE+JnlRT@%a# zo*(l3rWAl^31ciT#HhpkxS+&7%X%)|rxYz}1@~pWH~d^8*bn1ITTUT&#kg8OSw}&n zGb8f1OlO1nj>taS7zo4iIS=TwM6e~tM0jLWZe zxoMy|!{>*J;|Vo$kz&k?H={~f%Oe8^8g+@JS@QKcb>khVl>wF$#q~RFu_h+u#Z5rj zdYi53*a#C!-};=Jl+(5EL$t`~iEF|k-byJ zr!Fryk5bery6zP)U zajDnOA<_!6V!lVAJ$}9Ty`E=r<|8cE$b~cRxUq@R`Uqu={@4y`kr+6q7uqx~Jj_hb;8fK|Julbh$vpN0Uph%)a?Rp${o5}iWi7`bDt0#&xF;Sf zToUPRf_;iaB-@RM1PPr-&0<3$^7X|<7|q6!);i=)KDHDOdxyH!JvjV+Qn^%B&Kq=> zeiRy6*P%R8q-13aPJUrI6K<@W!?I_hpD2q=Vz%{gA2w~q*zm6I)bpxV6)9ZP)p&kW zzq(TxjEV>$%cR0hx0qb(*&kt_gP%!Qjwa%q7{_y|YMsF>I)vl6ojAvqM@41~bWXlY zg8OU%TfJmpw?@DyFZd(6A|Pu2F_9U8KOt}BXQZ>6`r@y~bE;YsvN{W3@$I3oR#Es> zX9%oIwgG!#fxe3f`^G==uC7p!QBZ7~5A;vN?4Z!FBKOa46YbO(!U@c&l zPg9`9`zI}TEtk`b>kO;Dfw+N-op3gL&i(F%svTF&G1x~Nz@qcfD7}pH`@B|I2uj0U z$I++CQxrd9bk|bV%1)LcH=djbPr+C43B&uJkIZf)l!mh463uF{y3?i2#=M;Kkc$Oi znBkePLb1O4r*xyu`EyMBhrbl0v?tSDkTLtgj8DK1$caKaG6@GU$x$4)3h$g#>v?04 zASX?SwBOks3-&mW|9AVZ{9>uc!3wyA@{RteHK~V>JBwiFYMWCDqt)jw`%Rk7T=S>& z$x9G=mH9}a2%_iX{>@~=G3iHWsOKM(Mh5$G%gDDZSR@|ArKXAH!s^GDHLFjXx7XI*1 zT9?|_hYFMj^XKl?l8{bAe3&glH7B0*8!zrXw%a>MVqK3KhVLy~A9vGawzU<6XXB0i zvk8G^vAJ*;f9~fb&)*LkYAx1b5ZK@*1`ZA}4yW0@?X?TTeviv`dHpI8(B+WGrtPM2 zIYTd938>I)L+BjQIO_(tumjdvqcYgw%rBKE{nC0(vA{*b-P#i)0>djHywG~LMCh}M#X{B_gisIhaQuVqr zFoA{!sOjkkIUAI`^PXTRRBH(xx!LXmhz1m>oBT=Vc7nt+M|}`ujt8oPWACj2wI34RT{wl=Fs zVBR-b9`h_Op|m9N<<&#J4UUhU&du#CN_P*7@%5H@6yOh)3p1lcK>Cu$yEqH?4-bGk 
zH>}rR`qc4FVgYriIh=(Tjb|(-=4hSQbx!yfHNXN$E=Qq>votvFpyj8kPxgya`8@n< z_3-mNwn2gm7pdW;!)y;|cy)x#--aZ{!2&;+_a?s(C@W)NL@gf6w5k5Yb}==ohl%7z zCNDPDYFtJ@JThr=G#jfJu6n7QfMa0(kC4<%iQwFA(KRdt0ERAs~?cK=J@m_~RQ$LAVNR{)8%`TjtV>p!vkBc+=WiphwyGf5M zV%XI&rIOAO10pWIKE5*Na3yH+;0O ztsTAZHT#U+a^styM6>8!|4#=QCA}=_P~X+ET3@G5Egtzl z*Sw5@uUCGZUy#bQGTMvjYWnj)E=DXuT*OM%zSqbC+npnpM@3h_IJM@nDsEEtpk;*)tWpO21dl!1ypHmk;`qa-q;k;to zcO4WnyBKW2fNd}+^s(yMoK^cMu%?seJ}I81r?c8#j>{64*|77DK;>3y#w zuFLC7eXG}~Iq#IT4+~9LQE^PXGw#o%O|-l1m%BUk!W2{yxVpKi-t7h{p{A|;OzTSk zfyh@qZ+JXP1e$lQpFDl8ns@AzpPzxw3wJPF7uT;y1EtGN?wfn>Gvem*IjN(#T=O5T zh866)hxIONx7^pZhY#5u{;nsVxoZaFntNlwcNbYzgbrAW#4xlrHYQP7RVs4@Zl1py z)nYOG*IP$aNI-YqL?PF+u&U}f9Z1GWs%ZW2xLwV1G+M`HICCauf92-_)|m(%zp?($#s2yKscf!{N?h3)G&-;6BPU{b_^*Wb=9+}Awe+-}dFG>b^Z zTWi73ySa{kFS4p1L>C0=VVCx*cd(EVV^8!V)PJp&#aKS1Cb?-jV|oS-vobve{(}X4 z;Sj)xcJJo^u5y3k-N#(Js1s`y8Lu8e-ZnxoPCdp`YjkK(`)rrT_9_rH)*x^07t4N< zVUmyIUu{^$2R4?o1L0$?zB2Q9Y2)W6)$4A;M0X!Vr8%$L8qKMOfZ>Iz;ka^z(%!14 z6&1r}ztt_!3-|UyIj)YC#xMLL)@b%^%AtFnpm(j62Oi%S_tjGXC*AbO$VkI=QqWoF z7#LVlH#&b99z_jACjtk>ri#BBjvf*uu+`^D)w-v1a^=ug_@(R|s|WKSy6T3W^#VTT zzZ5z(oe5`9HTREd(UUh6QE!uUS`qm8__!U;_zb>qv56xfJX*DHr$Ss;a4~SPZ7w6f zakqccn%47@gPDu&kIS>9(AJ<6#w|1-r0zm2(!+0u-<~oNEw!9e?MA2dZR1S|(a%z+ znPu?hjXh4X8AuNnE?;tUz=F}Ve%L)g$5ItM50+;L%2Riown@u6zFxN2PwMd?AT^ME zreBk0gE#~qJ?h11D9OvvW6U?2#^5bf3S4>cQ{o^%i*x;y56zvd=p5Y?B^E1CgRfI- z{h&iLd)ppcc`|vTWB1su{$(nOJ|xXjpIMF%nq{bR^i7i#sc`L}+`C;9n0;Wq9?`o+ z6$PtTSYPodIrL>l#@q}$9-QZOH-V=KX?d(8V$kcA(-#d&Zg`ZP@|?6>%4NeoeRjS>U^$puDnM<;WzWqh*WJ>WnxAg<}tE9;lG8SVqt>SUlPOLgGrf zS@qAgFLXTjwjom-h`a?fU%mjl*Wmh*hcSYMHe!& ztf%F=Wb;c^6OGYo66>*T-@=<-D2u20ju-xK=V<~Pt=h8gl&t8@<`q-_io0wl(@<&+ z?J|o=IgnBrS#T6ZKj^9X$lDtZz9-OajSAp1j;dS$Wo*kX2=BtPoYCku{1t*VGGd^J zD462}7zkb+TFJmpS}7+aEwEHW{xpR5yspeqkbm|z7ITA)j9yMW84Dnx1J&qcr$?-< zI2jy16d9U`OP0_yu@aaPXFe9e$N25iZ%}nCx-*yyqf)#yoD!mDGHtB06FDDaINja= zMF{CcX+d?0WZ>3(+AZw_bG~N7koqHu`iuv&NUhtob%}eoK~4t&kA|;D_ev*+t_h|o zvGTECWmln;(q2LV0;g{V9*nuR>t(}iYV3p~Vk$`ltTgbWg8$^D;U8J!BNUp+hS6SB z?9*#JJ_vK<%6crP@3D-xnkQ+O=L0>7wai&--R}HeAqWX%w!bNNNSjN*48!!KhYy4w z{|p3N1ids`);3ce>tgYhW|2t@#+MJr!pqZgOo*LBK@zrG3xu5NOzIj{8d)tIY+w+1 zyBKrVCHu6guJRo&z{sCJ*RJOv{V2d#rf$(gSSs#AR`QU`;h_Iqigu2?U&lr7!RfIb z>MRqTdHnWL@yJ2&9LTf2u61~4A^~-3lF=hwex%uZR<}%JdFNvN9?VeYz;n0TiFU+~ z-=*#n7h&9=Q|4XZQ0<6(tD|2BU7@70d@ggE%(k~$+`L|eNsx9>=g~u?&@^Ob7|<^! zg1`*-*~Qy1tV_~yo_0i)k*(rVDzMQEl)RvrbV$EWEH!ybxp}EDc*=Udf)B)MLrWqO z8IP@f*76pWntd^kg`;;6ZDaQCMT^pu=m}~@bGU_^kF<@8pH_84< zS7`H88q5C1K0NWAU-h_g(ehP|TXz^kVIK{H&n|n2=ZQ1NuBB?R^$x`RV)o#UVdK!O zj=)NxdXU*@LnjEj)6(QwwkICaRHc`xCYt5aRX178hwEVpQhQsuDl#V8J;j>b!;E0) z*vHM%?lFYI<~TYLE=4&Lz3aYWJ~ly!jEwBeK}`J6Xqp~w37;W;A8wO_i=HZpif#_= z(!0PdN-N@TvIS?dMhfy3O$TN2rC z2(8zvcE@f|Lw6uf3NL9pz-9{S#M~iZ9I04bcsd8SiAEymyPuV<=#Xj89~*t&kDUDj~o%P&mUaP(gV8cD@b$RxsT>qH$L zYZS}9i{=0p>De_}(=Tn&P4*Q`ErynI#FzVZmYR>@h;?3Vyr_AQmmJ9NrbV|oIbAO8 zU5;|9d@6?^JcfTq`hTKn-w&o^iq>7|)e4i_I}RH5+|3T7buu1}qmt`Cz}x)Q)1J;^ zyFP3V?NuD!#^YeO8c3b7Ui8r<-1cv~Fh8C0E1!zB2orXs`fchYG> z+~)^4o%x@r*H6+98Izp%pDD*nWb<^gJ1_gsg@w=(_(n9?<^A8t`+vgk0wh{{!AGcJy%?wMu4lx^QZ>|3Pp6B?#a+PVp@S z$F--s`>zHc@J$iyc)5A*uAk%oy=Ds$($vtq8O3vQ|F)_<=x|H%@Be+9-JfglJ=38i z<>>!v`3u$SHeXpW2`;%3sHQw_@eSXph@BXscF(r-g!I21=~pItHjKp+6Cb`-mFAdY zm=q#Z^faEdpFMdw9$ro#yZ7BjeedGI&4GLWYV%EBBs(G3DYQ-p?eXL;)yd#6rp5mE zuOEkHoK_hYxEG2)lP{>(2o<~nh_H+Wq^{oY>Sv^RYs$vRLXCr))(!E4#____%*U zkK;a)8=3mkJF@@Z4;NTJcw@dg;gSS_kQg|>NDZWnSSiFu_>Y^B! 
zc)yb1Ed_`LCVJ8Q+BdtNe_CxJ2^kQ(zRmK1&}=q;s(HvSeN<*4fRaeqq!4MGx`|zr z^$u=jv{v+)2dgBa+1xgpJ`Ww#M(=uqNT5@RiRrX&Ku)pCIeAc)BdNa$=RBUKV|Lys z&s%P|Ny$6z^4`!jwq(?D3 zbnrCS()QQ)=?#oK=RO>bJ&!~0{@OE}y3(qS=}VO*a`@sL@=hP8-E2~~1iqtSY!MQv zFk4a-lJ7w=edY-?syyOQ%-jl7C8c7Brf_GuiV~IyEf83JczDd@z;%jf#YEr$e#4aS z`~~~)v_>>H>f)jon$wBNp%y{47F1YxV4B-SqGz10vBLdLp-wA^`rfLAOT>qoY4M|T zSzH!Pv-|bFus76J{k7*=D6g=phFJLWenQpHtw+QwZo|NOwl;oE&XlYmT73nq|8`48 z*X8j?p(49xE3`@e>r1N~NTtju2!UXv)}Ka1-!d~-0GSZC^_a2UHZ3v0Q4kyT_i3Yrn!HE-}tN zT9E(ZK$Ex#`Hf7AWv6XqXaD?mng`bX;~dQ*{e@qXJB|PV>?r*7FH;90SsRXNG8zrL z5Ul58L7LJoTADn^&BOmL2Sg2WMwDIz84{f)nC+zBcCfm~B zeR`sD8N0drV@K?#qF10m@$WwYrymU$E~=DUYiEeZ_#iquyeVGX( zQ&1#8oYs2kSqk2Whf~~B%h~^B*VfR#K@a4cpZ-nYIQOsAO0l~}qS1d;M0whJd#C%P zKi0}k?mDTRuMJ!5&)Ph{crND=hsQ9-=Po0UbXrof!4ECxeZ=^1bXwh4lo%2n#X?=% z^+^Yw!$KJ0te~w;SxBeVV$k-;z2YK2|7+sZx$E=HPoQz;2XUU!*#`CEA2q5c?O5DF zO_4t$*A{;__iEY;$}4}*L+tP_GA_#cHW>BFv2_6sjTa5zYh8MSUe~*G#yQVbubTz? z+x548zLOg|;Haa;#_o@QJ{D_}>7NMLiTm{wR@PwsI|x79B07rd0-;jJ zsXv$J`l-XS9CA{(D3ZJGQ(czJFG~paCv~1-T_Sa?CE<6RBh-#r`x9>&k3R&1czzPK zms!rYXzvi?fb#!^Lkk8uBa>%1?Of)AplY||>6BQtpigq1jW@O$E4eQtl@hauMIobu zA>~O1>d$uf{I2`K zI)*%Kjb3qh^DUA^(@o<74vf9z8yLfy1a{Wt6}4E;MK$ZbTpdzo1FG zB{RikWgENr1wcv|TO)-g77@fnTB6@|+3NhEUyEq{OgUtMUFI=G@(XfVyoWJ>AnoEg z>SANhdRL2B0Cg-S8sWcL0=lXvj{dMn2gFN)8&Ye~$${pwe|(B_cUeWGQQ zHq+cd?hXk(Ov_pRKWIgWzx|yKtWSKYqqNFqPDUvEf3I0)%ZA-4wOlEx+7bWlQ>3A?sy9V23| zq2ZnL7n~oOAD?&DTF>*W^?twWS?l?{4+VRD-JiFL=9?>w*1Ct9BH7lh6-l3dY}3G! z+in0iokDvos2{XuhTN?$lFlKwvU*iNy3?e!)D`aJnB2Y*k&IFw#_yH2?qFqbmSX4S zO;eyYB9d>Or;@@;P%rVHq4v~$vLqSGzE|hF6KiBXf2;q7y=YL9omYfK>pi(6(QB>^ z7p-$*?l4kd@!xdL<%mSVcU5w4$u0W_hj6#3`IHyOkF4`=xi;)b51zK4(CI&naJA8k zp;0ySw4QpDBH5Ee%`Pi8BYB#+X8Z3wYAW`WWxa;^_3yH;`omJgsfyGwBp6Q2Uu)P> z-ye|{YoGX`>qu6}z+oyY1wD>WtVoU7C%VFr+b)SXx5>P7GhxKiscnAPg}yyB+MF7I zd7paK%FfF9pES>FWn1}P%qG=07)#lJL<3Y}?4O1)9#vBF!Jk8}8QkI!u;chZPrPUc z{6Sk-`Dx@u{>7SDEIG|8YK`>eS?r5*>8lNH%1v35opF|)ikT|ehS#ai%B-6&?OGju zHP6js7rG{6zfg@sX{Ald-FTe*xn#zqGDeM_;bN5U&626iD*m;luCXI^GEOuJJcnHH z0cLq+Wi16fH6dlgMlu(tc0&m+t>45(`1WI49+RFR!vM^gN~EQFSdv!^3HA z&bs4Ukdo2}T{9rPNu^u;iOd2X{$2}{AAS&ZshUd@%$jrf;S(tK7F6Hz#y*wM`Wgn4 zVv)?vsBRL7RWlDY6jfyYUCKb2vqE8x0(}rnHc{zLwUI^$5!1hdGClV^DB@@ z#pCAT_0e%@*~i}aW>=)9R65DWkER!#r;*6E35=4j*!)-*^8()>RID$}q?~znXLoC8 zGLl{5dmj>c5RXdI67poF<_9C|;e}dCq`H;|OCm}Lx| zPIijA`Q`MK{+Lo>msaDKVRdO;X*{ox1;s`8A_RZ?;u;?6`Uceec_8 zI*xxn;*!#ujIZ9zvb#CYoOw|Z4kg5)-;y~5RazGF$LQcZez5tq_?V6^DtXb z20mrD!+P(++^HE*C+~XL^ym%c6Yr`Yeh>fLt*7B$Ezooed}A>VkL;B&ph+Sh>L3bx zb9;9kD9#Q0Pv`j`-D*n;W_{+bCcDNT3V7n?hj-k{V8|k$5As(T{)@a6Zsit8mYwM8 zkWp$M{RWCNz54ve+p{L^<8Cjj&wMHXpN9uGjd(LNvmrV@MEJ50hy#xmm$A+$hS-*- zi=)`?u(nv^t#7Y=Fpa4hc6zkdxvnE!g38gSuyN;d< zYCT9$$Vc>aWpN??F%!7gXT-!Ue4hN zCI(|X@{j8QgX|C>d%~JS;mI$J$&KOEECk!g;0lMZgb*PYe$P&l+s*>BfSo9!QRPaZ_>ZQu2`lIa>`Mm*xG@ViL7g`44yLTXX4=gds#GNgw3jz{ z4Vgs-V@wj{osHHEwTkdus4xc$+NbB^y2l>(09v|(wt%~qfc2Jcruk59e}dzPxUva}csGHCzPr9WLN^1#nk`v3Ui zOH@;3Uss}a`QKIDZ${&xs5aoXdQRWQq$`;l4@EE}Iw45Na_=r@iKce(F|iha*yQ78 zlDM=FK;vY9h3H6ii4@Qt032pK2Qg4=A)?2CG^Api=tbU+ld zT@%+s(M^tgCS-BL9{Av#%&dohA0B*BL!n9Lc=1%0r2u;t|UX<4vS<< Z&6y678R^+$b95N^kt}S@i}oF1{tf+bPr?8I diff --git a/images/vector-search/auto-vector-search.png b/images/vector-search/auto-vector-search.png index c9fa35cfbc7b0ad517f44989fbf1bc3a3c6c3c18..e80f37b308f460d27d23ae162d77252fe37a8ddf 100644 GIT binary patch delta 27548 zcmb@uWmFy67A+hIgaE-kxCMs*0fO7X-CcqNcefPo!QI{6-5mnKg1fuBeJ9<0Z+G7x z-xzPa$EY7Q>TtHKU3=}h)|~4sdP6=PLl6haN{b-Cy@z}C>J@^RC`kU*tJg%YUO}+F zeFJ`ysC(UCNqh>2K$P+N6%_aZ#P5%y3E(lIj*cE-sz1ZyK|nJze&k|^=6#0|ZTXJq 
z_2-Wtqa)w>0q+h7!EN!q5(DuoIzt?$!njNA;PwNFs7%O}B8RO;X65qeLeoX2a#O|_ zx$4DArsdRtx_WaFrFw8y!MwU#$Sk8CS=7LN`xx%d^$_aj@*-J#|IT*G#qo_>*(TSH zOWI9sh&&`5Y{aX7U8-K5tr+*Z02Ub^$`yuv{K2J^46dP0>5d%^`y%T|;AYpg^N)I^Vn|6>L~^*gwQ z;>@oi|I?i!-!1+>X9~Wrcn@wtS@1{L|8!U2dCK`eM$~12ROBo1Pr{G>o)%mz|-${t!U3O%_P8qpQYbpn#8jBD_y z|F9$Q=lDEX-b(6Jb-!Qg+4rGWt&I5izw0mmd;NoH7s&s;gn>eyaH6zilK*Qdbg-r3 zNQNE7{xw-BJ(LVcRCrjlzYPfd@ej1%W&(2us9Po zm3jQX-x?xJaIgP|V+a0k2LJzH?BMnP(?tLOgExO~#YTIGLZ8Cv=XL{GB1}R?hRs%{ zRjCe4$bmq<&#JWwj>3QMH>!kJ7!m017v?s*NIsB^t55Z^Ofs&YinLK+EJG zedB&lPOa#IbMaNC^GM@)_h%MjTRO9p26c~mrNdn1RoolJon|^;5O2rzeDrX~~<^T%yn!P|2ogbdb%>kqBdjLLR z+YEvHNh%xe#%&}Z`DCH&V8|6k5%;#+?_kGp<=}Y}0!XN`4)kW$U*Bocj8-?SFkY5a zz}-F?ZU|*ehAW8n-3KJtOTdcn2arz^MfjC$q-D%~g zVY8!g(QsmTw|g4XO54)>`hwMm^;*|02mI&1cb?ZK2_H0B<3N7bw{SvkU#>g24B1Fj znGtF9NRBI=-ogzLqDtb2o5P;@SP4egb@NY_zupr9SNp!F6(1LyTwYsfd%*Te#CBg= z0g6cHPN(1Ee>Ks);LZ>oOW&PiO$_x`cEeO|KFo2Hf7{qj;Q12c*sb2`PE4=G6&|ML znkldqf9c~oHcIC}JCtLEnWdN*Y&=(idQ4O76>2!!k!Cz!MYm0_UHtH3`D=6cIfjJj zX{q5GAhgvUlpZ%S2{$!cxRNE(-g)c33)67u#2>#@cgMEr!BsX%5th`K%S`wpTF{0L zs{$9M8rV}h^r+tO=w{8ZJ(d=HHo2pJ8fcTiX_rO6kiCMeRkzZCqCOXFm4-z4X8p#2 zrqcb4%l>#?WM&QNZ%-mh4;eN%4KO}Pc|bv0t_b%@CT^r8wEDb;BwS?MLU^!!KcrL= zJTz^(J~Pd`{?ls-IhVvOZ1L9o65WD(a6MJ2KXM@1BT`(+EMv^`%l+f*njrPSQwhn5ENITmej_sikr)mDH?T!}cN8jPEpo8+zD+otRLw+`<=jo;~IHaK2Y@n9W{ ziln<|j)gzlz1oR^SC>SACTqKA9PVprbM2MU2oq2&37|C3S~a{L%8IGeu7I?bV3>?yKj&DqL4IkFw8#%v`mJmW}nJTxKZG>=iK8c^*qr%bXxhvn3?Az0(F-tUw58yyGT1WCns(KcD8um z{p5J&QBsnw?+Pswn+`3@b<7;~CV}F;EwwCtQjJN8A*vpr^`!h9BWt_-b7CUO{~q*% z(w=aDXw{1M6tibkF3s-X;X><1kXVp+h5muCg~vxGZ%D#jg&@Pv88JvmW0D-&Irfd> z>Osgt0+2)TrE_KIFG7g*VyH>^g~&30pQs&g ze&a8%66Fsj|`HEp3AWM&Pp%4!-?@*bCVK^i73nnFV>ARdm=)=fyQb1q21rvyJI* z6uBe>EQV*NPlhJ-jgDo)3R_Usi>BZ;^1SzjMA30N9(WVO8b#uisVM;{^zEVZI6a zVQp{QRYIt25?>7G$86?Q#FnHub1a8Sn?_}aDfzU58v5W~v7EfwD*b~|ch`6+9V&j< zr=@{~r9fHn>Lu86r{B-67Lx1;ccR4s#)Fz(+9FxG|G~6SCmnJ;4ktU*=2N**POpj% zIvo{*2oQvlc)68NsnMYTGl1)0m|`SNFcX&+EYyOGg8^$Iql}G~TW)g1^w#Z?a@l3R zbJRFkXjF zlG7Y3ArUMpbWSZ>+t_$y;RKB}01lHn{h>J$JLe#6cdV+BBSXVf=Ny6_cue}Q!Al>9 zhMDT)AKfGAM5LXmofrf+4^DR65F3>|nSUlDQw%mvB0LDeK8QXc*4?f8&(k4&aiU6O2&ti>sZRE}EtT zdJq*}L)cJI%Qb1XC`7V7v)gTbXV!^9z2gcAJSUR4?4v-n*3MBKRGni&yl`bCeAHKt zP-@Z!r782aJ*TElQuXe)6FFZnVh;u6^s>oZoO4n97qNP6lAWf9i`j;Gd*tIlqSDeq zOz#1js77<5%(|cD{$W*8C}RjQQnq>JJgT_Jn|pV2n(npFfA27GfFbFLiA?j_$mzen zf3*T030z>TSM@(ZKLo1iFc>)JY&DNHM*kP1fp0ux!2_&h zvUU{t?-BsN?@SWGaIwbyhTZ9JBm9nceveT3&FJwzr!GoF2cAw;+zHQ)^nY9VeWMH> zV9!v;9R9!Jvz*_vnK)nCS^QTh&@lvsSGq{pObYzDI$nx`8KREQI8rQZ%xOU>Z3F}| zh|%D0q{Y7i?06X1f06#Rib6!x)WLZ2R1ln;&59h%(jib$@vpQhqT+eo{$+HpPePuU z5uY5I={VPIx`6Xhf|`&4k_mbSBe(jvf?@hbm$z(;Fka8d_Gq3(u|SqzgZue*1IgFVZ|(8! z!rInWAXhvV85=u6Z>P`e_v_ekftmq%-myc z8V3`X5jg2 zOCthfRTe7{@i`2J+T3o+_FSLtubg*Moj!Wp+oLmRYo>F4{;a$EGjl%oU~@1U`?EDb zBpy%yUAa_!ZzcKo_*k{ol_e!Lb!~6Y#5A6QEJ711OQ7skCL7At0HL!znqwy0)Hq>e zxaFX9nzj8Pl!y`A>;h+JXD5myqV)aZ_~5d=<;iMLL`Jgp;A+r7;75?~8)EA9GWkAI z6cmEI(QafEoU#iN0%#JhPp{-Ed|K@Q%#Jx+xxK1FePs-tJj@2s9R$vJ%|Sn(Cu*?c zxMiuJg50)(?-k>y7Q!xZ(%gh#L@_Xs`*%` z6y|MGm)ix?@;UK%!Ko~lx6MaL6y-6s9KXKfvejIX(AOn08G1J)eITp;`a@u*Ol!Q} zN=M6lsz45#%`!!qkBlEZ8vM>0&(OiMh8gL5uS)<+M&wvF;|F}QwYm9e~GjVN}SI#KGhJhDWDS_bK>H6cUpZnf%wgTaO~qDGa^Xr`($xG-!g4Go6jO0*4!CI)AKD5RJKpBg)fQj4Ef zhfW||<2#ix_kq<&{5`v0N^l}!IY+|+r9twWafP1G*jiA$4SBIq9S;D>?GR)KIa?N! 
zQh-OEX-yb*MAWWvI_RhZ2UQdmQpQaqfz4`J^{K}l{MN#4g;%-vIPgE8;-bzh2n5lr zEeQC?o)b2XiJSu~uQl+zRN$6gaz(-k-tR0}wIfu1_4W5RWupp$M>!C=JKu_=QK83= zX!Ce*Tx@cHUk^4F^?8F(VLDCyyTgg8 z>%{}94C#hQG$+foK9|1SZ z@o1h>HUoGCho#)M8!q(iWotAQ*AEUupw4{G`f6VZ|Fzy`@{4pdMjaUe2n0DnMHYb$ zl|^vhbv&U7A~;6OUocxsWrq)LyYzAuwSsgie`zZg8;>uoCH`YRt;}u*cOnGGc`k3U zhAz-D;D23I>r25SN7VbRml0XP8;vAn#+Szc8AF576v@k&!6YQZDnknbcccUt7Hj8Q z!{e=PPDd1UR`7=XYku%peeX-}7P{E&wtI$*Ab&#~GH3|3%>CI)ePk>J7b=ptH{h-~!#beoGZO)2oaGlHs`at6zYI z`gl-czZt6Y2QajlVrnS%Yz9d6%cxIBX5^s18@cCwkeB%424iF}qAd(`2^_C4mnt-P zIDX2|tUj!wFkq^dH(`Xl+8QE=1YFoUjCs)=ldzydA>^``(9;96NGcT;gVINW zO^6g!DZZEr+QwoMyRh@q_8KGzk%vyk=JU~kGR&BY4Sff_b_%jsRY0EBhy~JJ!}w1U zfG8HZqP||F$$rmzwF3fQB}zQ6*F-hBqZDCi{|ug~VD+#Bqq#8Gb~3M*lyO#wYWP?I zKe0OAw|#{N3jgT!1D@$tBkgxI1@0g5nUGQ2?T~fwV25IHvuteuhNyKy9o^l6CsCFf z+3Uvga9#zvya4F#3284SHKG*6j3GHHb&u+H0ume6%E?scmOzhOdfVEHKd^x2SXPPk zL(DwcBooi)=VX`GHl9Ap>Oz;QXu_X~{|HRj#XGb}3SPmuL!niLD)QLyokzrmrX3JZ z?--36WAB9mpG|I$3>cH%&V8i7Km&hS(cO-n1^5GKdVx>^TdvU#PoDi+a#s{_VJ7w? zDUD^QHe<8jO&Fba#WK^N;Orwc<5_X#3eS(^L6cc5JB=p&2gwp4pj9;2p@$(|XY=7u z6N){tSSt-_J~JJyX57LViJOB@Qa$D`|Db?!)FC5P0AH=ucHoq18WmBxtk5xSkezXj z+WqMD3xCYIluHyiB%p9P4gOCbo>201UIm^@74WlOTtYg2KBTco63fPG$<4-Sd}o-) zF)!e>!(BM^*k8dyM-i0c@nYf4n@FV>gKPmfSk)-LDN+EWeUI4)$0jRr=&D{tQ}+ZU zcv@pFcX98JOWC@BV;RxVa_>hJO4SM?0RaI?b_$uaDO~=gc3>suYa!Z3cx3LpQ8QUY z3RIb9Q$otp0%#=!Q1f}LJ?LeRyYWX943`P$Pux+;gi8sD>v`(vpg5-(d6Lc_(-z>} zpyJO_cHoUK*eSW_MR%byxdVvIekg%WRp{D{MI?g4>FBKIG2EJKspL)=t3&HC z+>vB>+!zs`b<=nO0x%%{v;`6pN*ty0An|~PfzfEft&@Qq6vzO^5RJ9lNv@w-xz+CL z{iL^EoK;m-Qv`!TgL@{wt(ec>&7e8Nmulsgm)@G~2R$oX^nz2)3C#Lj00}869fy>N zh{ymK&c~4L)HOC%J z<-`)9uW&lio+?t3XfNGgY-F}vX)7K6k;0CaEfUT;eRqFQ&S<SY1lh{sYV@B%#I zakN3qMx(2%oj%5sdEYI8Y_;mIU?&z(NFbw!HoMf$>v*;2v-JASYWdAdxE4{Oyd~vL zkl8{Vx6{e?m5Ne|=fQh;zBSh>EZNtMi%;*W9Inm94I5 zYe=v6eno17!)Y8zL^@S?FG~mN{{Y++$vrxQ0#%fFp>epn*8TbVZp$K^JRMI zK6ANCA?my5Y5m0u`P_YdZdjpbj}04$*m$9ij^}pON40T1Tcg%gDTYP`k@x8m?Rc@# z2aL9~;+yUFP~AN|q#WpV+TQW<);1TovNT$)@GLDSTeSs&U2~Sl!}aO;c~^P+bFf}- zaHi2%nvBPV31Ip;QM%6J83Fg#QwnV>XJd2s;19a{Xkx*tP~tM_F**%@X0Y>*jV<_e zaW8GE9mXW$5ObF&Y%-fe4u?VK)d6!;3EW6up3I??x64DfzcewiNV5&@OcjmO+`4q1 zI3uIyENEZ!W7fFTGA>%|zJQ`+IB;-BB03Sc?-Ba4feMjZ*K^|*O%AL$`h@6EQoR|n z(-f#KLZbC$Ip;0fhZKYgb$MjuXIsLIV-a<%dH0RGHEY9@{%xs7er)2zP?h}cQ9Vj- ziDEI0ig4x4ayO$xx^4SCH9;Y(smD#Kr`8skA%P#mQ1<6PtMpqpeq*AO*GbOZWBTf#w*ML8xV3oJ+0*^Xs?8QA9vIE4sM}rKC@26A zbXkr0TA$`{q#-22uLWU{`#>~Yz8Pws&`6mXf|DvV&DROWzU+Uf0W zN{a>DK|k<5yfaixP85%wDARM?CkFDXz_|Zv_Xxu5jNKQ6?N~QycCedk;KUY9wyyMC(2OgiiY=i+QjOU!MKylbEWN_0Spe9ie6I+w zOHmT(i6MrEc60~uhsUn>i&w~dB5YlY8Q!%&zQmX^_o`71Mfz7+$oKrXK*;3~@X#T> z8YaQ%PF)x;o(+9I6r@eNkrIEnw{9_s;hTB_RwLPU-lw)&NG&ki5!|k@|4zx5;_6Fe z?=93bY!|T2cTc}ABgSF=3V;l+^4u5n+X2`Q+^(0D!RSHPopHRL{-Ph<$~4dUFz6f| z$E^rJL3>NhY~OCzV2<`3)LV%eG*}H2gdPyyd#(8_2S6DuH@k_X%MftBBlJMEu=u#0 zG$mqnm0?BiHB>7V6Pl+{-=(CM$wd8ZR;0qlfO^0s4jq-IQ3Vy-4&1{*bK3vBi)XV0 zubVgpT%c%gBxx-MnLJfE0NiFOBg0YFH-;3eWG7VBB}lkBSTR!yDbX^g35DpXw0|48 z8lq8$h_p3~&E49aDpZGX5yJ=Q72Xt2bK#|=%E_ro_9W?R>&zoZK9>-COyg?ascLZ4 zl!Tm~?h=_(^k$s{615*ss~IU(8?YZ$MxX`sX)b;dWYgWuwD5C>CUD@B!(jL zwnV-_>P1GOd$apqlPM=OK2?mz1=sAO)*GwUc@tdR*sB0R0A3VwBz3xb{BW*Xu3Vd2 zp)iTXY@hGq>EXsq2&7p_cB>gDJCe*+9tPiNy#@*XI}LFqeg1|AF)94h6�MYOug6 zV~Op@DudWL+IHANd7BYu$~3!bi{l9i84|ZIxG2_#bOR|#&T&qkH40?+$Xo8kFMV1f ztqM0{rILXN=|+K|bK>=~IO#AMP$II6^HmYgF)kC-IV0J^FavJ>$I+wdRxBqfom^+< zScdZ0)JLn9FVt8wVj00NjUKzmn1$Gcf`?Pd07^WqT8*CFda{+Qwwf5|PBi3vCOz5u zGW`imXb9KFn{8yvw1a}=wihBGcVleC&X^HgsP3CI~2RY6&T_@>}>{YC0-LCcxwYM5a z4P(oo;W2l^>t)y#&F9d`#p6Z7G+iP{^Y06?8RLwO_`H82zz0tKNEn32na1U>^h{xQ 
zIaEt_J3c?xWuRB|_g!?iwIdAL`?%><1p9lo^V%J?tkO}e88bkLa!UfW~^w(-Q(t~9FOkjG3*cyBvV`%Unxal9|$b5P?^Eg|?0 zyx^O~^llMiV99^k6fKRwzS=P5t<9B6Y=uh0$@aNbSXe~mOqZt9;N!6wl-L?xTc{j| zGaq?efVoME^1nJlxWfjjEcQq?V*1I*kPG#s{lD?RrZ!)F$GC}CT$+-tG_ZME5-Yl% zLxg?nMR}za_7=%2+irsTXU@YR)F#isljy;@0hxn({@J^Wo9k;~Dhy+DNE%Zj#$RH*9$8A)(q@g4#O)1~pT#c5DKR1}L1@m0!Hon+E{U7q@v^7aMQRe0pD zen|O6qlMVS-|$ofS?=F({7&eK47b%QghCR!5R>SS&^<36F zvtsY(lfv)@$snPg2x;*zfmC|619JI@nlW8C@wb$QB9OEAnp$fanp8;^D`5{qTN3Ni z21+!08h!f9j*Pjd10tZI-|s=X$zQA*IBNv?n!aOtqutLYDod*F?Dx{izM>|gdek_L z1=Pn)vegGo#?rV4$_cvYsK6qNN`s5B>AD`dz;hIZ!x-`*DYb-k)Jk&$dCmz6vP|l) z8xix<+R8snb4ny&gHSZGhAI8J7uTa?xk*og?uALh|I@KA)bSmmpHo{+>kSBX-TgQZ z+%K4x+db*{P>VX{3yVfXV+*LM--*Izh*LY!&P|e1fi!mw8W)9!9ELRL-t+w?Wq$KA zF_J`D{v{0#CTvxkV$JZLWH(P}&sFcNRCk{?3B;>ztg11Q=MH6*q3(9l0U}O6l@e9C z6TY;=9&>|NRWT=SrZdMcg{kd@?x2;JbLwOALQn~6>~!!~Fw6BP|0aq6RfSOy01vZ6 zD%j_4?JySkJ})L~8~Jhdy`|1Q?j22wPY36Y{M0NRqiQm|fKIJx?6-_E#lom%sMB!X z=bs{taWF^IrqiTwKLvY{YphlXET+Qd|4E_+dR9va2cp{E^_coX^@h1f7SLa4hkBj@ zn8#f@$FHF)aG*hPg|SPWyvofYV+o9}XTUnelEWG;=&K>QGIEcf8T?HtcU5(9O_4NH z&q(A-C7!A}1_06=8VJNnfLFTy`d#FAC~{;?P~TmiIwMu`Jdw=xJN6vbJhaGcO89jq z^5!&&L@>E?z;$=_bhENSSIY*{Ggt;%Tp9|mTd}j_Y;*sXhVtpX1?Sr^mtA!FayNJd zmj@JRWr%J&m40Y6zyso+Ad=2VOq>>xIxx#Q^@W3;ETU*<`=C{D+1aN+Oo=oK^s`wg+jnMjOxPmOFNnM~6 zf4tlp($T^9_;@jPd%a*auF^}5P9+a2S2z4bBJOucb^a-`RY17FP*ddl`>UF)kv}O< zQ9me>EE1co&Qzncxn4Im{b^W*2Nb+E8x&i2%t%Jf2HYDBdod(fq>$;wtB&TgUy;{{ zTJ4_A#Qc@}K<|upiMOOEx|cC|&Kr6H@S2#xpeY6TrGM;{?_?z5q-L${YDTEr6Fmv}_EkNdRPd9jcpZZMBiUy0sh z&<0R7&#YIMN;qg-@1Mw)wveA{1dM%>Jqgl6M#sE|T9h%X>&$0S-)MEHuGri~yqZ9; z2J9R=gW29>eqCOlwcC0(|HOsE=6$^H%1y)CwW-pyd*MFmy1L!H+p!QzkMr@QcG+07 zpk?&IwjVBZu2DW*5Y@5$0b|HuI9}JSi3H$c^9r34@1CaPEK6Mo47i~ul~~>vjbA=! zT+r{Ofc52R`L6K|AU+&_y~%J>hsXs~0Z`@UU62#)03eyjv{g71VABkOO)Oz`eh@v< zwV;}7abez{E;cf414ndu7YP*MQ)x&fJJZidB&fyUSHfJJG7jA)9@R$WK^j{*44k&k z=JSG7Eoc3?Q1Qj5E-*$ed4`^xn2^SZ|&WJWgHP zLp(bZLsq?ymPrh=wiynFe{n|wR*nMSy4?y`%f_xA_NjNt>y7H$d^QsZj?Vb~yS28utcB(wtR|^z4ztQ(2 zCPp=~)DcV{DmM)g=CekI^nPul)MzcH{$g6***gx)P2C1bVTiBx?T}$fk^W|6Af2)J zIj~Evixa6T2e{!g2Gy9ws&eIM^T=35_v)WaGaJhnc+=lZ>*osX2bZ{S=r0LNPg{m$ z#Y&Z_n#r8___(UwxTHFMCbSmRkZ;jka!B=^pM<_86?^NvRpu`c>q4Z`iZ2aEbUI}0xhgdp+SC~BH|D(ut7nr1n9X4oQl4Wi-^~D73(+GY;l&O z;ixtmV|9pDIZC7JJX<#6CqvW<0u;SmO_TL0lupI>!?8tkxlF3 znCVe^QyOjVjmmYjCRdF2>ydvXjyta3rnK2%tW$55&|(SJ(+YE^PrOA8uj}Y%n_uWe zs!;;6%pD19ZqI0@7-ZG4xW63?#yi;|SqeBCkS$ZGoCM|dc-!>6WK}u0s)*d!H#<+jjr`>B=s7_2EPSO7jIqZVa37 zvdc$|7~w6}8fl8JO4H?XQo1#IT8u9&Su-TbgFBb=Dbbo-Uo=geCnlYGPxOP{U@C`$ zK)vPCYPKFbSgavssW?%js?&2Mu33=t@;jy+tuh$&HXM%Ii*4$Wk{lWoSE`DNYXA&- z(S!6Ob?xQkgRvM0m>+@;*17|jPak1PT0~2sddgD-AQ@pnLgyi0pd8!V-0_OGsY6wi z-t|sCw_`hOIAXLD7GGTG zB^52c&>{b(o70CPrzG{heya;bO9as4sng*KTVJ^pT!QX)NTS5smIu~EeT!aQu~>b} zie^}9&!uC3Xb$f$_2M;#ANJK6XShP0OmkV_0&E}=!{|?klyIP_TZ%UX2$ow5miNax zYOC8fo=c;LglKMiImJ5Z1|I!_?AlH z1NZ)W7Sq+|r`=av7bXpjjhQOt+S_84qrFGsCEQZUtlwk%p-@GopfufsiFC6KOOgtM z6x35BI>>o*fLvGglqQo=0Gi2g66v5n#-X!*#XR)!Lc{`r5iX_41Eb&aK2vl%g))|< zTpCk_I+A5*T5O$WbUCPw9(@GwXo$vZLn{7TQ2o$U9~LzMS>B_)J600r^O(e!@ipFB znwiG7YJ0B14k=!~5{i{iE=VUTrlED@+I=NY?b$VOBqtTNaKWxy029mIe$=>WFP#qF zE)NQ7Abk{_ps;}~BXMu_=C7Gv#k+R;)TtdAGa0OPq!1MAVZ~SHuwY#PjMM0P+KY`Q zXSH_wUi^=Dlf^P`#&%B1awhY^8glZ|?l59~Osh##g76*%@_GOlxlx}xgR!wG9f5evSFp;n?Uqg6->)r#AU z#m1CVUylYmhJ8b)F)+WNgOV4`S46y;)c(0lhb!H5yE}*s-|OKU)~XWG7YUn*tFOs) zEDFJ%4Z1A!s@gD4!{UnT>V^ZS77~@pY7|8~RQ^(_!q|fYz|mUnVljyr?(q9T8e+e<7 zV3pWYV}d~I;+lA=N_;MYE!Ep*OVUQUvw@#9INmffr1zCM_3xeqk_+ic&&Rpr9E={$ 
zRMKXZ1lk&^0vf2MGV_PCm85aBYTm^v3ucu}CrR}d}(qbb!{&!QJd)2|!3LRS{tqp**pn)08-v=ySPCaIAwA3L7uwx*L~;`G;70fSxE71N61>Fz_EOqwPtB0fy91-r1X}JU zv~v^}A%JeIx#>vPgXg$hX<#D~+K>6=##%?L^y_p5pIVI$yITC<>ZUg=!t$@3_Ehe` zZ@6ufJ52sK;)Q3TSWh<=AyzfX%Ds&~_HB>Ow}bLERL`hik2R2NenfGg>Ytwt(>Wyi z&PPL5?Zk-+0D=w>&!X~>=g9PgC+;i`tF95Qq5zUo@W4|tMU5X3?Pi;+FLVsk&mMSjv5j-UR-B#_JFs*V#FC6S8Jh&8iCVpqDh z%4Z!55kF28L%s%+TQ-{mks!KW-1qN`ZXhC;`sr$sL~WTPpAE+%Ai(6-=Ag0NJ`y?{x}it77&0$7O%2OLP}VB1KLwSy zT4QvtZ*D@V_313;(vh$0dEDOvXH0SX{S!z2gomE%EIf7gY)wxC1JI6!Z2WpZW}1!h!D=ahg(!8FogfS&E55u= z7=XQE^cX}O+0~)QB*7e%aD)#W5s!I0=hSW8k2);uPBIh_V+Ph@6)E!=Joi;KpH=E| zz~%@;o=4!=8!*&`PK?f?`V)OY1sCY;D%Edq=| z;Zk!v{dyYQ=X!-R#5qm?G%RA;ww8hm79b~)>oQ{^Zy?r~&!L`2PG^$wX^fH!=g8-Z zE5mxhc_IsRT{IQ(G-d6ofo778MK$bl%mg7f(;4**HrmTxojLAZ}!HQ2LQ z-IT3b9YG3{49NI1>dujdWX<=)PUw9aP+b#`iKro2sI%w>)0mxxPLo6UJ6vdje9F90 z;Uc-aI_!9m9DV6k^QAV{!3jNMJTXY-wru zl-lc6{rYz*N3;{l0Ih=t>6mNsoq=FGywek~QDVn?N&M(@+&k1v_&zlI`CH|L4%wRT z{5v^TA#nI!8BnxTOm&8)+Ny_Q8aEs=-hIE(cU63bWq=4f`xc^DjW2EHJ)`aBfMRQP z9VMYwvtw@tzt83K2iEh})sKH#$vonpF|>U04gN0cS|RSsR`M_4HQV&@CJBxflkT!v z>v)6gg4MSw$-CZJl2BRpjpV$-brWh#Bx zq3MjYRh{YXSI5b?%awh}=d=3AcPEzw4Y!>ymsc|nzlvRXNhoR2tEkr3XLvy&SxSC{_6|94`2kz`skSs#be=pSc4{Hfyh ziV@?Zh4Eoy=rI$bUK^`#?`Vx6YN@yUr9h#;XvBXAoUjrYJ&gEL!v7J0j&L`?>P608 z9b>2LU9gnFywM#xcHm$r4`7R{$H~A5X}Xj6a(>K$JS?#?Bo!0I^y4xTe?(_gVd|6r zB7t^Z3%6E(;za-9BaV_CM(0&{+hlXY&Amk!m5ek;Nm8%~is_6Q^TxnWx)7TyeoxDC z*EB#cp>9WI5%Jq8h+7+kX1rL!(o(&}ciK&qTJg0P)dB@)@kC18C{|QGV%3kXR|1;is=AWC1jN1#r2#J`M2~q*KHRLxYP|0g& zNOv_Ap;@_|Rm>9_A4+huu5Lq=g#$uWNyQC+z{7C5m8PV$+DYwrJejg7q-IWQ2;MIP zS1Ix&aHK%rF1I@3N<$!Lrx&)`fs}%pTofqMh3Wmaj;qz?aGQLo-zng^1L$Yaa58x& znNt|ejtU8(5t#BXb$V@SrhTBy&|S0zZ~a(bm*)nq{HcJ3q}l;u>3+I)GJ>O26nIS9 zpXu&*=U>2?+X&MCi2BonS1(hG)xTIfTKlTP?BB&GBb|t(D}yU3G2gj#FvKs3n^&C6X8f>F#YFjmpy0G=?3TJPUQ8W;LY&xsDjxA(3vO>{a6v z&$6upefe(sAtBmmun0AZ<4$~i1-Gr~agK8)k}4mUqa?*Uyog0aw1-rI)7BQ8+J@U1 z^SX(x?E$9%jz`PD(fURZ)31il9^8xXms+d|%M}?QR_uU$2TmMj7ZdUvwPD9w<6Yy` z{SGY4bc6^#dDIMeW%&vBMdcXR*PA;#0jZo$`>~~c;e_E}xpt=g-b91V#v4oq?a&{| zYzlW3uiRM0e%E0X#`1|e+Y?1uc&!{{}@4=Vwr-5d$_R^ZDkxb3IE!Hdw8bA zT$3qJ`w^jNMY)2bdT2lN{&mf*%JS~n+G zheYPY!gf;czFNL`6C_`Mvo^6jsM@({8hry_=tTE#glKAy7ogXB= zR-G?uFH`8xCyOIo@RLz|Hxy_weA&PmUz&ZfDFUW+D-8xwHCtUV!1XmBbMt$n<19FA z2qS8oHxy1&3O#dHf#m+oKWmh7@FA%=H2cW7nQ_O zd;G*faf>XQYZMZUlC`zlQ#_mL1WUrd-lZkXZE|ZFOstq)%C}by#Z^xHVxyMPEF<&Z z5dNt(;;Q+}_`(NaCDTVf)BrCl;a%ytI>R2#U3x)6tQ4^GJZ%wd*ex6JJ7H3!-`yz4 zf!O<2H~uz~j`}lq|7KZ}_70PS>_-Jhg*#8rR~~6R9fAU?wgT#@96PB3u(O5r zFRV}ZI*Zxn?t!cx2M^M7{@?IT=P$9@KmT#+7eX2Mk(s@Gk5WYrT0*YR$w8hAl#T27 zk#&6pILo0(1c++tqhT?^eu!5pO%oc0)>P>{P8-r&H!Z>>ASlyHdbl|W|E*G0b_Atvtf_tp!f*?1FW!@I)D?c=IK!%tMWiFbgdG0* zSLsy;D%2)1PZ`YP&XGi1pA@y2v8E8vYRjX-wylhm4WXl>qgbwuPY8?znEJqmlSM>< zRV!j*P9ggL{m}=$OaZ@3@n{gZgfD^#)*5Fy#%1NR=^rH3UkD0SH1c=p*}y^{|G!Gl z_=bL0L>Vr#h5c8x`bQNUAWsG^GNU@Lv-$Z?y!N|!aOa6P-?Kmbr{zDWFmy4nMl`s^ z0`Jqm%giLf8rL-26>wt68yw$p<)T;rHEE(dSWTpske`7SM(+one(Em5BD&{spvXm< zGJ{@J9e|wDKOh(R+InkpgcNkN0Le7LP`{OS7hUZI2l!Ya+Co|7Av#-RmgD*$ncxZ5 z*bz&_(I$K{k>?^$l>Dm#Zct+Niz+4c2>#o4xB5#yo`QLF{R0Qr{fzmt1YZZE>jPU1 zJM^A(D*(c+_M!f!hyJa+MbdCS%fc!!`M&gQxk*jc);?e!g24K3I>PvQL)DxIP*o8F z!!M*_*8i`#ua1hc>-q+yk&y0?)KL&ohGsxgLH&qi&b?t-N zx;t24m-iaUKc;vsEkG1AI4*_FuOTO~b+B1a7$xXr<$kDW;kB`q!hXh{BJ0lOruH?;|;^yYqi0|j<0-|jf*O_;=^2CSh&kFC~(5(ma z(+Kj=y$mzG{zJe4-Uzmhw9X|{9rC;yWKiUL?7Scr_miG&pA+nl;1gInvqGO3bg1?$ zN+>oZkig^%~oWMkWI}P;DYC_;hm%~_z^%O#yot-^V#Dm&#xif@SB1g~HQ$3t+DsM!%I$Rtg+EH z%!`vG4lGT`z;j9W9uOgpF1cb7FD;L?=>2O=GIbaZr%D9OqF zZ%PK66nuN<8FNHJNf}56QCTdTYmLDIud+we-~G73IXXG{n6)b|Jwuy7c=~pb2s9rB 
[GIT binary patch data omitted]
diff --git a/images/vector-search/embeddings.png b/images/vector-search/embeddings.png new file mode 100644 index 0000000000000000000000000000000000000000..d627de1d0cb7094f280c0df4ab5c70822f6c6e33 GIT binary patch literal 16138
[GIT binary patch data omitted]
zfz+^POk)~Yb!1IrwD+LC)RZkVbX-3)2jRPvAWX@5aN2*duC4$S5fNpJRDa+11q<}? zGU3}3zru@%z+k^}Diy>7qH%RB7y7e+F>HZ%qePW-e!OILnuA1`Km^a}(uTX&igZ`0 z;RlxP`K}R)9lcX);K;cYkzTpX8cnh!<8rIY0M@$M?p3h)1EFjxdR`pZuQ2zdOuVK-LBvnyM^_C zgxc?07pCb_iq@C+&Ddf$l_&WeVpmMMQ$xpSdi5D8^i`YLtS_NDRW4Ph(7bU{WR)4) zy+tW`&v6nm>#d^)v!WmO+U$N*+x0kH`M!>R7*kFxZ8)$j@ zEE189UyBvhDIpUjvTNbRLhWr0JGT16$<_CcOk!n)m1$G!nV)~1(1`?WS#W%<@;axC zCA(SpG)!=$=VD$$5v?vW_<>UvQ;myMFDNG4BEcJL+NDj(vU1hcNi=h9SOvyt1)n&o z|270)aFap-m_oxLJ{wV zKbcU$l|<(*{5TDWfxuwr8wZh|6|<>pk@cVRBMH(l_^tR&s|A=vzG#1SaIxOH{^*?= zV9}(jW4y+0!rfZEw>JAp@-TCg5hErRB)33R81rAiug`H5sA7)G6``9IQT`ts-9YV{ ztgPR-(^B&%HdtS_yakOFO9%zz))UGB*CRTsTB`JtR_Cx8No8=ODjlZ%o>4G5)K&HV z4FQi^GF+}Y?X94hhyk~6Kr;En?A>=NpSQJAh-z$AC!Oj*O(2^^CLT!dUuV#Luw|np zQxB)zacq^_X5<<8O}Ku^k~WT)nhFah^95as!EhaMV+D~14o{m*+9qmi7mxi?kJNxl zP%tTHPMUsu80}85sp{he25@}vUNS~*lv0^s<5VZ~(fP9n(wTvt+RkG0=uUW)(e3!r zzQ>G}fFP%O4B{ebO)9ru$v)$$;~3Ub!1ZNi+#5l+bL3Oavs5#fZt^Zp(k8xynx9tW zuQEfn&tO>EC6<)rf;P6F?O8+!7WecM}nE25*X_cqf1QvdxW>&&-3zAUxmhzbA2{!i_P z2@VujJ`br_a&5>1)_E6!ykxN85@Bi@MO8D9v`ev;jPUn?U*{LTrg4q9CPL!jGSius zcQv9_z9=N%Ei?$YDbHS=9vOv!KoP_fOX{D~)vGWa2in92g~0?|6=R5Tv^5XNc`+`Q zW*I&Z+LP2wKNf)t=u&<^u_i0C#ziqQ&CDobk}U`XMxn4Z*LW1gTJCT#OwZhW(e>S5qGNj0 z3p>Z#e;;UG6tf|P#V_YwuVKH6C?soM{g@5N6GT~g^%ce3@OjMm^f5Qub-b8FVtFJy zIO(Tx;(-WqmFRiNiVkBr^$JZ2bh$7bfJ$aAjbS$=1y+WkFbocO2xu5tG)a7WP*sl> zHg4PSI>#iFWg~Sa_eGsJ*%A2` zi!ti8!pJcK&~udLxZ={1hMgD%=9H9_o0}_MGL8$Oq5BwQ+bdaHw)Zt95$m@yMW;Ru zzaO#a^ikp<&s(u`5=m!2ny;FxpFT|W9yq3wLQ7K%1vj7rY6x#jxXHHB6O^p1FkHPT zU3MrQUVa|T=EK*^&!|MXn*cmG2cz`u6zKYQed%{2esu>Vt#%D>m^fA_5a zeSz;kj{N`BopZ{VujZ?njg*g3dJQXt)Q#FJd+&6J*+%KdsdLU=z%Lb17@HFDQkFdN=|ZZ2JOt1%6_}HfheI*M)eZDa$zbi{WaJNP+KHY9#vVK zW~ndYWQ8y17?@d5){>h+$M>bXT<^xcG#hCOa7|j}_2sfrp;I654R4X%9{E))5=jPI~=KsEu@g< z4A$AVV1RM`2>i}JX7xvG7kUD4daGTN!pGM3R{N6XAMNw7$nz1i zP=|}1hVu6-$95!KH?B(LN8{$cc^qv@$ZH9501xs#@UE9O@l?36C7fr;1*+|ab_6w< zvPcD%OCu#01k!W^i65~s79OjzMeKgPP~pPU5P8|?b!YH{K47>UhL7ASO7%K?W2Dkj z^}^w4&R3N(gV^3l&CDmaq;}QCA;}R5_3;=AiL$rkg Date: Tue, 11 Feb 2025 12:33:58 -0500 Subject: [PATCH 10/32] Change about page and remove k-NN terminology Signed-off-by: Fanit Kolchina --- _about/index.md | 103 +++++++++--------- _ml-commons-plugin/tutorials/index.md | 31 ------ _search-plugins/index.md | 20 +--- _vector-search/api.md | 2 +- _vector-search/creating-vector-index/index.md | 2 +- .../creating-vector-index/method.md | 6 +- .../creating-vector-index/vector-field.md | 6 +- .../auto-generated-embeddings.md | 2 +- _vector-search/getting-started/index.md | 6 +- .../pre-generated-embeddings.md | 4 +- .../tutorials/bedrock-guardrails.md | 3 + .../tutorials/build-chatbot.md | 3 + .../tutorials/conversational-search-cohere.md | 3 + .../tutorials/generate-embeddings.md | 7 +- .../getting-started/tutorials/index.md | 68 ++++++++++++ .../{ => tutorials}/neural-search-tutorial.md | 7 +- .../getting-started}/tutorials/rag-chatbot.md | 3 + .../tutorials/rag-conversational-agent.md | 13 ++- .../tutorials/reranking-bedrock.md | 3 + .../tutorials/reranking-cohere.md | 3 + .../tutorials/reranking-cross-encoder.md | 3 + .../tutorials/semantic-search-byte-vectors.md | 5 +- _vector-search/index.md | 4 +- .../ml-powered-search/hybrid-search.md | 6 +- _vector-search/ml-powered-search/index.md | 4 +- .../ml-powered-search/multimodal-search.md | 10 +- .../ml-powered-search/semantic-search.md | 12 +- .../ml-powered-search/text-chunking.md | 2 +- .../binary-quantization.md | 2 +- .../disk-based-vector-search.md | 4 +- .../faiss-16-bit-quantization.md | 4 +- .../faiss-product-quantization.md | 2 
 .../knn-vector-quantization.md                |   2 +-
 .../lucene-scalar-quantization.md             |   4 +-
 .../performance-tuning.md                     |   2 +-
 .../filter-search-knn.md                      |   8 +-
 .../nested-search-knn.md                      |   4 +-
 .../radial-search-knn.md                      |   2 +-
 .../approximate-knn.md                        |  18 +-
 .../knn-score-script.md                       |   2 +-
 .../painless-functions.md                     |   6 +-
 41 files changed, 224 insertions(+), 177 deletions(-)
 delete mode 100644 _ml-commons-plugin/tutorials/index.md
 rename {_ml-commons-plugin => _vector-search/getting-started}/tutorials/bedrock-guardrails.md (99%)
 rename {_ml-commons-plugin => _vector-search/getting-started}/tutorials/build-chatbot.md (99%)
 rename {_ml-commons-plugin => _vector-search/getting-started}/tutorials/conversational-search-cohere.md (99%)
 rename {_ml-commons-plugin => _vector-search/getting-started}/tutorials/generate-embeddings.md (97%)
 create mode 100644 _vector-search/getting-started/tutorials/index.md
 rename _vector-search/getting-started/{ => tutorials}/neural-search-tutorial.md (99%)
 rename {_ml-commons-plugin => _vector-search/getting-started}/tutorials/rag-chatbot.md (99%)
 rename {_ml-commons-plugin => _vector-search/getting-started}/tutorials/rag-conversational-agent.md (97%)
 rename {_ml-commons-plugin => _vector-search/getting-started}/tutorials/reranking-bedrock.md (99%)
 rename {_ml-commons-plugin => _vector-search/getting-started}/tutorials/reranking-cohere.md (99%)
 rename {_ml-commons-plugin => _vector-search/getting-started}/tutorials/reranking-cross-encoder.md (99%)
 rename {_ml-commons-plugin => _vector-search/getting-started}/tutorials/semantic-search-byte-vectors.md (98%)

diff --git a/_about/index.md b/_about/index.md
index 041197eeba..1bffe053fd 100644
--- a/_about/index.md
+++ b/_about/index.md
@@ -10,6 +10,51 @@ redirect_from:
   - /docs/opensearch/
   - /opensearch/
   - /opensearch/index/
+why_use:
+  - heading: "Vector database"
+    description: "Use OpenSearch as a vector database to combine the power of traditional search, analytics, and vector search."
+    link: "/vector-search/"
+  - heading: "Fast, scalable full-text search"
+    description: "Help users find the right information within your application, website, or data lake catalog."
+    link: "/search-plugins/"
+  - heading: "Application and infrastructure monitoring"
+    description: "Use observability logs, metrics, and traces to monitor your applications in real time."
+    link: "/observing-your-data/"
+  - heading: "Security and event information management"
+    description: "Centralize logs to enable real-time security monitoring and forensic analysis."
+    link: "/security/"
+features:
+  - heading: "Vector search"
+    description: "Build AI/ML-powered vector search applications using semantic, hybrid, or conversational search and more."
+    link: "/vector-search/"
+  - heading: "Machine learning"
+    description: "Integrate machine learning models into your workloads."
+    link: "/ml-commons-plugin/"
+  - heading: "Customizing your search"
+    description: "From optimizing performance to improving relevance, customize your search experience."
+    link: "/search-plugins/"
+  - heading: "Workflow automation"
+    description: "Automate complex OpenSearch setup and preprocessing tasks."
+    link: "/automating-configurations/"
+  - heading: "Anomaly detection"
+    description: "Identify atypical data and receive automatic notifications."
+    link: "/monitoring-plugins/ad/"
+  - heading: "Building visualizations"
+    description: "Visualize your data in OpenSearch Dashboards."
+ link: "/dashboards/" +getting_started: + - heading: "Get started with OpenSearch" + description: "Learn about OpenSearch and get started ingesting and searching data." + link: "/getting-started/" + - heading: "Get started with OpenSearch Dashboards" + description: "Learn about OpenSearch Dashboards applications and tools to visualize data." + link: "/dashboards/quickstart/" + - heading: "Get started with vector search" + description: "Learn about vector search options and build your first vector search application." + link: "/search-plugins/" + - heading: "Get started with OpenSearch security" + description: "Learn about using security in OpenSearch." + link: "/getting-started/security/" --- {%- comment -%}The `/docs/opensearch/` redirect is specifically to support the UI links in OpenSearch Dashboards 1.0.0.{%- endcomment -%} @@ -22,70 +67,20 @@ This section contains documentation for OpenSearch and OpenSearch Dashboards. ## Getting started -To get started, explore the following documentation: - -- [Getting started guide]({{site.url}}{{site.baseurl}}/getting-started/): - - [Intro to OpenSearch]({{site.url}}{{site.baseurl}}/getting-started/intro/) - - [Installation quickstart]({{site.url}}{{site.baseurl}}/getting-started/quickstart/) - - [Communicate with OpenSearch]({{site.url}}{{site.baseurl}}/getting-started/communicate/) - - [Ingest data]({{site.url}}{{site.baseurl}}/getting-started/ingest-data/) - - [Search data]({{site.url}}{{site.baseurl}}/getting-started/search-data/) - - [Getting started with OpenSearch security]({{site.url}}{{site.baseurl}}/getting-started/security/) -- [Install OpenSearch]({{site.url}}{{site.baseurl}}/install-and-configure/install-opensearch/index/) -- [Install OpenSearch Dashboards]({{site.url}}{{site.baseurl}}/install-and-configure/install-dashboards/index/) -- [FAQ](https://opensearch.org/faq) +{% include cards.html cards=page.getting_started %} ## Why use OpenSearch? - - - - - - - - - - - - - - - - - - - - - -
Fast, scalable full-text searchApplication and infrastructure monitoringSecurity and event information managementOperational health tracking
Fast, scalable full-text searchApplication and infrastructure monitoringSecurity and event information managementOperational health tracking
Help users find the right information within your application, website, or data lake catalog. Easily store and analyze log data, and set automated alerts for performance issues.Centralize logs to enable real-time security monitoring and forensic analysis.Use observability logs, metrics, and traces to monitor your applications in real time.
+{% include cards.html cards=page.why_use documentation_link=true %} ## Key features -OpenSearch provides several features to help index, secure, monitor, and analyze your data: - -- [Anomaly detection]({{site.url}}{{site.baseurl}}/monitoring-plugins/ad/) -- Identify atypical data and receive automatic notifications. -- [SQL]({{site.url}}{{site.baseurl}}/search-plugins/sql/index/) -- Use SQL or a Piped Processing Language (PPL) to query your data. -- [Index State Management]({{site.url}}{{site.baseurl}}/im-plugin/) -- Automate index operations. -- [Search methods]({{site.url}}{{site.baseurl}}/search-plugins/knn/) -- From traditional lexical search to advanced vector and hybrid search, discover the optimal search method for your use case. -- [Machine learning]({{site.url}}{{site.baseurl}}/ml-commons-plugin/index/) -- Integrate machine learning models into your workloads. -- [Workflow automation]({{site.url}}{{site.baseurl}}/automating-configurations/index/) -- Automate complex OpenSearch setup and preprocessing tasks. -- [Performance evaluation]({{site.url}}{{site.baseurl}}/monitoring-plugins/pa/) -- Monitor and optimize your cluster. -- [Asynchronous search]({{site.url}}{{site.baseurl}}/search-plugins/async/) -- Run search requests in the background. -- [Cross-cluster replication]({{site.url}}{{site.baseurl}}/replication-plugin/index/) -- Replicate your data across multiple OpenSearch clusters. - - -## The secure path forward - -OpenSearch includes a demo configuration so that you can get up and running quickly, but before using OpenSearch in a production environment, you must [configure the Security plugin manually]({{site.url}}{{site.baseurl}}/security/configuration/index/) with your own certificates, authentication method, users, and passwords. To get started, see [Getting started with OpenSearch security]({{site.url}}{{site.baseurl}}/getting-started/security/). - -## Looking for the Javadoc? +{% include cards.html cards=page.features%} -See [opensearch.org/javadocs/](https://opensearch.org/javadocs/). ## Get involved -[OpenSearch](https://opensearch.org) is supported by Amazon Web Services. All components are available under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0.html) on [GitHub](https://github.com/opensearch-project/). +[OpenSearch](https://opensearch.org) is supported by the OpenSearch Software Foundation. All components are available under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0.html) on [GitHub](https://github.com/opensearch-project/). The project welcomes GitHub issues, bug fixes, features, plugins, documentation---anything at all. To get involved, see [Contributing](https://opensearch.org/source.html) on the OpenSearch website. --- diff --git a/_ml-commons-plugin/tutorials/index.md b/_ml-commons-plugin/tutorials/index.md deleted file mode 100644 index ec38422270..0000000000 --- a/_ml-commons-plugin/tutorials/index.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -layout: default -title: Tutorials -has_children: true -has_toc: false -nav_order: 140 ---- - -# Tutorials - -Using the OpenSearch machine learning (ML) framework, you can build various applications, from implementing conversational search to building your own chatbot. 
To learn more, explore the following ML tutorials: - -- **Semantic search**: - - [Generating embeddings for arrays of objects]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/generate-embeddings/) - - [Semantic search using byte-quantized vectors]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/semantic-search-byte-vectors/) - -- **Conversational search**: - - [Conversational search using the Cohere Command model]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/conversational-search-cohere/) - -- **Using guardrails**: - - [Using Amazon Bedrock guardrails]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/bedrock-guardrails/) - -- **Reranking search results**: - - [Reranking search results using the Cohere Rerank model]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/reranking-cohere/) - - [Reranking search results using models hosted on Amazon Bedrock]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/reranking-bedrock/) - - [Reranking search results using the MS MARCO cross-encoder model]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/reranking-cross-encoder/) - -- **Agents and tools**: - - [Retrieval-augmented generation (RAG) chatbot]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/rag-chatbot/) - - [RAG with a conversational flow agent]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/rag-conversational-agent/) - - [Build your own chatbot]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/build-chatbot/) diff --git a/_search-plugins/index.md b/_search-plugins/index.md index cca2493b8a..f9aff32389 100644 --- a/_search-plugins/index.md +++ b/_search-plugins/index.md @@ -22,25 +22,9 @@ OpenSearch supports the following search methods. OpenSearch supports [keyword (BM25) search]({{site.url}}{{site.baseurl}}/search-plugins/keyword-search/), which searches the document corpus for words that appear in the query. -### ML-powered search +### Vector search -OpenSearch supports the following machine learning (ML)-powered search methods: - -- **Vector search** - - - [k-NN search]({{site.url}}{{site.baseurl}}/search-plugins/knn/): Searches for the k-nearest neighbors to a search term across an index of vectors. - -- **Neural search**: [Neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/) facilitates generating vector embeddings at ingestion time and searching them at search time. Neural search lets you integrate ML models into your search and serves as a framework for implementing other search methods. The following search methods are built on top of neural search: - - - [Semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/): Considers the meaning of the words in the search context. Uses dense retrieval based on text embedding models to search text data. - - - [Multimodal search]({{site.url}}{{site.baseurl}}/search-plugins/multimodal-search/): Uses multimodal embedding models to search text and image data. - - - [Neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/): Uses sparse retrieval based on sparse embedding models to search text data. - - - [Hybrid search]({{site.url}}{{site.baseurl}}/search-plugins/hybrid-search/): Combines traditional search and vector search to improve search relevance. - - - [Conversational search]({{site.url}}{{site.baseurl}}/search-plugins/conversational-search/): Implements a retrieval-augmented generative search. 
+OpenSearch supports various machine learning (ML)-powered search methods using [vector search]({{site.url}}{{site.baseurl}}/vector-search/). ## Query languages diff --git a/_vector-search/api.md b/_vector-search/api.md index 9272626d9a..8dd0bae4ca 100644 --- a/_vector-search/api.md +++ b/_vector-search/api.md @@ -351,7 +351,7 @@ Request parameter | Description `max_training_vector_count` | The maximum number of vectors from the training index to be used for training. Defaults to all the vectors in the index. Optional. `search_size` | The training data is pulled from the training index using scroll queries. This parameter defines the number of results to return per scroll query. Default is `10000`. Optional. `description` | A user-provided description of the model. Optional. -`method` | The configuration of the approximate k-NN method used for search operations. For more information about the available methods, see [k-NN index method definitions]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/). The method requires training to be valid. +`method` | The configuration of the approximate k-NN method used for search operations. For more information about the available methods, see [vector index method definitions]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/). The method requires training to be valid. `space_type` | The space type for which this model is trained, for example, Euclidean or cosine. Note: This value can also be set in the `method` parameter. #### Usage diff --git a/_vector-search/creating-vector-index/index.md b/_vector-search/creating-vector-index/index.md index dcfdbfb91d..be1a511e7e 100644 --- a/_vector-search/creating-vector-index/index.md +++ b/_vector-search/creating-vector-index/index.md @@ -13,7 +13,7 @@ redirect_from: Creating a vector index in OpenSearch involves a common core process with some variations depending on the type of vector search. This guide outlines the key elements shared across all vector indexes and the differences specific to supported use cases. -To create a k-NN index, set the `settings.index.knn` parameter to `true`: +To create a vector index, set the `settings.index.knn` parameter to `true`: ```json PUT /test-index diff --git a/_vector-search/creating-vector-index/method.md b/_vector-search/creating-vector-index/method.md index 8139fee715..2b93480616 100644 --- a/_vector-search/creating-vector-index/method.md +++ b/_vector-search/creating-vector-index/method.md @@ -109,7 +109,7 @@ An index created in OpenSearch version 2.11 or earlier will still use the old `e ## Supported Faiss encoders -You can use encoders to reduce the memory footprint of a k-NN index at the expense of search accuracy. The k-NN plugin currently supports the `flat`, `pq`, and `sq` encoders in the Faiss library. +You can use encoders to reduce the memory footprint of a vector index at the expense of search accuracy. OpenSearch currently supports the `flat`, `pq`, and `sq` encoders in the Faiss library. The following example method definition specifies the `hnsw` method and a `pq` encoder: @@ -136,7 +136,7 @@ Encoder name | Requires training | Description :--- | :--- | :--- `flat` (Default) | No | Encode vectors as floating-point arrays. This encoding does not reduce memory footprint. `pq` | Yes | An abbreviation for _product quantization_, it is a lossy compression technique that uses clustering to encode a vector into a fixed size of bytes, with the goal of minimizing the drop in k-NN search accuracy. 
At a high level, vectors are broken up into `m` subvectors, and then each subvector is represented by a `code_size` code obtained from a code book produced during training. For more information about product quantization, see [this blog post](https://medium.com/dotstar/understanding-faiss-part-2-79d90b1e5388). -`sq` | No | An abbreviation for _scalar quantization_. Starting with k-NN plugin version 2.13, you can use the `sq` encoder to quantize 32-bit floating-point vectors into 16-bit floats. In version 2.13, the built-in `sq` encoder is the SQFP16 Faiss encoder. The encoder reduces memory footprint with a minimal loss of precision and improves performance by using SIMD optimization (using AVX2 on x86 architecture or Neon on ARM64 architecture). For more information, see [Faiss scalar quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/faiss-16-bit-quantization/). +`sq` | No | An abbreviation for _scalar quantization_. Starting with OpenSearch version 2.13, you can use the `sq` encoder to quantize 32-bit floating-point vectors into 16-bit floats. In version 2.13, the built-in `sq` encoder is the SQFP16 Faiss encoder. The encoder reduces memory footprint with a minimal loss of precision and improves performance by using SIMD optimization (using AVX2 on x86 architecture or Neon on ARM64 architecture). For more information, see [Faiss scalar quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/faiss-16-bit-quantization/). ### PQ parameters @@ -253,7 +253,7 @@ You can reduce the memory footprint by a factor of 2, with a minimal loss in sea ## Memory estimation -In a typical OpenSearch cluster, a certain portion of RAM is reserved for the JVM heap. The k-NN plugin allocates native library indexes to a portion of the remaining RAM. This portion's size is determined by the `circuit_breaker_limit` cluster setting. By default, the limit is set to 50%. +In a typical OpenSearch cluster, a certain portion of RAM is reserved for the JVM heap. OpenSearch allocates native library indexes to a portion of the remaining RAM. This portion's size is determined by the `circuit_breaker_limit` cluster setting. By default, the limit is set to 50%. Having a replica doubles the total number of vectors. {: .note } diff --git a/_vector-search/creating-vector-index/vector-field.md b/_vector-search/creating-vector-index/vector-field.md index 973f77f987..fbe768563c 100644 --- a/_vector-search/creating-vector-index/vector-field.md +++ b/_vector-search/creating-vector-index/vector-field.md @@ -25,7 +25,7 @@ Starting with OpenSearch version 2.16, you can use `binary` vectors with the `fa ## SIMD optimization for the Faiss engine -Starting with version 2.13, the k-NN plugin supports [Single Instruction Multiple Data (SIMD)](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) processing if the underlying hardware supports SIMD instructions (AVX2 on x64 architecture and Neon on ARM64 architecture). SIMD is supported by default on Linux machines only for the Faiss engine. SIMD architecture helps boost overall performance by improving indexing throughput and reducing search latency. Starting with version 2.18, the k-NN plugin supports AVX512 SIMD instructions on x64 architecture. +Starting with version 2.13, OpenSearch supports [Single Instruction Multiple Data (SIMD)](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) processing if the underlying hardware supports SIMD instructions (AVX2 on x64 architecture and Neon on ARM64 architecture). 
SIMD is supported by default on Linux machines only for the Faiss engine. SIMD architecture helps boost overall performance by improving indexing throughput and reducing search latency. Starting with version 2.18, OpenSearch supports AVX512 SIMD instructions on x64 architecture. SIMD optimization is applicable only if the vector dimension is a multiple of 8. {: .note} @@ -43,9 +43,9 @@ For x64 architecture, the following versions of the Faiss library are built and When using the Faiss library, the performance ranking is as follows: AVX512 > AVX2 > no optimization. {: .note } -If your hardware supports AVX512, the k-NN plugin loads the `libopensearchknn_faiss_avx512.so` library at runtime. +If your hardware supports AVX512, OpenSearch loads the `libopensearchknn_faiss_avx512.so` library at runtime. -If your hardware supports AVX2 but doesn't support AVX512, the k-NN plugin loads the `libopensearchknn_faiss_avx2.so` library at runtime. +If your hardware supports AVX2 but doesn't support AVX512, OpenSearch loads the `libopensearchknn_faiss_avx2.so` library at runtime. To disable the AVX512 and AVX2 SIMD instructions and load the non-optimized Faiss library (`libopensearchknn_faiss.so`), specify the `knn.faiss.avx512.disabled` and `knn.faiss.avx2.disabled` static settings as `true` in `opensearch.yml` (by default, both of these are `false`). diff --git a/_vector-search/getting-started/auto-generated-embeddings.md b/_vector-search/getting-started/auto-generated-embeddings.md index 79bc84f841..afcdfe5e56 100644 --- a/_vector-search/getting-started/auto-generated-embeddings.md +++ b/_vector-search/getting-started/auto-generated-embeddings.md @@ -255,4 +255,4 @@ The response contains the matching documents: ## Next steps -- Learn about configuring semantic and hybrid search in the [Semantic and hybrid search tutorial]({{site.url}}{{site.baseurl}}/vector-search/getting-started/neural-search-tutorial/) \ No newline at end of file +- Learn about configuring semantic and hybrid search in the [Getting started with semantic and hybrid search]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/neural-search-tutorial/) \ No newline at end of file diff --git a/_vector-search/getting-started/index.md b/_vector-search/getting-started/index.md index 141aa5edcb..45ac08d2cd 100644 --- a/_vector-search/getting-started/index.md +++ b/_vector-search/getting-started/index.md @@ -14,9 +14,9 @@ tutorial_cards: - heading: "Auto-generated embeddings quickstart" description: "Use embeddings automatically generated within OpenSearch" link: "/vector-search/getting-started/auto-generated-embeddings/" - - heading: "Semantic and hybrid search tutorial" + - heading: "Getting started with semantic and hybrid search" description: "Learn how to implement semantic and hybrid search" - link: "/vector-search/getting-started/neural-search-tutorial/" + link: "/vector-search/getting-started/tutorials/neural-search-tutorial/" pre_items: - heading: "Generate embeddings" description: "Generate embeddings outside of OpenSearch using your favorite embedding utility." @@ -62,7 +62,7 @@ In the following image, the vectors for `Wild West` and `Broncos` are closer to ![Similarity search]({{site.url}}{{site.baseurl}}/images/vector-search/vector-similarity.jpg){: width="450px"} -To learn more about the types of k-NN search that OpenSearch supports, see [Vector search techniques]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/).
+To learn more about the types of vector search that OpenSearch supports, see [Vector search techniques]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/). ## Vector search options diff --git a/_vector-search/getting-started/pre-generated-embeddings.md b/_vector-search/getting-started/pre-generated-embeddings.md index 589cc0be73..0be364c174 100644 --- a/_vector-search/getting-started/pre-generated-embeddings.md +++ b/_vector-search/getting-started/pre-generated-embeddings.md @@ -9,13 +9,13 @@ nav_order: 10 With this approach, you generate embeddings externally and then index them into OpenSearch. This method offers greater flexibility in how embeddings are created. -In this example, you'll create a k-NN index, ingest vector embedding data into the index, and search the data. +In this example, you'll create a vector index, ingest vector embedding data into the index, and search the data. ## Prerequisite Before you start, you must generate embeddings using a library of your choice. -## Step 1: Create a k-NN index +## Step 1: Create a vector index First, create an index that will store sample hotel data. To use vector search, set `index.knn` to `true` and specify the `location` field as a `knn_vector`: diff --git a/_ml-commons-plugin/tutorials/bedrock-guardrails.md b/_vector-search/getting-started/tutorials/bedrock-guardrails.md similarity index 99% rename from _ml-commons-plugin/tutorials/bedrock-guardrails.md rename to _vector-search/getting-started/tutorials/bedrock-guardrails.md index b7357c29b9..c8c709c1f1 100644 --- a/_ml-commons-plugin/tutorials/bedrock-guardrails.md +++ b/_vector-search/getting-started/tutorials/bedrock-guardrails.md @@ -2,7 +2,10 @@ layout: default title: Using Amazon Bedrock guardrails parent: Tutorials +grand_parent: Getting started nav_order: 25 +redirect_from: + - /ml-commons-plugin/tutorials/bedrock-guardrails/ --- # Using Amazon Bedrock guardrails diff --git a/_ml-commons-plugin/tutorials/build-chatbot.md b/_vector-search/getting-started/tutorials/build-chatbot.md similarity index 99% rename from _ml-commons-plugin/tutorials/build-chatbot.md rename to _vector-search/getting-started/tutorials/build-chatbot.md index 1e51298106..33835a46d9 100644 --- a/_ml-commons-plugin/tutorials/build-chatbot.md +++ b/_vector-search/getting-started/tutorials/build-chatbot.md @@ -2,7 +2,10 @@ layout: default title: Build your own chatbot parent: Tutorials +grand_parent: Getting started nav_order: 60 +redirect_from: + - /ml-commons-plugin/tutorials/build-chatbot/ --- # Build your own chatbot diff --git a/_ml-commons-plugin/tutorials/conversational-search-cohere.md b/_vector-search/getting-started/tutorials/conversational-search-cohere.md similarity index 99% rename from _ml-commons-plugin/tutorials/conversational-search-cohere.md rename to _vector-search/getting-started/tutorials/conversational-search-cohere.md index e02f576b7c..ca1f059b46 100644 --- a/_ml-commons-plugin/tutorials/conversational-search-cohere.md +++ b/_vector-search/getting-started/tutorials/conversational-search-cohere.md @@ -2,7 +2,10 @@ layout: default title: Conversational search with Cohere Command parent: Tutorials +grand_parent: Getting started nav_order: 20 +redirect_from: + - /ml-commons-plugin/tutorials/conversational-search-cohere/ --- # Conversational search using the Cohere Command model diff --git a/_ml-commons-plugin/tutorials/generate-embeddings.md b/_vector-search/getting-started/tutorials/generate-embeddings.md similarity index 97% rename from 
_ml-commons-plugin/tutorials/generate-embeddings.md rename to _vector-search/getting-started/tutorials/generate-embeddings.md index c236424eb8..e11f087ada 100644 --- a/_ml-commons-plugin/tutorials/generate-embeddings.md +++ b/_vector-search/getting-started/tutorials/generate-embeddings.md @@ -2,7 +2,10 @@ layout: default title: Generating embeddings parent: Tutorials +grand_parent: Getting started nav_order: 5 +redirect_from: + - /ml-commons-plugin/tutorials/generate-embeddings/ --- # Generating embeddings for arrays of objects @@ -54,9 +57,9 @@ The response contains inference results: Follow the next set of steps to create an ingest pipeline for generating embeddings. -### Step 2.1: Create a k-NN index +### Step 2.1: Create a vector index -First, create a k-NN index: +First, create a vector index: ```json PUT my_books diff --git a/_vector-search/getting-started/tutorials/index.md b/_vector-search/getting-started/tutorials/index.md new file mode 100644 index 0000000000..0b59e98475 --- /dev/null +++ b/_vector-search/getting-started/tutorials/index.md @@ -0,0 +1,68 @@ +--- +layout: default +title: Tutorials +has_children: true +has_toc: false +nav_order: 140 +parent: Getting started +redirect_from: + - /vector-search/getting-started/tutorials/ + - /ml-commons-plugin/tutorials/ + - /ml-commons-plugin/tutorials/index/ +vector_search_101: + - heading: "Getting started with semantic and hybrid search" + link: "/vector-search/getting-started/tutorials/neural-search-tutorial/" +semantic_search: + - heading: "Generating embeddings for arrays of objects" + link: "/vector-search/getting-started/tutorials/generate-embeddings/" + - heading: "Semantic search using byte-quantized vectors" + link: "/vector-search/getting-started/tutorials/semantic-search-byte-vectors/" +conversational_search: + - heading: "Conversational search using the Cohere Command model" + link: "/vector-search/getting-started/tutorials/conversational-search-cohere/" +guardrails: + - heading: "Using Amazon Bedrock guardrails" + link: "/vector-search/getting-started/tutorials/bedrock-guardrails/" +reranking: + - heading: "Reranking search results using the Cohere Rerank model" + link: "/vector-search/getting-started/tutorials/reranking-cohere/" + - heading: "Reranking search results using models hosted on Amazon Bedrock" + link: "/vector-search/getting-started/tutorials/reranking-bedrock/" + - heading: "Reranking search results using the MS MARCO cross-encoder model" + link: "/vector-search/getting-started/tutorials/reranking-cross-encoder/" +agents: + - heading: "Retrieval-augmented generation (RAG) chatbot" + link: "/vector-search/getting-started/tutorials/rag-chatbot/" + - heading: "RAG with a conversational flow agent" + link: "/vector-search/getting-started/tutorials/rag-conversational-agent/" + - heading: "Build your own chatbot" + link: "/vector-search/getting-started/tutorials/build-chatbot/" +--- + +# Tutorials + +Explore these tutorials to build ML-powered search applications, from semantic search to custom chatbots. 
+ +## Vector search 101 + +{% include cards.html cards=page.vector_search_101 %} + +## Semantic search + +{% include cards.html cards=page.semantic_search %} + +## Conversational search + +{% include cards.html cards=page.conversational_search %} + +## Using guardrails + +{% include cards.html cards=page.guardrails %} + +## Reranking search results + +{% include cards.html cards=page.reranking %} + +## Agents and tools + +{% include cards.html cards=page.agents %} \ No newline at end of file diff --git a/_vector-search/getting-started/neural-search-tutorial.md b/_vector-search/getting-started/tutorials/neural-search-tutorial.md similarity index 99% rename from _vector-search/getting-started/neural-search-tutorial.md rename to _vector-search/getting-started/tutorials/neural-search-tutorial.md index eead670988..b0360c41e8 100644 --- a/_vector-search/getting-started/neural-search-tutorial.md +++ b/_vector-search/getting-started/tutorials/neural-search-tutorial.md @@ -1,15 +1,16 @@ --- layout: default -title: Semantic and hybrid search tutorial +title: Getting started with semantic and hybrid search has_children: false -parent: Getting started +parent: Tutorials +grand_parent: Getting started nav_order: 30 redirect_from: - /ml-commons-plugin/semantic-search/ - /search-plugins/neural-search-tutorial/ --- -# Semantic and hybrid search tutorial +# Getting started with semantic and hybrid search By default, OpenSearch calculates document scores using the [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) algorithm. BM25 is a keyword-based algorithm that performs well on queries containing keywords but fails to capture the semantic meaning of the query terms. Semantic search, unlike keyword-based search, takes into account the meaning of the query in the search context. Thus, semantic search performs well when a query requires natural language understanding. diff --git a/_ml-commons-plugin/tutorials/rag-chatbot.md b/_vector-search/getting-started/tutorials/rag-chatbot.md similarity index 99% rename from _ml-commons-plugin/tutorials/rag-chatbot.md rename to _vector-search/getting-started/tutorials/rag-chatbot.md index 8d6a681fb6..3f382fbf4b 100644 --- a/_ml-commons-plugin/tutorials/rag-chatbot.md +++ b/_vector-search/getting-started/tutorials/rag-chatbot.md @@ -2,7 +2,10 @@ layout: default title: RAG chatbot parent: Tutorials +grand_parent: Getting started nav_order: 50 +redirect_from: + - /ml-commons-plugin/tutorials/rag-chatbot/ --- # RAG chatbot diff --git a/_ml-commons-plugin/tutorials/rag-conversational-agent.md b/_vector-search/getting-started/tutorials/rag-conversational-agent.md similarity index 97% rename from _ml-commons-plugin/tutorials/rag-conversational-agent.md rename to _vector-search/getting-started/tutorials/rag-conversational-agent.md index 86fe38416a..d674a7c45a 100644 --- a/_ml-commons-plugin/tutorials/rag-conversational-agent.md +++ b/_vector-search/getting-started/tutorials/rag-conversational-agent.md @@ -2,7 +2,10 @@ layout: default title: RAG chatbot with a conversational flow agent parent: Tutorials +grand_parent: Getting started nav_order: 40 +redirect_from: + - /ml-commons-plugin/tutorials/rag-conversational-agent/ --- # RAG chatbot with a conversational flow agent @@ -16,7 +19,7 @@ An alternative way to build RAG conversational search is to use a RAG pipeline. 
## Prerequisite -In this tutorial, you'll build a RAG application that provides an OpenSearch [k-NN index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/) as a knowledge base for a large language model (LLM). For data retrieval, you'll use [semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/). For a comprehensive semantic search tutorial, see [Neural search tutorial]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/). +In this tutorial, you'll build a RAG application that provides an OpenSearch [vector index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/) as a knowledge base for a large language model (LLM). For data retrieval, you'll use [semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/). For a comprehensive semantic search tutorial, see [Neural search tutorial]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/). First, you'll need to update your cluster settings. If you don't have a dedicated machine learning (ML) node, set `"plugins.ml_commons.only_run_on_ml_node": false`. To avoid triggering a native memory circuit breaker, set `"plugins.ml_commons.native_memory_threshold"` to 100%: @@ -104,9 +107,9 @@ PUT /_ingest/pipeline/test_population_data_pipeline For more information about ingest pipelines, see [Ingest pipelines]({{site.url}}{{site.baseurl}}/ingest-pipelines/). -### Step 1.3: Create a k-NN index +### Step 1.3: Create a vector index -Create a k-NN index specifying the ingest pipeline as a default pipeline: +Create a vector index specifying the ingest pipeline as a default pipeline: ```json PUT test_population_data @@ -133,11 +136,11 @@ PUT test_population_data ``` {% include copy-curl.html %} -For more information about k-NN indexes, see [k-NN index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/). +For more information about vector indexes, see [Creating a vector index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/). 
### Step 1.4: Ingest data -Ingest test data into the k-NN index: +Ingest test data into the vector index: ```json POST _bulk diff --git a/_ml-commons-plugin/tutorials/reranking-bedrock.md b/_vector-search/getting-started/tutorials/reranking-bedrock.md similarity index 99% rename from _ml-commons-plugin/tutorials/reranking-bedrock.md rename to _vector-search/getting-started/tutorials/reranking-bedrock.md index dfa5169744..8c1662375d 100644 --- a/_ml-commons-plugin/tutorials/reranking-bedrock.md +++ b/_vector-search/getting-started/tutorials/reranking-bedrock.md @@ -2,7 +2,10 @@ layout: default title: Reranking search results with Amazon Bedrock models parent: Tutorials +grand_parent: Getting started nav_order: 32 +redirect_from: + - /ml-commons-plugin/tutorials/reranking-bedrock/ --- # Reranking search results using models hosted on Amazon Bedrock diff --git a/_ml-commons-plugin/tutorials/reranking-cohere.md b/_vector-search/getting-started/tutorials/reranking-cohere.md similarity index 99% rename from _ml-commons-plugin/tutorials/reranking-cohere.md rename to _vector-search/getting-started/tutorials/reranking-cohere.md index 412180066f..6ac311bfee 100644 --- a/_ml-commons-plugin/tutorials/reranking-cohere.md +++ b/_vector-search/getting-started/tutorials/reranking-cohere.md @@ -3,6 +3,9 @@ layout: default title: Reranking with Cohere Rerank parent: Tutorials nav_order: 30 +grand_parent: Getting started +redirect_from: + - /ml-commons-plugin/tutorials/reranking-cohere/ --- # Reranking search results using the Cohere Rerank model diff --git a/_ml-commons-plugin/tutorials/reranking-cross-encoder.md b/_vector-search/getting-started/tutorials/reranking-cross-encoder.md similarity index 99% rename from _ml-commons-plugin/tutorials/reranking-cross-encoder.md rename to _vector-search/getting-started/tutorials/reranking-cross-encoder.md index e46c7eb511..acd7d95fd3 100644 --- a/_ml-commons-plugin/tutorials/reranking-cross-encoder.md +++ b/_vector-search/getting-started/tutorials/reranking-cross-encoder.md @@ -2,7 +2,10 @@ layout: default title: Reranking with the MS MARCO cross-encoder parent: Tutorials +grand_parent: Getting started nav_order: 35 +redirect_from: + - /ml-commons-plugin/tutorials/reranking-cross-encoder/ --- # Reranking search results using the MS MARCO cross-encoder model diff --git a/_ml-commons-plugin/tutorials/semantic-search-byte-vectors.md b/_vector-search/getting-started/tutorials/semantic-search-byte-vectors.md similarity index 98% rename from _ml-commons-plugin/tutorials/semantic-search-byte-vectors.md rename to _vector-search/getting-started/tutorials/semantic-search-byte-vectors.md index c4cc27f660..b33de5b91a 100644 --- a/_ml-commons-plugin/tutorials/semantic-search-byte-vectors.md +++ b/_vector-search/getting-started/tutorials/semantic-search-byte-vectors.md @@ -2,7 +2,10 @@ layout: default title: Semantic search using byte vectors parent: Tutorials +grand_parent: Getting started nav_order: 10 +redirect_from: + - /ml-commons-plugin/tutorials/semantic-search-byte-vectors/ --- # Semantic search using byte-quantized vectors @@ -159,7 +162,7 @@ PUT /_ingest/pipeline/pipeline-cohere ``` {% include copy-curl.html %} -Next, create a k-NN index and set the `data_type` for the `passage_embedding` field to `byte` so that it can hold byte-quantized vectors: +Next, create a vector index and set the `data_type` for the `passage_embedding` field to `byte` so that it can hold byte-quantized vectors: ```json PUT my_test_data diff --git a/_vector-search/index.md
b/_vector-search/index.md index c07f9915f5..70006dadf2 100644 --- a/_vector-search/index.md +++ b/_vector-search/index.md @@ -26,9 +26,9 @@ ml_steps: - heading: "Auto-generated embeddings quickstart" description: "Follow a quickstart tutorial for text-to-embedding search." link: "/vector-search/getting-started/auto-generated-embeddings/" - - heading: "Semantic and hybrid search tutorial" + - heading: "Getting started with semantic and hybrid search" description: "Dive into semantic search and hybrid search." - link: "/vector-search/getting-started/neural-search-tutorial/" + link: "/vector-search/getting-started/tutorials/neural-search-tutorial/" - heading: "ML-powered search" description: "Learn about many ML-powered search options that OpenSearch provides." link: "/vector-search/ml-powered-search/" diff --git a/_vector-search/ml-powered-search/hybrid-search.md b/_vector-search/ml-powered-search/hybrid-search.md index f5ab70d4bb..b5d17d39b8 100644 --- a/_vector-search/ml-powered-search/hybrid-search.md +++ b/_vector-search/ml-powered-search/hybrid-search.md @@ -54,9 +54,9 @@ PUT /_ingest/pipeline/nlp-ingest-pipeline ## Step 2: Create an index for ingestion -In order to use the text embedding processor defined in your pipeline, create a k-NN index, adding the pipeline created in the previous step as the default pipeline. Ensure that the fields defined in the `field_map` are mapped as correct types. Continuing with the example, the `passage_embedding` field must be mapped as a k-NN vector with a dimension that matches the model dimension. Similarly, the `passage_text` field should be mapped as `text`. +In order to use the text embedding processor defined in your pipeline, create a vector index, adding the pipeline created in the previous step as the default pipeline. Ensure that the fields defined in the `field_map` are mapped as correct types. Continuing with the example, the `passage_embedding` field must be mapped as a k-NN vector with a dimension that matches the model dimension. Similarly, the `passage_text` field should be mapped as `text`. -The following example request creates a k-NN index that is set up with a default ingest pipeline: +The following example request creates a vector index that is set up with a default ingest pipeline: ```json PUT /my-nlp-index @@ -89,7 +89,7 @@ PUT /my-nlp-index ``` {% include copy-curl.html %} -For more information about creating a k-NN index and using supported methods, see [k-NN index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/). +For more information about creating a vector index and using supported methods, see [Creating a vector index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/). 
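Because the hunk above elides the body of the `PUT /my-nlp-index` request, the following minimal sketch illustrates the kind of mapping the surrounding text describes. It is not the repository's exact example; the `768` dimension is an assumed placeholder that must match your embedding model's output size:

```json
PUT /my-nlp-index
{
  "settings": {
    "index.knn": true,
    "default_pipeline": "nlp-ingest-pipeline"
  },
  "mappings": {
    "properties": {
      "passage_embedding": {
        "type": "knn_vector",
        "dimension": 768
      },
      "passage_text": {
        "type": "text"
      }
    }
  }
}
```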
## Step 3: Ingest documents into the index diff --git a/_vector-search/ml-powered-search/index.md b/_vector-search/ml-powered-search/index.md index e15458d7fb..a86696731d 100644 --- a/_vector-search/ml-powered-search/index.md +++ b/_vector-search/ml-powered-search/index.md @@ -16,9 +16,9 @@ model_cards: - heading: "Connect to a model hosted on an external platform" link: "/ml-commons-plugin/remote-models/index/" tutorial_cards: - - heading: "Semantic and hybrid search tutorial" + - heading: "Getting started with semantic and hybrid search" description: "Learn how to implement semantic and hybrid search" - link: "/vector-search/getting-started/neural-search-tutorial/" + link: "/vector-search/getting-started/tutorials/neural-search-tutorial/" search_method_cards: - heading: "Semantic search" description: "Uses dense retrieval based on text embedding models to search text data." diff --git a/_vector-search/ml-powered-search/multimodal-search.md b/_vector-search/ml-powered-search/multimodal-search.md index e74a6ff7b5..53c9a6f865 100644 --- a/_vector-search/ml-powered-search/multimodal-search.md +++ b/_vector-search/ml-powered-search/multimodal-search.md @@ -56,9 +56,9 @@ PUT /_ingest/pipeline/nlp-ingest-pipeline ## Step 2: Create an index for ingestion -In order to use the text embedding processor defined in your pipeline, create a k-NN index, adding the pipeline created in the previous step as the default pipeline. Ensure that the fields defined in the `field_map` are mapped as correct types. Continuing with the example, the `vector_embedding` field must be mapped as a k-NN vector with a dimension that matches the model dimension. Similarly, the `image_description` field should be mapped as `text`, and the `image_binary` should be mapped as `binary`. +In order to use the text embedding processor defined in your pipeline, create a vector index, adding the pipeline created in the previous step as the default pipeline. Ensure that the fields defined in the `field_map` are mapped as correct types. Continuing with the example, the `vector_embedding` field must be mapped as a k-NN vector with a dimension that matches the model dimension. Similarly, the `image_description` field should be mapped as `text`, and the `image_binary` should be mapped as `binary`. -The following example request creates a k-NN index that is set up with a default ingest pipeline: +The following example request creates a vector index that is set up with a default ingest pipeline: ```json PUT /my-nlp-index @@ -91,7 +91,7 @@ PUT /my-nlp-index ``` {% include copy-curl.html %} -For more information about creating a k-NN index and its supported methods, see [k-NN index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/). +For more information about creating a vector index and its supported methods, see [Creating a vector index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/). ## Step 3: Ingest documents into the index @@ -110,7 +110,7 @@ Before the document is ingested into the index, the ingest pipeline runs the `te ## Step 4: Search the index -To perform vector search on your index, use the `neural` query clause either in the [k-NN plugin API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api/#search-for-a-model) or [Query DSL]({{site.url}}{{site.baseurl}}/opensearch/query-dsl/index/) queries. You can refine the results by using a [k-NN search filter]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). You can search by text, image, or both text and image. 
+To perform vector search on your index, use the `neural` query clause either in the [Search for a Model API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api/#search-for-a-model) or [Query DSL]({{site.url}}{{site.baseurl}}/opensearch/query-dsl/index/) queries. You can refine the results by using a [vector search filter]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). You can search by text, image, or both text and image. The following example request uses a neural query to search for text and image: @@ -132,4 +132,4 @@ GET /my-nlp-index/_search ``` {% include copy-curl.html %} -To eliminate passing the model ID with each neural query request, you can set a default model on a k-NN index or a field. To learn more, see [Setting a default model on an index or field]({{site.url}}{{site.baseurl}}/search-plugins/neural-text-search/##setting-a-default-model-on-an-index-or-field). +To eliminate passing the model ID with each neural query request, you can set a default model on a vector index or a field. To learn more, see [Setting a default model on an index or field]({{site.url}}{{site.baseurl}}/search-plugins/neural-text-search/#setting-a-default-model-on-an-index-or-field). diff --git a/_vector-search/ml-powered-search/semantic-search.md b/_vector-search/ml-powered-search/semantic-search.md index 5bfe5aa9a6..63f9380b49 100644 --- a/_vector-search/ml-powered-search/semantic-search.md +++ b/_vector-search/ml-powered-search/semantic-search.md @@ -11,7 +11,7 @@ redirect_from: # Semantic search -Semantic search considers the context and intent of a query. In OpenSearch, semantic search is facilitated by text embedding models. Semantic search creates a dense vector (a list of floats) and ingests data into a k-NN index. +Semantic search considers the context and intent of a query. In OpenSearch, semantic search is facilitated by text embedding models. Semantic search creates a dense vector (a list of floats) and ingests data into a vector index. **PREREQUISITE**
Before using semantic search, you must set up a text embedding model. For more information, see [Choosing a model]({{site.url}}{{site.baseurl}}/ml-commons-plugin/integrating-ml-models/#choosing-a-model). @@ -54,9 +54,9 @@ To split long text into passages, use the `text_chunking` ingest processor befor ## Step 2: Create an index for ingestion -In order to use the text embedding processor defined in your pipeline, create a k-NN index, adding the pipeline created in the previous step as the default pipeline. Ensure that the fields defined in the `field_map` are mapped as correct types. Continuing with the example, the `passage_embedding` field must be mapped as a k-NN vector with a dimension that matches the model dimension. Similarly, the `passage_text` field should be mapped as `text`. +In order to use the text embedding processor defined in your pipeline, create a vector index, adding the pipeline created in the previous step as the default pipeline. Ensure that the fields defined in the `field_map` are mapped as correct types. Continuing with the example, the `passage_embedding` field must be mapped as a k-NN vector with a dimension that matches the model dimension. Similarly, the `passage_text` field should be mapped as `text`. -The following example request creates a k-NN index that is set up with a default ingest pipeline: +The following example request creates a vector index that is set up with a default ingest pipeline: ```json PUT /my-nlp-index @@ -89,7 +89,7 @@ PUT /my-nlp-index ``` {% include copy-curl.html %} -For more information about creating a k-NN index and its supported methods, see [k-NN index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/). +For more information about creating a vector index and its supported methods, see [Creating a vector index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/). ## Step 3: Ingest documents into the index @@ -117,7 +117,7 @@ Before the document is ingested into the index, the ingest pipeline runs the `te ## Step 4: Search the index -To perform vector search on your index, use the `neural` query clause either in the [k-NN plugin API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api/#search-for-a-model) or [Query DSL]({{site.url}}{{site.baseurl}}/opensearch/query-dsl/index/) queries. You can refine the results by using a [k-NN search filter]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). +To perform vector search on your index, use the `neural` query clause either in the [Search for a Model API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api/#search-for-a-model) or [Query DSL]({{site.url}}{{site.baseurl}}/opensearch/query-dsl/index/) queries. You can refine the results by using a [vector search filter]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). The following example request uses a Boolean query to combine a filter clause and two query clauses---a neural query and a `match` query. The `script_score` query assigns custom weights to the query clauses: @@ -205,7 +205,7 @@ The response contains the matching document: ## Setting a default model on an index or field -A [`neural`]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/) query requires a model ID for generating vector embeddings. To eliminate passing the model ID with each neural query request, you can set a default model on a k-NN index or a field. +A [`neural`]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/) query requires a model ID for generating vector embeddings.
To eliminate passing the model ID with each neural query request, you can set a default model on a vector index or a field. First, create a [search pipeline]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/index/) with a [`neural_query_enricher`]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/neural-query-enricher/) request processor. To set a default model for an index, provide the model ID in the `default_model_id` parameter. To set a default model for a specific field, provide the field name and the corresponding model ID in the `neural_field_default_id` map. If you provide both `default_model_id` and `neural_field_default_id`, `neural_field_default_id` takes precedence: diff --git a/_vector-search/ml-powered-search/text-chunking.md b/_vector-search/ml-powered-search/text-chunking.md index b35667c9f5..2214d16b5d 100644 --- a/_vector-search/ml-powered-search/text-chunking.md +++ b/_vector-search/ml-powered-search/text-chunking.md @@ -51,7 +51,7 @@ PUT _ingest/pipeline/text-chunking-embedding-ingest-pipeline ## Step 2: Create an index for ingestion -In order to use the ingest pipeline, you need to create a k-NN index. The `passage_chunk_embedding` field must be of the `nested` type. The `knn.dimension` field must contain the number of dimensions for your model: +In order to use the ingest pipeline, you need to create a vector index. The `passage_chunk_embedding` field must be of the `nested` type. The `knn.dimension` field must contain the number of dimensions for your model: ```json PUT testindex diff --git a/_vector-search/optimizing-performance/binary-quantization.md b/_vector-search/optimizing-performance/binary-quantization.md index 60ce828e49..e2903be2a7 100644 --- a/_vector-search/optimizing-performance/binary-quantization.md +++ b/_vector-search/optimizing-performance/binary-quantization.md @@ -119,7 +119,7 @@ PUT my-vector-index ## Search using binary quantized vectors -You can perform a k-NN search on your index by providing a vector and specifying the number of nearest neighbors (k) to return: +You can perform a vector search on your index by providing a vector and specifying the number of nearest neighbors (k) to return: ```json GET my-vector-index/_search diff --git a/_vector-search/optimizing-performance/disk-based-vector-search.md b/_vector-search/optimizing-performance/disk-based-vector-search.md index 37264a4e79..86790e57cd 100644 --- a/_vector-search/optimizing-performance/disk-based-vector-search.md +++ b/_vector-search/optimizing-performance/disk-based-vector-search.md @@ -198,10 +198,10 @@ POST /_plugins/_knn/models/test-model/_train ``` {% include copy-curl.html %} -This command assumes that training data has been ingested into the `train-index-name` index. For more information, see [Building a k-NN index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). +This command assumes that training data has been ingested into the `train-index-name` index. For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). {: .note} -You can override the `compression_level` for disk-optimized indexes in the same way as for regular k-NN indexes. +You can override the `compression_level` for disk-optimized indexes in the same way as for regular vector indexes. 
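To make that override concrete, the following sketch (not taken from the patch; the index and field names are illustrative) creates a disk-optimized field and replaces the default `32x` compression with `16x`:

```json
PUT /my-vector-index
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "my_vector_field": {
        "type": "knn_vector",
        "dimension": 8,
        "mode": "on_disk",
        "compression_level": "16x"
      }
    }
  }
}
```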
## Next steps diff --git a/_vector-search/optimizing-performance/faiss-16-bit-quantization.md b/_vector-search/optimizing-performance/faiss-16-bit-quantization.md index 4fadd9d8f7..717c8ea524 100644 --- a/_vector-search/optimizing-performance/faiss-16-bit-quantization.md +++ b/_vector-search/optimizing-performance/faiss-16-bit-quantization.md @@ -10,7 +10,7 @@ has_math: true # Faiss 16-bit scalar quantization -Starting with version 2.13, the k-NN plugin supports performing scalar quantization for the Faiss engine within OpenSearch. Within the Faiss engine, a scalar quantizer (SQfp16) performs the conversion between 32-bit and 16-bit vectors. At ingestion time, when you upload 32-bit floating-point vectors to OpenSearch, SQfp16 quantizes them into 16-bit floating-point vectors and stores the quantized vectors in a k-NN index. +Starting with version 2.13, OpenSearch supports performing scalar quantization for the Faiss engine. Within the Faiss engine, a scalar quantizer (SQfp16) performs the conversion between 32-bit and 16-bit vectors. At ingestion time, when you upload 32-bit floating-point vectors to OpenSearch, SQfp16 quantizes them into 16-bit floating-point vectors and stores the quantized vectors in a vector index. At search time, SQfp16 decodes the vector values back into 32-bit floating-point values for distance computation. The SQfp16 quantization can decrease the memory footprint by a factor of 2. Additionally, it leads to a minimal loss in recall when differences between vector values are large compared to the error introduced by eliminating their two least significant bits. When used with [SIMD optimization]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/vector-field/#simd-optimization-for-the-faiss-engine), SQfp16 quantization can also significantly reduce search latencies and improve indexing throughput. @@ -19,7 +19,7 @@ SIMD optimization is not supported on Windows. Using Faiss scalar quantization o ## Using Faiss scalar quantization -To use Faiss scalar quantization, set the k-NN vector field's `method.parameters.encoder.name` to `sq` when creating a k-NN index: +To use Faiss scalar quantization, set the k-NN vector field's `method.parameters.encoder.name` to `sq` when creating a vector index: ```json PUT /test-index diff --git a/_vector-search/optimizing-performance/faiss-product-quantization.md b/_vector-search/optimizing-performance/faiss-product-quantization.md index de71164df0..12f6b49a36 100644 --- a/_vector-search/optimizing-performance/faiss-product-quantization.md +++ b/_vector-search/optimizing-performance/faiss-product-quantization.md @@ -22,7 +22,7 @@ In OpenSearch, the training vectors need to be present in an index. In general, For PQ, both _m_ and _code_size_ need to be selected. _m_ determines the number of subvectors into which vectors should be split for separate encoding. Consequently, the _dimension_ needs to be divisible by _m_. _code_size_ determines the number of bits used to encode each subvector. In general, we recommend a setting of `code_size = 8` and then tuning _m_ to get the desired trade-off between memory footprint and recall. -For an example of setting up an index with PQ, see the [Building a k-NN index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model) tutorial.
+For an example of setting up an index with PQ, see the [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model) tutorial. ## Memory estimation diff --git a/_vector-search/optimizing-performance/knn-vector-quantization.md b/_vector-search/optimizing-performance/knn-vector-quantization.md index 7f405677aa..2bffbdbc07 100644 --- a/_vector-search/optimizing-performance/knn-vector-quantization.md +++ b/_vector-search/optimizing-performance/knn-vector-quantization.md @@ -28,7 +28,7 @@ inside_cards: # Vector quantization -By default, the k-NN plugin supports the indexing and querying of vectors of type `float`, where each dimension of the vector occupies 4 bytes of memory. For use cases that require ingestion on a large scale, keeping `float` vectors can be expensive because OpenSearch needs to construct, load, save, and search graphs (for native `nmslib` and `faiss` engines). To reduce the memory footprint, you can use vector quantization. +By default, OpenSearch supports the indexing and querying of vectors of type `float`, where each dimension of the vector occupies 4 bytes of memory. For use cases that require ingestion on a large scale, keeping `float` vectors can be expensive because OpenSearch needs to construct, load, save, and search graphs (for native `nmslib` and `faiss` engines). To reduce the memory footprint, you can use vector quantization. OpenSearch supports many varieties of quantization. In general, the level of quantization will provide a trade-off between the accuracy of the nearest neighbor search and the size of the memory footprint consumed by the vector search. diff --git a/_vector-search/optimizing-performance/lucene-scalar-quantization.md b/_vector-search/optimizing-performance/lucene-scalar-quantization.md index 5c088da794..50a63031d2 100644 --- a/_vector-search/optimizing-performance/lucene-scalar-quantization.md +++ b/_vector-search/optimizing-performance/lucene-scalar-quantization.md @@ -10,13 +10,13 @@ has_math: true # Lucene scalar quantization -Starting with version 2.16, the k-NN plugin supports built-in scalar quantization for the Lucene engine. Unlike [byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors), which require you to quantize vectors before ingesting documents, the Lucene scalar quantizer quantizes input vectors in OpenSearch during ingestion. The Lucene scalar quantizer converts 32-bit floating-point input vectors into 7-bit integer vectors in each segment using the minimum and maximum quantiles computed based on the [`confidence_interval`](#confidence-interval) parameter. During search, the query vector is quantized in each segment using the segment's minimum and maximum quantiles in order to compute the distance between the query vector and the segment's quantized input vectors. +Starting with version 2.16, OpenSearch supports built-in scalar quantization for the Lucene engine. Unlike [byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors), which require you to quantize vectors before ingesting documents, the Lucene scalar quantizer quantizes input vectors in OpenSearch during ingestion. The Lucene scalar quantizer converts 32-bit floating-point input vectors into 7-bit integer vectors in each segment using the minimum and maximum quantiles computed based on the [`confidence_interval`](#confidence-interval) parameter.
During search, the query vector is quantized in each segment using the segment's minimum and maximum quantiles in order to compute the distance between the query vector and the segment's quantized input vectors. Quantization can decrease the memory footprint by a factor of 4 in exchange for some loss in recall. Additionally, quantization slightly increases disk usage because it requires storing both the raw input vectors and the quantized vectors. ## Using Lucene scalar quantization -To use the Lucene scalar quantizer, set the k-NN vector field's `method.parameters.encoder.name` to `sq` when creating a k-NN index: +To use the Lucene scalar quantizer, set the k-NN vector field's `method.parameters.encoder.name` to `sq` when creating a vector index: ```json PUT /test-index diff --git a/_vector-search/optimizing-performance/performance-tuning.md b/_vector-search/optimizing-performance/performance-tuning.md index 1eee277951..d5516cdfe6 100644 --- a/_vector-search/optimizing-performance/performance-tuning.md +++ b/_vector-search/optimizing-performance/performance-tuning.md @@ -104,7 +104,7 @@ This is an expert-level setting. Disabling the `_recovery_source` may lead to fa This approach is recommended only for workloads that involve a single initial bulk upload and will be used exclusively for search after force merging to a single segment. -During indexing, vector search builds a specialized data structure for a `knn_vector` field to enable efficient approximate k-NN search. However, these structures are rebuilt during [force merge]({{site.url}}{{site.baseurl}}/api-reference/index-apis/force-merge/) on k-NN indexes. To optimize indexing speed, follow these steps: +During indexing, vector search builds a specialized data structure for a `knn_vector` field to enable efficient approximate k-NN search. However, these structures are rebuilt during [force merge]({{site.url}}{{site.baseurl}}/api-reference/index-apis/force-merge/) on vector indexes. To optimize indexing speed, follow these steps: 1. **Disable vector data structure creation**: Disable vector data structure creation for new segments by setting [`index.knn.advanced.approximate_threshold`]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings) to `-1`. diff --git a/_vector-search/specialized-operations/filter-search-knn.md b/_vector-search/specialized-operations/filter-search-knn.md index 5597ef74fd..6c70c05262 100644 --- a/_vector-search/specialized-operations/filter-search-knn.md +++ b/_vector-search/specialized-operations/filter-search-knn.md @@ -14,8 +14,8 @@ redirect_from: To refine vector search results, you can filter a vector search using one of the following methods: - [Efficient k-NN filtering](#efficient-k-nn-filtering): This approach applies filtering _during_ the k-NN search, as opposed to before or after the k-NN search, which ensures that `k` results are returned (if there are at least `k` results in total). 
This approach is supported by the following engines: - - Lucene engine with a Hierarchical Navigable Small World (HNSW) algorithm (k-NN plugin versions 2.4 and later) - - Faiss engine with an HNSW algorithm (k-NN plugin versions 2.9 and later) or IVF algorithm (k-NN plugin versions 2.10 and later) + - Lucene engine with a Hierarchical Navigable Small World (HNSW) algorithm (OpenSearch versions 2.4 and later) + - Faiss engine with an HNSW algorithm (OpenSearch versions 2.9 and later) or IVF algorithm (OpenSearch versions 2.10 and later) - [Post-filtering](#post-filtering): Because it is performed after the k-NN search, this approach may return significantly fewer than `k` results for a restrictive filter. You can use the following two filtering strategies for this approach: - [Boolean post-filter](#boolean-filter-with-ann-search): This approach runs an [approximate nearest neighbor (ANN)]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) search and then applies a filter to the results. The two query parts are executed independently, and then the results are combined based on the query operator (`should`, `must`, and so on) provided in the query. @@ -57,7 +57,7 @@ You can perform efficient k-NN filtering with the `lucene` or `faiss` engines. ### Lucene k-NN filter implementation -k-NN plugin version 2.2 introduced support for running k-NN searches with the Lucene engine using HNSW graphs. Starting with version 2.4, which is based on Lucene version 9.4, you can use Lucene filters for k-NN searches. +OpenSearch version 2.2 introduced support for running k-NN searches with the Lucene engine using HNSW graphs. Starting with version 2.4, which is based on Lucene version 9.4, you can use Lucene filters for k-NN searches. When you specify a Lucene filter for a k-NN search, the Lucene algorithm decides whether to perform an exact k-NN search with pre-filtering or an approximate search with modified post-filtering. The algorithm uses the following variables: @@ -263,7 +263,7 @@ For more ways to construct a filter, see [Constructing a filter](#constructing-a ### Faiss k-NN filter implementation -For k-NN searches, you can use `faiss` filters with an HNSW algorithm (k-NN plugin versions 2.9 and later) or IVF algorithm (k-NN plugin versions 2.10 and later). +For k-NN searches, you can use `faiss` filters with an HNSW algorithm (OpenSearch versions 2.9 and later) or IVF algorithm (OpenSearch versions 2.10 and later). When you specify a Faiss filter for a k-NN search, the Faiss algorithm decides whether to perform an exact k-NN search with pre-filtering or an approximate search with modified post-filtering. The algorithm uses the following variables: diff --git a/_vector-search/specialized-operations/nested-search-knn.md b/_vector-search/specialized-operations/nested-search-knn.md index e30a0181c4..3c4fb6d87e 100644 --- a/_vector-search/specialized-operations/nested-search-knn.md +++ b/_vector-search/specialized-operations/nested-search-knn.md @@ -22,7 +22,7 @@ k-NN search with nested fields is supported by the HNSW algorithm for the Lucene ## Indexing and searching nested fields -To use k-NN search with nested fields, you must create a k-NN index by setting `index.knn` to `true`. Create a nested field by setting its `type` to `nested` and specify one or more fields of the `knn_vector` data type within the nested field. 
In this example, the `knn_vector` field `my_vector` is nested inside the `nested_field` field: +To use k-NN search with nested fields, you must create a vector index by setting `index.knn` to `true`. Create a nested field by setting its `type` to `nested` and specify one or more fields of the `knn_vector` data type within the nested field. In this example, the `knn_vector` field `my_vector` is nested inside the `nested_field` field: ```json PUT my-knn-index-1 @@ -483,7 +483,7 @@ You can apply a filter to a k-NN search with nested fields. A filter can be appl The following example applies a filter to a top-level field. -First, create a k-NN index with a nested field: +First, create a vector index with a nested field: ```json PUT my-knn-index-1 diff --git a/_vector-search/specialized-operations/radial-search-knn.md b/_vector-search/specialized-operations/radial-search-knn.md index 31505b76f0..d2ec6f4257 100644 --- a/_vector-search/specialized-operations/radial-search-knn.md +++ b/_vector-search/specialized-operations/radial-search-knn.md @@ -42,7 +42,7 @@ The following examples can help you to get started with radial search. ### Prerequisites -To use a k-NN index with radial search, create a k-NN index by setting `index.knn` to `true`. Specify one or more fields of the `knn_vector` data type, as shown in the following example: +To use a vector index with radial search, create a vector index by setting `index.knn` to `true`. Specify one or more fields of the `knn_vector` data type, as shown in the following example: ```json PUT knn-index-test diff --git a/_vector-search/vector-search-techniques/approximate-knn.md b/_vector-search/vector-search-techniques/approximate-knn.md index 32fe19bba4..61e4f11732 100644 --- a/_vector-search/vector-search-techniques/approximate-knn.md +++ b/_vector-search/vector-search-techniques/approximate-knn.md @@ -13,12 +13,12 @@ redirect_from: Standard k-NN search methods compute similarity using a brute-force approach that measures the nearest distance between a query and a number of points, which produces exact results. This works well in many applications. However, in the case of extremely large datasets with high dimensionality, this creates a scaling problem that reduces the efficiency of the search. Approximate k-NN search methods can overcome this by employing tools that restructure indexes more efficiently and reduce the dimensionality of searchable vectors. Using this approach requires a sacrifice in accuracy but increases search processing speeds appreciably. -The Approximate k-NN search methods leveraged by OpenSearch use approximate nearest neighbor (ANN) algorithms from the [NMSLIB](https://github.com/nmslib/nmslib), [Faiss](https://github.com/facebookresearch/faiss), and [Lucene](https://lucene.apache.org/) libraries to power k-NN search. These search methods employ ANN to improve search latency for large datasets. Of the three search methods the k-NN plugin provides, this method offers the best search scalability for large datasets. This approach is the preferred method when a dataset reaches hundreds of thousands of vectors. +The Approximate k-NN search methods leveraged by OpenSearch use approximate nearest neighbor (ANN) algorithms from the [NMSLIB](https://github.com/nmslib/nmslib), [Faiss](https://github.com/facebookresearch/faiss), and [Lucene](https://lucene.apache.org/) libraries to power k-NN search. These search methods employ ANN to improve search latency for large datasets. 
Of the three search methods OpenSearch provides, this method offers the best search scalability for large datasets. This approach is the preferred method when a dataset reaches hundreds of thousands of vectors. -For details on the algorithms the plugin currently supports, see [k-NN Index documentation]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/). +For details on the algorithms OpenSearch currently supports, see [Creating a vector index]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/). {: .note} -The k-NN plugin builds a native library index of the vectors for each `knn-vector` field/Lucene segment pair during indexing, which can be used to efficiently find the k-nearest neighbors to a query vector during search. To learn more about Lucene segments, see the [Apache Lucene documentation](https://lucene.apache.org/core/8_9_0/core/org/apache/lucene/codecs/lucene87/package-summary.html#package.description). These native library indexes are loaded into native memory during search and managed by a cache. To learn more about preloading native library indexes into memory, refer to the [warmup API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#warmup-operation). Additionally, you can see which native library indexes are already loaded in memory. To learn more about this, see the [stats API section]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#stats). +OpenSearch builds a native library index of the vectors for each `knn-vector` field/Lucene segment pair during indexing, which can be used to efficiently find the k-nearest neighbors to a query vector during search. To learn more about Lucene segments, see the [Apache Lucene documentation](https://lucene.apache.org/core/8_9_0/core/org/apache/lucene/codecs/lucene87/package-summary.html#package.description). These native library indexes are loaded into native memory during search and managed by a cache. To learn more about preloading native library indexes into memory, refer to the [warmup API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#warmup-operation). Additionally, you can see which native library indexes are already loaded in memory. To learn more about this, see the [stats API section]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#stats). Because the native library indexes are constructed during indexing, it is not possible to apply a filter on an index and then use this search method. All filters are applied on the results produced by the approximate nearest neighbor search. @@ -32,7 +32,7 @@ When considering cluster node sizing, a general approach is to first establish a ## Get started with approximate k-NN -To use the k-NN plugin's approximate search functionality, you must first create a k-NN index with `index.knn` set to `true`. This setting tells the plugin to create native library indexes for the index. +To use the approximate search functionality, you must first create a vector index with `index.knn` set to `true`. This setting tells OpenSearch to create native library indexes for the index. Next, you must add one or more fields of the `knn_vector` data type. This example creates an index with two `knn_vector` fields, one using `faiss` and the other using `nmslib`: @@ -84,7 +84,7 @@ In the preceding example, both `knn_vector` fields are configured using method d The `knn_vector` data type supports a vector of floats that can have a dimension count of up to 16,000 for the NMSLIB, Faiss, and Lucene engines, as set by the dimension mapping parameter.
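The example mapping itself is elided from this hunk. A minimal sketch of what such an index might look like follows; the index name, field names, and dimensions are illustrative assumptions rather than values from the elided example:

```json
PUT /my-approx-knn-index
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "my_vector1": {
        "type": "knn_vector",
        "dimension": 2,
        "method": {
          "name": "hnsw",
          "space_type": "l2",
          "engine": "faiss"
        }
      },
      "my_vector2": {
        "type": "knn_vector",
        "dimension": 4,
        "method": {
          "name": "hnsw",
          "space_type": "l2",
          "engine": "nmslib"
        }
      }
    }
  }
}
```
{% include copy-curl.html %}

Because `index.knn` is `true`, OpenSearch builds a native library index for each of these fields as documents are ingested.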
-In OpenSearch, codecs handle the storage and retrieval of indexes. The k-NN plugin uses a custom codec to write vector data to native library indexes so that the underlying k-NN search library can read it. +In OpenSearch, codecs handle the storage and retrieval of indexes. OpenSearch uses a custom codec to write vector data to native library indexes so that the underlying k-NN search library can read it. {: .tip } After you create the index, you can add some data to it: @@ -150,9 +150,9 @@ The number of results returned by Faiss/NMSLIB differs from the number of result Starting in OpenSearch 2.14, you can use `k`, `min_score`, or `max_distance` for [radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). -### Building a k-NN index from a model +### Building a vector index from a model -For some of the algorithms that the k-NN plugin supports, the native library index needs to be trained before it can be used. It would be expensive to train every newly created segment, so, instead, the plugin features the concept of a *model* that initializes the native library index during segment creation. You can create a model by calling the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) and passing in the source of the training data and the method definition of the model. Once training is complete, the model is serialized to a k-NN model system index. Then, during indexing, the model is pulled from this index to initialize the segments. +For some of the algorithms that OpenSearch supports, the native library index needs to be trained before it can be used. It would be expensive to train every newly created segment, so, instead, OpenSearch features the concept of a *model* that initializes the native library index during segment creation. You can create a model by calling the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) and passing in the source of the training data and the method definition of the model. Once training is complete, the model is serialized to a k-NN model system index. Then, during indexing, the model is pulled from this index to initialize the segments. To train a model, you first need an OpenSearch index containing training data. Training data can come from any `knn_vector` field that has a dimension matching the dimension of the model you want to create. Training data can be the same data that you are going to index or data in a separate set. To create a training index, send the following request: @@ -312,7 +312,7 @@ Engine | Notes ### Rescoring quantized results using full precision -Quantization can be used to significantly reduce the memory footprint of a k-NN index. For more information about quantization, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization). Because some vector representation is lost during quantization, the computed distances will be approximate. This causes the overall recall of the search to decrease. +Quantization can be used to significantly reduce the memory footprint of a vector index. For more information about quantization, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization). Because some vector representation is lost during quantization, the computed distances will be approximate. This causes the overall recall of the search to decrease. To improve recall while maintaining the memory savings of quantization, you can use a two-phase search approach.
In the first phase, `oversample_factor * k` results are retrieved from an index using quantized vectors and the scores are approximated. In the second phase, the full-precision vectors of those `oversample_factor * k` results are loaded into memory from disk, and scores are recomputed against the full-precision query vector. The results are then reduced to the top k. @@ -394,7 +394,7 @@ To learn more about using binary vectors with k-NN search, see [Binary k-NN vect ## Spaces -A _space_ corresponds to the function used to measure the distance between two points in order to determine the k-nearest neighbors. From the k-NN perspective, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a higher score equates to a better result. The k-NN plugin supports the following spaces. +A _space_ corresponds to the function used to measure the distance between two points in order to determine the k-nearest neighbors. From the k-NN perspective, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a higher score equates to a better result. OpenSearch supports the following spaces. Not every method supports each of these spaces. Be sure to check out [the method documentation]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/) to make sure the space you are interested in is supported. {: .note} diff --git a/_vector-search/vector-search-techniques/knn-score-script.md b/_vector-search/vector-search-techniques/knn-score-script.md index 5afdb112ac..2d0d33b037 100644 --- a/_vector-search/vector-search-techniques/knn-score-script.md +++ b/_vector-search/vector-search-techniques/knn-score-script.md @@ -300,7 +300,7 @@ A _space_ corresponds to the function used to measure the distance between two p | `innerproduct` (supported for Lucene in OpenSearch version 2.13 and later) | $$ d(\mathbf{x}, \mathbf{y}) = - {\mathbf{x} \cdot \mathbf{y}} = - \sum_{i=1}^n x_i y_i $$ | $$ \text{If } d \ge 0, score = {1 \over 1 + d }$$
$$\text{If } d < 0, score = -d + 1$$ | | `hammingbit` (supported for binary and long vectors)

`hamming` (supported for binary vectors in OpenSearch version 2.16 and later) | $$ d(\mathbf{x}, \mathbf{y}) = \text{countSetBits}(\mathbf{x} \oplus \mathbf{y})$$ | $$ score = {1 \over 1 + d } $$ | -Cosine similarity returns a number between -1 and 1, and because OpenSearch relevance scores can't be below 0, the k-NN plugin adds 1 to get the final score. +Cosine similarity returns a number between -1 and 1, and because OpenSearch relevance scores can't be below 0, OpenSearch adds 1 to get the final score. With cosine similarity, it is not valid to pass a zero vector (`[0, 0, ... ]`) as input. This is because the magnitude of such a vector is 0, which raises a `divide by 0` exception in the corresponding formula. Requests containing the zero vector will be rejected, and a corresponding exception will be thrown. {: .note } diff --git a/_vector-search/vector-search-techniques/painless-functions.md b/_vector-search/vector-search-techniques/painless-functions.md index 4865537681..2678766c62 100644 --- a/_vector-search/vector-search-techniques/painless-functions.md +++ b/_vector-search/vector-search-techniques/painless-functions.md @@ -12,7 +12,7 @@ redirect_from: # Painless scripting extensions -With the Painless Scripting extensions, you can use k-NN distance functions directly in your Painless scripts to perform operations on `knn_vector` fields. Painless has a strict list of allowed functions and classes per context to ensure its scripts are secure. The k-NN plugin adds Painless Scripting extensions to a few of the distance functions used in [k-NN score script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/), so you can use them to customize your k-NN workload. +With the Painless Scripting extensions, you can use k-NN distance functions directly in your Painless scripts to perform operations on `knn_vector` fields. Painless has a strict list of allowed functions and classes per context to ensure its scripts are secure. OpenSearch adds Painless Scripting extensions to a few of the distance functions used in [k-NN score script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/), so you can use them to customize your k-NN workload. ## Get started with k-NN's Painless Scripting functions @@ -49,13 +49,13 @@ GET my-knn-index-2/_search `field` needs to map to a `knn_vector` field, and `query_value` needs to be a floating point array with the same dimension as `field`. ## Function types -The following table describes the available Painless functions the k-NN plugin provides: +The following table describes the available Painless functions OpenSearch provides: Function name | Function signature | Description :--- | :--- | :--- l2Squared | `float l2Squared (float[] queryVector, doc['vector field'])` | This function calculates the square of the L2 distance (Euclidean distance) between a given query vector and document vectors. The shorter the distance, the more relevant the document is, so this example inverts the return value of the l2Squared function. If the document vector matches the query vector, the result is 0, so this example also adds 1 to the distance to avoid divide by zero errors. l1Norm | `float l1Norm (float[] queryVector, doc['vector field'])` | This function calculates the L1 Norm distance (Manhattan distance) between a given query vector and document vectors.
-cosineSimilarity | `float cosineSimilarity (float[] queryVector, doc['vector field'])` | Cosine similarity is an inner product of the query vector and document vector normalized to both have a length of 1. If the magnitude of the query vector doesn't change throughout the query, you can pass the magnitude of the query vector to improve performance, instead of calculating the magnitude every time for every filtered document:
`float cosineSimilarity (float[] queryVector, doc['vector field'], float normQueryVector)`
In general, the range of cosine similarity is [-1, 1]. However, in the case of information retrieval, the cosine similarity of two documents ranges from 0 to 1 because the tf-idf statistic can't be negative. Therefore, the k-NN plugin adds 1.0 in order to always yield a positive cosine similarity score. +cosineSimilarity | `float cosineSimilarity (float[] queryVector, doc['vector field'])` | Cosine similarity is an inner product of the query vector and document vector normalized to both have a length of 1. If the magnitude of the query vector doesn't change throughout the query, you can pass the magnitude of the query vector to improve performance, instead of calculating the magnitude every time for every filtered document:
`float cosineSimilarity (float[] queryVector, doc['vector field'], float normQueryVector)`
In general, the range of cosine similarity is [-1, 1]. However, in the case of information retrieval, the cosine similarity of two documents ranges from 0 to 1 because the tf-idf statistic can't be negative. Therefore, OpenSearch adds 1.0 in order to always yield a positive cosine similarity score. hamming | `float hamming (float[] queryVector, doc['vector field'])` | This function calculates the Hamming distance between a given query vector and document vectors. The Hamming distance is the number of positions at which the corresponding elements are different. The shorter the distance, the more relevant the document is, so this example inverts the return value of the Hamming distance. The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). From 1900e755796ec706116b31b6bd0eae4c6a188074 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Tue, 11 Feb 2025 14:46:12 -0500 Subject: [PATCH 11/32] unify terminology Signed-off-by: Fanit Kolchina --- _field-types/supported-field-types/knn-vector.md | 4 ++-- _ml-commons-plugin/index.md | 2 +- _vector-search/creating-vector-index/method.md | 2 +- .../optimizing-performance/disk-based-vector-search.md | 4 ++-- .../optimizing-performance/faiss-product-quantization.md | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/_field-types/supported-field-types/knn-vector.md b/_field-types/supported-field-types/knn-vector.md index 0aa48f2ef7..84a59d1165 100644 --- a/_field-types/supported-field-types/knn-vector.md +++ b/_field-types/supported-field-types/knn-vector.md @@ -260,7 +260,7 @@ GET test-index/_search ### Example: IVF -The `ivf` method requires a training step that creates and trains the model used to initialize the native library index during segment creation. For more information, see [Building a k-NN index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). +The `ivf` method requires a training step that creates and trains the model used to initialize the native library index during segment creation. For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model). First, create an index that will contain byte vector training data. Specify the `faiss` engine and `ivf` algorithm and make sure that the `dimension` matches the dimension of the model you want to create: @@ -650,7 +650,7 @@ The response contains the two vectors closest to the query vector: ### Example: IVF -The IVF method requires a training step that creates and trains the model used to initialize the native library index during segment creation. For more information, see [Building a k-NN index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). +The IVF method requires a training step that creates and trains the model used to initialize the native library index during segment creation. For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model). First, create an index that will contain binary vector training data. 
Specify the Faiss engine and IVF algorithm and make sure that the `dimension` matches the dimension of the model you want to create: diff --git a/_ml-commons-plugin/index.md b/_ml-commons-plugin/index.md index 50d637379e..f18813383d 100644 --- a/_ml-commons-plugin/index.md +++ b/_ml-commons-plugin/index.md @@ -34,7 +34,7 @@ ML Commons provides its own set of REST APIs. For more information, see [ML Comm ## ML-powered search -For information about available ML-powered search types, see [ML-powered search]({{site.url}}{{site.baseurl}}/search-plugins/index/#ml-powered-search). +For information about available ML-powered search types, see [Vector search]({{site.url}}{{site.baseurl}}/vector-search/). ## Tutorials diff --git a/_vector-search/creating-vector-index/method.md b/_vector-search/creating-vector-index/method.md index 2b93480616..e5fa0797c0 100644 --- a/_vector-search/creating-vector-index/method.md +++ b/_vector-search/creating-vector-index/method.md @@ -7,7 +7,7 @@ nav_order: 20 # Supported methods -A _method_ definition refers to the underlying configuration of the [approximate k-NN]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/approximate-knn/) algorithm you want to use. Method definitions are used to either create a `knn_vector` field (when the method does not require training) or [create a model during training]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) that can then be used to [create a `knn_vector` field]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). +A _method_ definition refers to the underlying configuration of the [approximate k-NN]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/approximate-knn/) algorithm you want to use. Method definitions are used to either create a `knn_vector` field (when the method does not require training) or [create a model during training]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) that can then be used to [create a `knn_vector` field]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model). A method definition will always contain the name of the method, the space_type the method is built for, the engine (the library) to use, and a map of parameters. diff --git a/_vector-search/optimizing-performance/disk-based-vector-search.md b/_vector-search/optimizing-performance/disk-based-vector-search.md index 86790e57cd..bcf04aedfe 100644 --- a/_vector-search/optimizing-performance/disk-based-vector-search.md +++ b/_vector-search/optimizing-performance/disk-based-vector-search.md @@ -181,7 +181,7 @@ GET my-vector-index/_search ## Model-based indexes -For [model-based indexes]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model), you can specify the `on_disk` parameter in the training request in the same way that you would specify it during index creation. By default, `on_disk` mode will use the [Faiss IVF method]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/#supported-faiss-methods) and a compression level of `32x`. To run the training API, send the following request: +For [model-based indexes]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model), you can specify the `on_disk` parameter in the training request in the same way that you would specify it during index creation. 
By default, `on_disk` mode will use the [Faiss IVF method]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/#supported-faiss-methods) and a compression level of `32x`. To run the training API, send the following request: ```json POST /_plugins/_knn/models/test-model/_train @@ -198,7 +198,7 @@ POST /_plugins/_knn/models/test-model/_train ``` {% include copy-curl.html %} -This command assumes that training data has been ingested into the `train-index-name` index. For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). +This command assumes that training data has been ingested into the `train-index-name` index. For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model). {: .note} You can override the `compression_level` for disk-optimized indexes in the same way as for regular vector indexes. diff --git a/_vector-search/optimizing-performance/faiss-product-quantization.md b/_vector-search/optimizing-performance/faiss-product-quantization.md index 12f6b49a36..57a989e8e7 100644 --- a/_vector-search/optimizing-performance/faiss-product-quantization.md +++ b/_vector-search/optimizing-performance/faiss-product-quantization.md @@ -22,7 +22,7 @@ In OpenSearch, the training vectors need to be present in an index. In general, For PQ, both _m_ and _code_size_ need to be selected. _m_ determines the number of subvectors into which vectors should be split for separate encoding. Consequently, the _dimension_ needs to be divisible by _m_. _code_size_ determines the number of bits used to encode each subvector. In general, we recommend a setting of `code_size = 8` and then tuning _m_ to get the desired trade-off between memory footprint and recall. -For an example of setting up an index with PQ, see the [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model) tutorial. +For an example of setting up an index with PQ, see the [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model) tutorial. ## Memory estimation From a5e8b8db410d5d6538bc88abd5df5d82827d3494 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Tue, 11 Feb 2025 14:56:11 -0500 Subject: [PATCH 12/32] Review comments Signed-off-by: Fanit Kolchina --- .../optimizing-performance/knn-vector-quantization.md | 9 ++++++++- _vector-search/vector-search-techniques/index.md | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/_vector-search/optimizing-performance/knn-vector-quantization.md b/_vector-search/optimizing-performance/knn-vector-quantization.md index 2bffbdbc07..7bb4eaf3ba 100644 --- a/_vector-search/optimizing-performance/knn-vector-quantization.md +++ b/_vector-search/optimizing-performance/knn-vector-quantization.md @@ -9,8 +9,11 @@ redirect_from: - /search-plugins/knn/knn-vector-quantization/ outside_cards: - heading: "Byte vectors" - description: "Quantize vectors outside of OpenSearch before ingesting them into an OpenSearch index." + description: "Quantize vectors into byte vectors." link: "/field-types/supported-field-types/knn-vector#byte-vectors" + - heading: "Binary vectors" + description: "Quantize vectors into binary vectors." 
+ link: "/field-types/supported-field-types/knn-vector#binary-vectors" inside_cards: - heading: "Lucene scalar quantization" description: "Use built-in scalar quantization for the Lucene engine." @@ -34,8 +37,12 @@ OpenSearch supports many varieties of quantization. In general, the level of qua ## Quantize vectors outside of OpenSearch +Quantize vectors outside of OpenSearch before ingesting them into an OpenSearch index. + {% include cards.html cards=page.outside_cards %} ## Quantize vectors within OpenSearch +Use OpenSearch built-in quantization to quantize vectors. + {% include cards.html cards=page.inside_cards %} \ No newline at end of file diff --git a/_vector-search/vector-search-techniques/index.md b/_vector-search/vector-search-techniques/index.md index 52db1d3274..58f48f9ebd 100644 --- a/_vector-search/vector-search-techniques/index.md +++ b/_vector-search/vector-search-techniques/index.md @@ -55,7 +55,7 @@ In general, select NMSLIB or Faiss for large-scale use cases. Lucene is a good o |:---|:---|:---|:---|:---| | Max dimensions | 16,000 | 16,000 | 16,000 | 16,000 | | Filter | Post-filter | Post-filter | Post-filter | Filter during search | -| Training required | No | No | Yes | No | +| Training required | No | No (Yes for PQ) | Yes | No | | Similarity metrics | `l2`, `innerproduct`, `cosinesimil`, `l1`, `linf` | `l2`, `innerproduct` | `l2`, `innerproduct` | `l2`, `cosinesimil` | | Number of vectors | Tens of billions | Tens of billions | Tens of billions | Less than 10 million | | Indexing latency | Low | Low | Lowest | Low | From 3f5301a6a29ee29c3e29a2db477498447df391c0 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Wed, 12 Feb 2025 10:30:56 -0500 Subject: [PATCH 13/32] More review comments Signed-off-by: Fanit Kolchina --- .../supported-field-types/knn-vector.md | 2 +- _query-dsl/specialized/index.md | 4 +- _query-dsl/specialized/kNN.md | 203 ++++++++++++++++++ _query-dsl/specialized/neural.md | 2 +- _vector-search/index.md | 12 ++ .../approximate-knn.md | 47 ---- .../vector-search-techniques/index.md | 4 + 7 files changed, 224 insertions(+), 50 deletions(-) create mode 100644 _query-dsl/specialized/kNN.md diff --git a/_field-types/supported-field-types/knn-vector.md b/_field-types/supported-field-types/knn-vector.md index 84a59d1165..34fab56e65 100644 --- a/_field-types/supported-field-types/knn-vector.md +++ b/_field-types/supported-field-types/knn-vector.md @@ -11,7 +11,7 @@ has_math: true **Introduced 1.0** {: .label .label-purple } -The `knn_vector` data type allows you to ingest vectors into an OpenSearch index and perform different kinds of vector search. The `knn_vector` field is highly configurable and can serve many different vector workloads. In general, a `knn_vector` field can be built either by providing a method definition or specifying a model id. +The `knn_vector` data type allows you to ingest vectors into an OpenSearch index and perform different kinds of vector search. The `knn_vector` field is highly configurable and can serve many different vector workloads. In general, a `knn_vector` field can be built either by providing a method definition or specifying a model ID. ## Example diff --git a/_query-dsl/specialized/index.md b/_query-dsl/specialized/index.md index 8a4cd81af6..1e0f8194f6 100644 --- a/_query-dsl/specialized/index.md +++ b/_query-dsl/specialized/index.md @@ -14,7 +14,9 @@ OpenSearch supports the following specialized queries: - `more_like_this`: Finds documents similar to the provided text, document, or collection of documents. 
-- [`neural`]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/): Used for vector field search in [neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/). +- [`knn`]({{site.url}}{{site.baseurl}}/query-dsl/specialized/kNN/): Used for searching raw vectors during [vector search]({{site.url}}{{site.baseurl}}/vector-search/). + +- [`neural`]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/): Used for searching by text or image in [vector search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/). - [`neural_sparse`]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural-sparse/): Used for vector field search in [sparse neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/). diff --git a/_query-dsl/specialized/kNN.md b/_query-dsl/specialized/kNN.md new file mode 100644 index 0000000000..22f4c655ef --- /dev/null +++ b/_query-dsl/specialized/kNN.md @@ -0,0 +1,203 @@ +--- +layout: default +title: k-NN +parent: Specialized queries +nav_order: 55 +--- + +# k-NN query + +Use the `knn` query for vector field search in [vector search]({{site.url}}{{site.baseurl}}/vector-search/). The query can use either raw vectors or automatically generate vectors from text using a machine learning model. + +## Request body fields + +Provide a vector field in the `knn` query and specify additional request fields in the vector field object: + +```json +"knn": { + "": { + "vector": [], + "k": , + ... + } +} +``` + +The top-level `vector_field` specifies the vector field against which to run a search query. The following table lists all supported request fields. + +Field | Data type | Required/Optional | Description +:--- | :--- | :--- | :--- +`vector` | Array of floats | Required | The query vector to use for vector search. +`k` | Integer | Optional | The number of nearest neighbors to return. Required if `max_distance` or `min_score` is not specified. +`max_distance` | Float | Optional | The maximum distance threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/). +`min_score` | Float | Optional | The minimum score threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/). +`filter` | Object | Optional | A filter to apply to the k-NN search. For more information, see [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/vector-search-with-filters/). +`method_parameters` | Object | Optional | Additional parameters for fine-tuning the search:
- `ef_search`: Number of vectors to examine (for `hnsw` method)
- `nprobes`: Number of buckets to examine (for `ivf` method). For more information, see [Specifying method parameters in the query](#specifying-method-parameters-in-the-query). +`rescore` | Object | Optional | Parameters for configuring rescoring functionality:
- `oversample_factor`: Controls the oversampling of candidate vectors before ranking. For more information, see [Rescoring results](#rescoring-results). +`expand_nested_docs` | Boolean | Optional | When `true`, retrieves scores for all nested field documents within each parent document. Used with nested queries. For more information, see [Vector search with nested fields]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/nested-search-knn/). + +## Example request + +```json +GET /my-nlp-index/_search +{ + "query": { + "knn": { + "my_vector": { + "vector": [1.5, 2.5], + "k": 3 + } + } + } +} +``` +{% include copy-curl.html %} + +## Example request: Nested fields + +```json +GET my-knn-index-1/_search +{ + "_source": false, + "query": { + "nested": { + "path": "nested_field", + "query": { + "knn": { + "nested_field.my_vector": { + "vector": [1,1,1], + "k": 2, + "expand_nested_docs": true + } + } + }, + "inner_hits": { + "_source": false, + "fields":["nested_field.color"] + }, + "score_mode": "max" + } + } +} +``` +{% include copy-curl.html %} + +## Example request: Radial search with max_distance + +The following example shows a radial search performed with `max_distance`: + +```json +GET knn-index-test/_search +{ + "query": { + "knn": { + "my_vector": { + "vector": [ + 7.1, + 8.3 + ], + "max_distance": 2 + } + } + } +} +``` +{% include copy-curl.html %} + + +## Example request: Radial search with min_score + +The following example shows a radial search performed with `min_score`: + +```json +GET knn-index-test/_search +{ + "query": { + "knn": { + "my_vector": { + "vector": [7.1, 8.3], + "min_score": 0.95 + } + } + } +} +``` +{% include copy-curl.html %} + +## Specifying method parameters in the query + +Starting with version 2.16, you can provide `method_parameters` in a search request: + +```json +GET my-knn-index-1/_search +{ + "size": 2, + "query": { + "knn": { + "target-field": { + "vector": [2, 3, 5, 6], + "k": 2, + "method_parameters" : { + "ef_search": 100 + } + } + } + } +} +``` +{% include copy-curl.html %} + +These parameters are dependent on the combination of engine and method used to create the index. The following sections provide information about the supported `method_parameters`. + +### ef_search + +You can provide the `ef_search` parameter when searching an index created using the `hnsw` method. The `ef_search` parameter specifies the number of vectors to examine in order to find the top k nearest neighbors. Higher `ef_search` values improve recall at the cost of increased search latency. The value must be positive. + +The following table provides information about the `ef_search` parameter for the supported engines. + +Engine | Radial query support | Notes +:--- | :--- | :--- +`nmslib` | No | If `ef_search` is present in a query, it overrides the `index.knn.algo_param.ef_search` index setting. +`faiss` | Yes | If `ef_search` is present in a query, it overrides the `index.knn.algo_param.ef_search` index setting. +`lucene` | No | When creating a search query, you must specify `k`. If you provide both `k` and `ef_search`, then the larger value is passed to the engine. If `ef_search` is larger than `k`, you can provide the `size` parameter to limit the final number of results to `k`. + + +### nprobes + + +You can provide the `nprobes` parameter when searching an index created using the `ivf` method. The `nprobes` parameter specifies the number of buckets to examine in order to find the top k nearest neighbors. 
Higher `nprobes` values improve recall at the cost of increased search latency. The value must be positive. + +The following table provides information about the `nprobes` parameter for the supported engines. + +Engine | Notes +:--- | :--- +`faiss` | If `nprobes` is present in a query, it overrides the value provided when creating the index. + +## Rescoring results + +You can fine-tune search by providing the `ef_search` and `oversample_factor` parameters. +The `oversample_factor` parameter controls the factor by which the search oversamples the candidate vectors before ranking them. Using a higher oversample factor means that more candidates will be considered before ranking, improving accuracy but also increasing search time. When selecting the `oversample_factor` value, consider the trade-off between accuracy and efficiency. For example, setting the `oversample_factor` to `2.0` will double the number of candidates considered during the ranking phase, which may help achieve better results. + +The following request specifies the `ef_search` and `oversample_factor` parameters: + +```json +GET my-vector-index/_search +{ + "size": 2, + "query": { + "knn": { + "my_vector_field": { + "vector": [1.5, 5.5, 1.5, 5.5, 1.5, 5.5, 1.5, 5.5], + "k": 10, + "method_parameters": { + "ef_search": 10 + }, + "rescore": { + "oversample_factor": 10.0 + } + } + } + } +} +``` +{% include copy-curl.html %} \ No newline at end of file diff --git a/_query-dsl/specialized/neural.md b/_query-dsl/specialized/neural.md index ae9e1f2ea4..5350c742f9 100644 --- a/_query-dsl/specialized/neural.md +++ b/_query-dsl/specialized/neural.md @@ -7,7 +7,7 @@ nav_order: 50 # Neural query -Use the `neural` query for vector field search in [neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/). +Use the `neural` query for vector field search by text or image in [vector search]({{site.url}}{{site.baseurl}}/vector-search/). ## Request body fields diff --git a/_vector-search/index.md b/_vector-search/index.md index 70006dadf2..ffd897b297 100644 --- a/_vector-search/index.md +++ b/_vector-search/index.md @@ -43,10 +43,22 @@ Traditional lexical search, based on term frequency models like BM25, is effecti OpenSearch combines traditional search, analytics, and vector search into a single, unified solution. Its vector database capabilities simplify the development of artificial intelligence (AI) applications by reducing the effort required to manage and integrate AI-generated assets. You can bring your models, vectors, and metadata into OpenSearch to enable vector, lexical, and hybrid search and analytics, all with built-in performance and scalability. +## Key features + +OpenSearch vector search supports the following key features: + +- **Automatic embedding generation**: Generate vector embeddings dynamically within OpenSearch using built-in machine learning models, eliminating the need for external preprocessing of your data. +- **Advanced filtering capabilities**: Combine vector search with traditional filters to refine results, enabling precise control over search outcomes while maintaining semantic relevance. +- **Multi-vector support**: Store and search multiple vectors per document using nested fields, useful for complex documents with multiple components requiring separate vector representations. +- **Memory-efficient search**: Optimize memory usage through various quantization techniques and efficient indexing methods, making vector search practical even with large-scale deployments. 
+- **Hybrid search capabilities**: Combine traditional keyword search with vector-based semantic search to use the strengths of both approaches, improving search relevance and accuracy. + ## Using OpenSearch as a vector database OpenSearch provides an integrated  vector database that can support AI systems by serving as a knowledge base. This benefits AI applications like generative AI and natural language search by providing a long-term memory of AI-generated outputs. These outputs can be used to enhance information retrieval and analytics, improve efficiency and stability, and give generative AI models a broader and deeper pool of data from which to draw more accurate responses to queries. + + [Get started]({{site.url}}{{site.baseurl}}/vector-search/getting-started/){: .btn-dark-blue} diff --git a/_vector-search/vector-search-techniques/approximate-knn.md b/_vector-search/vector-search-techniques/approximate-knn.md index 61e4f11732..01f403f6d2 100644 --- a/_vector-search/vector-search-techniques/approximate-knn.md +++ b/_vector-search/vector-search-techniques/approximate-knn.md @@ -263,53 +263,6 @@ POST _bulk After data is ingested, it can be searched in the same way as any other `knn_vector` field. -### Additional query parameters - -Starting with version 2.16, you can provide `method_parameters` in a search request: - -```json -GET my-knn-index-1/_search -{ - "size": 2, - "query": { - "knn": { - "target-field": { - "vector": [2, 3, 5, 6], - "k": 2, - "method_parameters" : { - "ef_search": 100 - } - } - } - } -} -``` -{% include copy-curl.html %} - -These parameters are dependent on the combination of engine and method used to create the index. The following sections provide information about the supported `method_parameters`. - -#### `ef_search` - -You can provide the `ef_search` parameter when searching an index created using the `hnsw` method. The `ef_search` parameter specifies the number of vectors to examine in order to find the top k nearest neighbors. Higher `ef_search` values improve recall at the cost of increased search latency. The value must be positive. - -The following table provides information about the `ef_search` parameter for the supported engines. - -Engine | Radial query support | Notes -:--- | :--- | :--- -`nmslib` | No | If `ef_search` is present in a query, it overrides the `index.knn.algo_param.ef_search` index setting. -`faiss` | Yes | If `ef_search` is present in a query, it overrides the `index.knn.algo_param.ef_search` index setting. -`lucene` | No | When creating a search query, you must specify `k`. If you provide both `k` and `ef_search`, then the larger value is passed to the engine. If `ef_search` is larger than `k`, you can provide the `size` parameter to limit the final number of results to `k`. - -#### `nprobes` - -You can provide the `nprobes` parameter when searching an index created using the `ivf` method. The `nprobes` parameter specifies the number of buckets to examine in order to find the top k nearest neighbors. Higher `nprobes` values improve recall at the cost of increased search latency. The value must be positive. - -The following table provides information about the `nprobes` parameter for the supported engines. - -Engine | Notes -:--- | :--- -`faiss` | If `nprobes` is present in a query, it overrides the value provided when creating the index. - ### Rescoring quantized results using full precision Quantization can be used to significantly reduce the memory footprint of a vector index. 
For more information about quantization, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization). Because some vector representation is lost during quantization, the computed distances will be approximate. This causes the overall recall of the search to decrease. diff --git a/_vector-search/vector-search-techniques/index.md b/_vector-search/vector-search-techniques/index.md index 58f48f9ebd..32b11b9bb8 100644 --- a/_vector-search/vector-search-techniques/index.md +++ b/_vector-search/vector-search-techniques/index.md @@ -63,6 +63,10 @@ In general, select NMSLIB or Faiss for large-scale use cases. Lucene is a good o
| Vector compression | Flat | Flat<br>Product quantization | Flat<br>Product quantization | Flat |
| Memory consumption | High | High<br>Low with PQ | Medium<br>
Low with PQ | High | +## Using sparse vectors +_Neural sparse search_ offers an efficient alternative to dense vector search by using sparse embedding models and inverted indexes, providing performance similar to BM25. Unlike dense vector methods that require significant memory and CPU resources, sparse search creates a list of token-weight pairs and stores them in a rank features index. This approach combines the efficiency of traditional search with the semantic understanding of neural networks. OpenSearch supports both automatic embedding generation through ingest pipelines and direct sparse vector ingestion. For implementation details and setup instructions, see [Neural sparse search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/neural-sparse-search/). +## Combining multiple search techniques +_Hybrid search_ enhances search relevance by combining multiple search techniques within OpenSearch. It integrates traditional keyword search with vector-based semantic search. Through a configurable search pipeline, hybrid search normalizes and combines scores from different search methods to provide unified, relevant results. This approach is particularly effective for complex queries where both semantic understanding and exact matching are important. The search pipeline can be further customized with post-filtering operations and aggregations to meet specific search requirements. For complete implementation details, see [Hybrid search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/hybrid-search/). From 8b8d831f12971af71b35bc27cdd254eb4f7e442f Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Wed, 12 Feb 2025 13:02:25 -0500 Subject: [PATCH 14/32] Resolve merge conflicts Signed-off-by: Fanit Kolchina --- _search-plugins/knn/knn-index.md | 385 -------------- .../knn/knn-vector-quantization.md | 501 ------------------ _search-plugins/knn/settings.md | 39 -- _search-plugins/vector-search.md | 283 ---------- 4 files changed, 1208 deletions(-) delete mode 100644 _search-plugins/knn/knn-index.md delete mode 100644 _search-plugins/knn/knn-vector-quantization.md delete mode 100644 _search-plugins/knn/settings.md delete mode 100644 _search-plugins/vector-search.md diff --git a/_search-plugins/knn/knn-index.md b/_search-plugins/knn/knn-index.md deleted file mode 100644 index b5cfa03470..0000000000 --- a/_search-plugins/knn/knn-index.md +++ /dev/null @@ -1,385 +0,0 @@ ---- -layout: default -title: k-NN index -nav_order: 5 -parent: k-NN search -has_children: false ---- - -# k-NN index - -The k-NN plugin introduces a custom data type, the `knn_vector`, that allows users to ingest their k-NN vectors into an OpenSearch index and perform different kinds of k-NN search. The `knn_vector` field is highly configurable and can serve many different k-NN workloads. For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/). 
- -To create a k-NN index, set the `settings.index.knn` parameter to `true`: - -```json -PUT /test-index -{ - "settings": { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 3, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "lucene", - "parameters": { - "ef_construction": 128, - "m": 24 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -## Byte vectors - -Starting with k-NN plugin version 2.17, you can use `byte` vectors with the `faiss` and `lucene` engines to reduce the amount of required memory and storage space. For more information, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors). - -## Binary vectors - -Starting with k-NN plugin version 2.16, you can use `binary` vectors with the `faiss` engine to reduce the amount of required storage space. For more information, see [Binary vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). - -Starting with k-NN plugin version 2.19, you can use `binary` vectors with the `lucene` engine. - -## SIMD optimization for the Faiss engine - -Starting with version 2.13, the k-NN plugin supports [Single Instruction Multiple Data (SIMD)](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) processing if the underlying hardware supports SIMD instructions (AVX2 on x64 architecture and Neon on ARM64 architecture). SIMD is supported by default on Linux machines only for the Faiss engine. SIMD architecture helps boost overall performance by improving indexing throughput and reducing search latency. Starting with version 2.18, the k-NN plugin supports AVX-512 SIMD instructions on x64 architecture. Starting with version 2.19, the k-NN plugin supports advanced AVX-512 SIMD instructions on x64 architecture for Intel Sapphire Rapids or a newer-generation processor, improving the performance of Hamming distance computation. - -SIMD optimization is applicable only if the vector dimension is a multiple of 8. -{: .note} - - -### x64 architecture - - -For x64 architecture, the following versions of the Faiss library are built and shipped with the artifact: - -- `libopensearchknn_faiss_avx512_spr.so`: The Faiss library containing advanced AVX-512 SIMD instructions for newer-generation processors, available on public clouds such as AWS for c/m/r 7i or newer instances. -- `libopensearchknn_faiss_avx512.so`: The Faiss library containing AVX-512 SIMD instructions. -- `libopensearchknn_faiss_avx2.so`: The Faiss library containing AVX2 SIMD instructions. -- `libopensearchknn_faiss.so`: The non-optimized Faiss library without SIMD instructions. - -When using the Faiss library, the performance ranking is as follows: advanced AVX-512 > AVX-512 > AVX2 > no optimization. -{: .note } - -If your hardware supports advanced AVX512(spr), the k-NN plugin loads the `libopensearchknn_faiss_avx512_spr.so` library at runtime. - -If your hardware supports AVX-512, the k-NN plugin loads the `libopensearchknn_faiss_avx512.so` library at runtime. - -If your hardware supports AVX2 but doesn't support AVX-512, the k-NN plugin loads the `libopensearchknn_faiss_avx2.so` library at runtime. 
- -To disable the advanced AVX-512 (for Sapphire Rapids or newer-generation processors), AVX512, and AVX2 SIMD instructions and load the non-optimized Faiss library (`libopensearchknn_faiss.so`), specify the `knn.faiss.avx512_spr.disabled`, `knn.faiss.avx512.disabled`, and `knn.faiss.avx2.disabled` static settings as `true` in `opensearch.yml` (by default, all of these are `false`). - -Note that to update a static setting, you must stop the cluster, change the setting, and restart the cluster. For more information, see [Static settings]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#static-settings). - -### ARM64 architecture - -For the ARM64 architecture, only one performance-boosting Faiss library (`libopensearchknn_faiss.so`) is built and shipped. The library contains Neon SIMD instructions and cannot be disabled. - -## Method definitions - -A method definition refers to the underlying configuration of the approximate k-NN algorithm you want to use. Method definitions are used to either create a `knn_vector` field (when the method does not require training) or [create a model during training]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) that can then be used to [create a `knn_vector` field]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). - -A method definition will always contain the name of the method, the space_type the method is built for, the engine -(the library) to use, and a map of parameters. - -Mapping parameter | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`name` | true | n/a | false | The identifier for the nearest neighbor method. -`space_type` | false | l2 | false | The vector space used to calculate the distance between vectors. Note: This value can also be specified at the top level of the mapping. -`engine` | false | faiss | false | The approximate k-NN library to use for indexing and search. The available libraries are `faiss`, `lucene`, and `nmslib` (deprecated). -`parameters` | false | null | false | The parameters used for the nearest neighbor method. - -### Supported NMSLIB methods - -Method name | Requires training | Supported spaces | Description -:--- | :--- | :--- | :--- -`hnsw` | false | l2, innerproduct, cosinesimil, l1, linf | Hierarchical proximity graph approach to approximate k-NN search. For more details on the algorithm, see this [abstract](https://arxiv.org/abs/1603.09320). - -#### HNSW parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`ef_construction` | false | 100 | false | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed. -`m` | false | 16 | false | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100. - -For nmslib (deprecated), *ef_search* is set in the [index settings](#index-settings). -{: .note} - -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). -{: .note} - -### Supported Faiss methods - -Method name | Requires training | Supported spaces | Description -:--- | :--- |:---| :--- -`hnsw` | false | l2, innerproduct, hamming | Hierarchical proximity graph approach to approximate k-NN search. 
-`ivf` | true | l2, innerproduct, hamming | Stands for _inverted file index_. Bucketing approach where vectors are assigned different buckets based on clustering and, during search, only a subset of the buckets is searched. - -For hnsw, "innerproduct" is not available when PQ is used. -{: .note} - -The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). -{: .note} - -#### HNSW parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`ef_search` | false | 100 | false | The size of the dynamic list used during k-NN searches. Higher values result in more accurate but slower searches. -`ef_construction` | false | 100 | false | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed. -`m` | false | 16 | false | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100. -`encoder` | false | flat | false | Encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index, at the expense of search accuracy. - -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` and `ef_search` values (`512`). -{: .note} - -#### IVF parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`nlist` | false | 4 | false | Number of buckets to partition vectors into. Higher values may lead to more accurate searches at the expense of memory and training latency. For more information about choosing the right value, refer to [Guidelines to choose an index](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index). -`nprobes` | false | 1 | false | Number of buckets to search during query. Higher values lead to more accurate but slower searches. -`encoder` | false | flat | false | Encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index, at the expense of search accuracy. - -For more information about setting these parameters, refer to the [Faiss documentation](https://github.com/facebookresearch/faiss/wiki/Faiss-indexes). - -#### IVF training requirements - -The IVF algorithm requires a training step. To create an index that uses IVF, you need to train a model with the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model), passing the IVF method definition. IVF requires that, at a minimum, there are `nlist` training data points, but it is [recommended that you use more than this](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index#how-big-is-the-dataset). Training data can be composed of either the same data that is going to be ingested or a separate dataset. - -### Supported Lucene methods - -Method name | Requires training | Supported spaces | Description -:--- | :--- |:--------------------------------------------------------------------------------| :--- -`hnsw` | false | l2, cosinesimil, innerproduct (supported in OpenSearch 2.13 and later), hamming | Hierarchical proximity graph approach to approximate k-NN search. - -The `hamming` space type is supported for binary vectors in OpenSearch version 2.19 and later. 
For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). -{: .note} - -#### HNSW parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`ef_construction` | false | 100 | false | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed.<br>The Lucene engine uses the proprietary term "beam_width" to describe this function, which corresponds directly to "ef_construction". To be consistent throughout the OpenSearch documentation, we retain the term "ef_construction" for this parameter.
-`m` | false | 16 | false | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100.<br>
The Lucene engine uses the proprietary term "max_connections" to describe this function, which corresponds directly to "m". To be consistent throughout OpenSearch documentation, we retain the term "m" to label this parameter. - -Lucene HNSW implementation ignores `ef_search` and dynamically sets it to the value of "k" in the search request. Therefore, there is no need to make settings for `ef_search` when using the Lucene engine. -{: .note} - -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). -{: .note} - -```json -"method": { - "name":"hnsw", - "engine":"lucene", - "parameters":{ - "m":2048, - "ef_construction": 245 - } -} -``` - -### Supported Faiss encoders - -You can use encoders to reduce the memory footprint of a k-NN index at the expense of search accuracy. The k-NN plugin currently supports the `flat`, `pq`, and `sq` encoders in the Faiss library. - -The following example method definition specifies the `hnsw` method and a `pq` encoder: - -```json -"method": { - "name":"hnsw", - "engine":"faiss", - "parameters":{ - "encoder":{ - "name":"pq", - "parameters":{ - "code_size": 8, - "m": 8 - } - } - } -} -``` - -The `hnsw` method supports the `pq` encoder for OpenSearch versions 2.10 and later. The `code_size` parameter of a `pq` encoder with the `hnsw` method must be **8**. -{: .important} - -Encoder name | Requires training | Description -:--- | :--- | :--- -`flat` (Default) | false | Encode vectors as floating-point arrays. This encoding does not reduce memory footprint. -`pq` | true | An abbreviation for _product quantization_, it is a lossy compression technique that uses clustering to encode a vector into a fixed size of bytes, with the goal of minimizing the drop in k-NN search accuracy. At a high level, vectors are broken up into `m` subvectors, and then each subvector is represented by a `code_size` code obtained from a code book produced during training. For more information about product quantization, see [this blog post](https://medium.com/dotstar/understanding-faiss-part-2-79d90b1e5388). -`sq` | false | An abbreviation for _scalar quantization_. Starting with k-NN plugin version 2.13, you can use the `sq` encoder to quantize 32-bit floating-point vectors into 16-bit floats. In version 2.13, the built-in `sq` encoder is the SQFP16 Faiss encoder. The encoder reduces memory footprint with a minimal loss of precision and improves performance by using SIMD optimization (using AVX2 on x86 architecture or Neon on ARM64 architecture). For more information, see [Faiss scalar quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization#faiss-16-bit-scalar-quantization). - -#### PQ parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`m` | false | 1 | false | Determines the number of subvectors into which to break the vector. Subvectors are encoded independently of each other. This vector dimension must be divisible by `m`. Maximum value is 1,024. -`code_size` | false | 8 | false | Determines the number of bits into which to encode a subvector. Maximum value is 8. For IVF, this value must be less than or equal to 8. For HNSW, this value can only be 8. - -#### SQ parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :-- | :--- | :--- -`type` | false | `fp16` | false | The type of scalar quantization to be used to encode 32-bit float vectors into the corresponding type. 
As of OpenSearch 2.13, only the `fp16` encoder type is supported. For the `fp16` encoder, vector values must be in the [-65504.0, 65504.0] range. -`clip` | false | `false` | false | If `true`, then any vector values outside of the supported range for the specified vector type are rounded so that they are in the range. If `false`, then the request is rejected if any vector values are outside of the supported range. Setting `clip` to `true` may decrease recall. - -For more information and examples, see [Using Faiss scalar quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/#using-faiss-scalar-quantization). - -#### Examples - -The following example uses the `ivf` method without specifying an encoder (by default, OpenSearch uses the `flat` encoder): - -```json -"method": { - "name":"ivf", - "engine":"faiss", - "parameters":{ - "nlist": 4, - "nprobes": 2 - } -} -``` - -The following example uses the `ivf` method with a `pq` encoder: - -```json -"method": { - "name":"ivf", - "engine":"faiss", - "parameters":{ - "encoder":{ - "name":"pq", - "parameters":{ - "code_size": 8, - "m": 8 - } - } - } -} -``` - -The following example uses the `hnsw` method without specifying an encoder (by default, OpenSearch uses the `flat` encoder): - -```json -"method": { - "name":"hnsw", - "engine":"faiss", - "parameters":{ - "ef_construction": 256, - "m": 8 - } -} -``` - -The following example uses the `hnsw` method with an `sq` encoder of type `fp16` with `clip` enabled: - -```json -"method": { - "name":"hnsw", - "engine":"faiss", - "parameters":{ - "encoder": { - "name": "sq", - "parameters": { - "type": "fp16", - "clip": true - } - }, - "ef_construction": 256, - "m": 8 - } -} -``` - -The following example uses the `ivf` method with an `sq` encoder of type `fp16`: - -```json -"method": { - "name":"ivf", - "engine":"faiss", - "parameters":{ - "encoder": { - "name": "sq", - "parameters": { - "type": "fp16", - "clip": false - } - }, - "nprobes": 2 - } -} -``` - -### Choosing the right method - -There are several options to choose from when building your `knn_vector` field. To determine the correct methods and parameters, you should first understand the requirements of your workload and what trade-offs you are willing to make. Factors to consider are (1) query latency, (2) query quality, (3) memory limits, and (4) indexing latency. - -If memory is not a concern, HNSW offers a strong query latency/query quality trade-off. - -If you want to use less memory and increase indexing speed as compared to HNSW while maintaining similar query quality, you should evaluate IVF. - -If memory is a concern, consider adding a PQ encoder to your HNSW or IVF index. Because PQ is a lossy encoding, query quality will drop. - -You can reduce the memory footprint by a factor of 2, with a minimal loss in search quality, by using the [`fp_16` encoder]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/#faiss-16-bit-scalar-quantization). If your vector dimensions are within the [-128, 127] byte range, we recommend using the [byte quantizer]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#byte-vectors) to reduce the memory footprint by a factor of 4. To learn more about vector quantization options, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/). - -### Memory estimation - -In a typical OpenSearch cluster, a certain portion of RAM is reserved for the JVM heap. 
The k-NN plugin allocates native library indexes to a portion of the remaining RAM. This portion's size is determined by the `circuit_breaker_limit` cluster setting. By default, the limit is set to 50%. - -Having a replica doubles the total number of vectors. -{: .note } - -For information about using memory estimation with vector quantization, see the [vector quantization documentation]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/#memory-estimation). -{: .note } - -#### HNSW memory estimation - -The memory required for HNSW is estimated to be `1.1 * (4 * dimension + 8 * M)` bytes/vector. - -As an example, assume you have a million vectors with a dimension of 256 and M of 16. The memory requirement can be estimated as follows: - -``` -1.1 * (4 * 256 + 8 * 16) * 1,000,000 ~= 1.267 GB -``` - -#### IVF memory estimation - -The memory required for IVF is estimated to be `1.1 * (((4 * dimension) * num_vectors) + (4 * nlist * d))` bytes. - -As an example, assume you have a million vectors with a dimension of 256 and nlist of 128. The memory requirement can be estimated as follows: - -``` -1.1 * (((4 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 1.126 GB - -``` - -## Index settings - -Additionally, the k-NN plugin introduces several index settings that can be used to configure the k-NN structure as well. - -At the moment, several parameters defined in the settings are in the deprecation process. Those parameters should be set in the mapping instead of the index settings. Parameters set in the mapping will override the parameters set in the index settings. Setting the parameters in the mapping allows an index to have multiple `knn_vector` fields with different parameters. - -Setting | Default | Updatable | Description -:--- |:--------| :--- | :--- -`index.knn` | false | false | Whether the index should build native library indexes for the `knn_vector` fields. If set to false, the `knn_vector` fields will be stored in doc values, but approximate k-NN search functionality will be disabled. -`index.knn.algo_param.ef_search` (Deprecated) | 100 | true | The size of the dynamic list used during k-NN searches. Higher values result in more accurate but slower searches. Only available for NMSLIB. -`index.knn.advanced.approximate_threshold` | 0 | true | The number of vectors a segment must have before creating specialized data structures for approximate search. Set to `-1` to disable building vector data structures and `0` to always build them. -`index.knn.algo_param.ef_construction` | 100 | false | Deprecated in 1.0.0. Instead, use the [mapping parameters]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#method-definitions) to set this value. -`index.knn.algo_param.m` | 16 | false | Deprecated in 1.0.0. Use the [mapping parameters]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#method-definitions) to set this value instead. -`index.knn.space_type` | l2 | false | Deprecated in 1.0.0. Use the [mapping parameters]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#method-definitions) to set this value instead. - -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` and `ef_search` values (`512`). 
-{: .note} diff --git a/_search-plugins/knn/knn-vector-quantization.md b/_search-plugins/knn/knn-vector-quantization.md deleted file mode 100644 index b820eea3d0..0000000000 --- a/_search-plugins/knn/knn-vector-quantization.md +++ /dev/null @@ -1,501 +0,0 @@ ---- -layout: default -title: k-NN vector quantization -nav_order: 27 -parent: k-NN search -has_children: false -has_math: true ---- - -# k-NN vector quantization - -By default, the k-NN plugin supports the indexing and querying of vectors of type `float`, where each dimension of the vector occupies 4 bytes of memory. For use cases that require ingestion on a large scale, keeping `float` vectors can be expensive because OpenSearch needs to construct, load, save, and search graphs (for the native `faiss` and `nmslib` [deprecated] engines). To reduce the memory footprint, you can use vector quantization. - -OpenSearch supports many varieties of quantization. In general, the level of quantization will provide a trade-off between the accuracy of the nearest neighbor search and the size of the memory footprint consumed by the vector search. The supported types include byte vectors, 16-bit scalar quantization, product quantization (PQ), and binary quantization(BQ). - -## Byte vectors - -Starting with version 2.17, the k-NN plugin supports `byte` vectors with the `faiss` and `lucene` engines in order to reduce the amount of required memory. This requires quantizing the vectors outside of OpenSearch before ingesting them into an OpenSearch index. For more information, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors). - -## Lucene scalar quantization - -Starting with version 2.16, the k-NN plugin supports built-in scalar quantization for the Lucene engine. Unlike [byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors), which require you to quantize vectors before ingesting documents, the Lucene scalar quantizer quantizes input vectors in OpenSearch during ingestion. The Lucene scalar quantizer converts 32-bit floating-point input vectors into 7-bit integer vectors in each segment using the minimum and maximum quantiles computed based on the [`confidence_interval`](#confidence-interval) parameter. During search, the query vector is quantized in each segment using the segment's minimum and maximum quantiles in order to compute the distance between the query vector and the segment's quantized input vectors. - -Quantization can decrease the memory footprint by a factor of 4 in exchange for some loss in recall. Additionally, quantization slightly increases disk usage because it requires storing both the raw input vectors and the quantized vectors. - -### Using Lucene scalar quantization - -To use the Lucene scalar quantizer, set the k-NN vector field's `method.parameters.encoder.name` to `sq` when creating a k-NN index: - -```json -PUT /test-index -{ - "settings": { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 2, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "lucene", - "parameters": { - "encoder": { - "name": "sq" - }, - "ef_construction": 256, - "m": 8 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -### Confidence interval - -Optionally, you can specify the `confidence_interval` parameter in the `method.parameters.encoder` object. 
-The `confidence_interval` is used to compute the minimum and maximum quantiles in order to quantize the vectors: -- If you set the `confidence_interval` to a value in the `0.9` to `1.0` range, inclusive, then the quantiles are calculated statically. For example, setting the `confidence_interval` to `0.9` specifies to compute the minimum and maximum quantiles based on the middle 90% of the vector values, excluding the minimum 5% and maximum 5% of the values. -- Setting `confidence_interval` to `0` specifies to compute the quantiles dynamically, which involves oversampling and additional computations performed on the input data. -- When `confidence_interval` is not set, it is computed based on the vector dimension $$d$$ using the formula $$max(0.9, 1 - \frac{1}{1 + d})$$. - -Lucene scalar quantization is applied only to `float` vectors. If you change the default value of the `data_type` parameter from `float` to `byte` or any other type when mapping a [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/), then the request is rejected. -{: .warning} - -The following example method definition specifies the Lucene `sq` encoder with the `confidence_interval` set to `1.0`. This `confidence_interval` specifies to consider all the input vectors when computing the minimum and maximum quantiles. Vectors are quantized to 7 bits by default: - -```json -PUT /test-index -{ - "settings": { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 2, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "lucene", - "parameters": { - "encoder": { - "name": "sq", - "parameters": { - "confidence_interval": 1.0 - } - }, - "ef_construction": 256, - "m": 8 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -There are no changes to ingestion or query mapping and no range limitations for the input vectors. - -### Memory estimation - -In the ideal scenario, 7-bit vectors created by the Lucene scalar quantizer use only 25% of the memory required by 32-bit vectors. - -#### HNSW memory estimation - -The memory required for the Hierarchical Navigable Small World (HNSW) graph can be estimated as `1.1 * (dimension + 8 * m)` bytes/vector, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph. - -As an example, assume that you have 1 million vectors with a dimension of 256 and M of 16. The memory requirement can be estimated as follows: - -```r -1.1 * (256 + 8 * 16) * 1,000,000 ~= 0.4 GB -``` - -## Faiss 16-bit scalar quantization - -Starting with version 2.13, the k-NN plugin supports performing scalar quantization for the Faiss engine within OpenSearch. Within the Faiss engine, a scalar quantizer (SQfp16) performs the conversion between 32-bit and 16-bit vectors. At ingestion time, when you upload 32-bit floating-point vectors to OpenSearch, SQfp16 quantizes them into 16-bit floating-point vectors and stores the quantized vectors in a k-NN index. - -At search time, SQfp16 decodes the vector values back into 32-bit floating-point values for distance computation. The SQfp16 quantization can decrease the memory footprint by a factor of 2. Additionally, it leads to a minimal loss in recall when differences between vector values are large compared to the error introduced by eliminating their two least significant bits. 
When used with [SIMD optimization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index#simd-optimization-for-the-faiss-engine), SQfp16 quantization can also significantly reduce search latencies and improve indexing throughput. - -SIMD optimization is not supported on Windows. Using Faiss scalar quantization on Windows can lead to a significant drop in performance, including decreased indexing throughput and increased search latencies. -{: .warning} - -### Using Faiss scalar quantization - -To use Faiss scalar quantization, set the k-NN vector field's `method.parameters.encoder.name` to `sq` when creating a k-NN index: - -```json -PUT /test-index -{ - "settings": { - "index": { - "knn": true, - "knn.algo_param.ef_search": 100 - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 3, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "faiss", - "parameters": { - "encoder": { - "name": "sq" - }, - "ef_construction": 256, - "m": 8 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -Optionally, you can specify the parameters in `method.parameters.encoder`. For more information about `encoder` object parameters, see [SQ parameters]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#sq-parameters). - -The `fp16` encoder converts 32-bit vectors into their 16-bit counterparts. For this encoder type, the vector values must be in the [-65504.0, 65504.0] range. To define how to handle out-of-range values, the preceding request specifies the `clip` parameter. By default, this parameter is `false`, and any vectors containing out-of-range values are rejected. - -When `clip` is set to `true` (as in the preceding request), out-of-range vector values are rounded up or down so that they are in the supported range. For example, if the original 32-bit vector is `[65510.82, -65504.1]`, the vector will be indexed as a 16-bit vector `[65504.0, -65504.0]`. - -We recommend setting `clip` to `true` only if very few elements lie outside of the supported range. Rounding the values may cause a drop in recall. -{: .note} - -The following example method definition specifies the Faiss SQfp16 encoder, which rejects any indexing request that contains out-of-range vector values (because the `clip` parameter is `false` by default): - -```json -PUT /test-index -{ - "settings": { - "index": { - "knn": true, - "knn.algo_param.ef_search": 100 - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 3, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "faiss", - "parameters": { - "encoder": { - "name": "sq", - "parameters": { - "type": "fp16" - } - }, - "ef_construction": 256, - "m": 8 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -During ingestion, make sure each vector dimension is in the supported range ([-65504.0, 65504.0]). - -```json -PUT test-index/_doc/1 -{ - "my_vector1": [-65504.0, 65503.845, 55.82] -} -``` -{% include copy-curl.html %} - -During querying, the query vector has no range limitation: - -```json -GET test-index/_search -{ - "size": 2, - "query": { - "knn": { - "my_vector1": { - "vector": [265436.876, -120906.256, 99.84], - "k": 2 - } - } - } -} -``` -{% include copy-curl.html %} - -### Memory estimation - -In the best-case scenario, 16-bit vectors produced by the Faiss SQfp16 quantizer require 50% of the memory that 32-bit vectors require. 
- -#### HNSW memory estimation - -The memory required for Hierarchical Navigable Small Worlds (HNSW) is estimated to be `1.1 * (2 * dimension + 8 * m)` bytes/vector, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph. - -As an example, assume that you have 1 million vectors with a dimension of 256 and an `m` of 16. The memory requirement can be estimated as follows: - -```r -1.1 * (2 * 256 + 8 * 16) * 1,000,000 ~= 0.656 GB -``` - -#### IVF memory estimation - -The memory required for IVF is estimated to be `1.1 * (((2 * dimension) * num_vectors) + (4 * nlist * dimension))` bytes/vector, where `nlist` is the number of buckets to partition vectors into. - -As an example, assume that you have 1 million vectors with a dimension of 256 and an `nlist` of 128. The memory requirement can be estimated as follows: - -```r -1.1 * (((2 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 0.525 GB -``` - -## Faiss product quantization - -PQ is a technique used to represent a vector in a configurable amount of bits. In general, it can be used to achieve a higher level of compression as compared to byte or scalar quantization. PQ works by separating vectors into _m_ subvectors and encoding each subvector with _code_size_ bits. Thus, the total amount of memory for the vector is `m*code_size` bits, plus overhead. For details about the parameters, see [PQ parameters]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#pq-parameters). PQ is only supported for the _Faiss_ engine and can be used with either the _HNSW_ or _IVF_ approximate nearest neighbor (ANN) algorithms. - -### Using Faiss product quantization - -To minimize loss in accuracy, PQ requires a _training_ step that builds a model based on the distribution of the data that will be searched. - -The product quantizer is trained by running k-means clustering on a set of training vectors for each subvector space and extracts the centroids to be used for encoding. The training vectors can be either a subset of the vectors to be ingested or vectors that have the same distribution and dimension as the vectors to be ingested. - -In OpenSearch, the training vectors need to be present in an index. In general, the amount of training data will depend on which ANN algorithm is used and how much data will be stored in the index. For IVF-based indexes, a recommended number of training vectors is `max(1000*nlist, 2^code_size * 1000)`. For HNSW-based indexes, a recommended number is `2^code_size*1000`. See the [Faiss documentation](https://github.com/facebookresearch/faiss/wiki/FAQ#how-many-training-points-do-i-need-for-k-means) for more information about the methodology used to calculate these figures. - -For PQ, both _m_ and _code_size_ need to be selected. _m_ determines the number of subvectors into which vectors should be split for separate encoding. Consequently, the _dimension_ needs to be divisible by _m_. _code_size_ determines the number of bits used to encode each subvector. In general, we recommend a setting of `code_size = 8` and then tuning _m_ to get the desired trade-off between memory footprint and recall. - -For an example of setting up an index with PQ, see the [Building a k-NN index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model) tutorial. - -### Memory estimation - -While PQ is meant to represent individual vectors with `m*code_size` bits, in reality, the indexes consume more space. 
This is mainly due to the overhead of storing certain code tables and auxiliary data structures. - -Some of the memory formulas depend on the number of segments present. This is not typically known beforehand, but a recommended default value is 300. -{: .note} - -#### HNSW memory estimation - -The memory required for HNSW with PQ is estimated to be `1.1*(((pq_code_size / 8) * pq_m + 24 + 8 * hnsw_m) * num_vectors + num_segments * (2^pq_code_size * 4 * d))` bytes. - -As an example, assume that you have 1 million vectors with a dimension of 256, `hnsw_m` of 16, `pq_m` of 32, `pq_code_size` of 8, and 100 segments. The memory requirement can be estimated as follows: - -```r -1.1 * ((8 / 8 * 32 + 24 + 8 * 16) * 1000000 + 100 * (2^8 * 4 * 256)) ~= 0.215 GB -``` - -#### IVF memory estimation - -The memory required for IVF with PQ is estimated to be `1.1*(((pq_code_size / 8) * pq_m + 24) * num_vectors + num_segments * (2^code_size * 4 * d + 4 * ivf_nlist * d))` bytes. - -For example, assume that you have 1 million vectors with a dimension of 256, `ivf_nlist` of 512, `pq_m` of 32, `pq_code_size` of 8, and 100 segments. The memory requirement can be estimated as follows: - -```r -1.1*((8 / 8 * 64 + 24) * 1000000 + 100 * (2^8 * 4 * 256 + 4 * 512 * 256)) ~= 0.171 GB -``` - -## Binary quantization - -Starting with version 2.17, OpenSearch supports BQ with binary vector support for the Faiss engine. BQ compresses vectors into a binary format (0s and 1s), making it highly efficient in terms of memory usage. You can choose to represent each vector dimension using 1, 2, or 4 bits, depending on the desired precision. One of the advantages of using BQ is that the training process is handled automatically during indexing. This means that no separate training step is required, unlike other quantization techniques such as PQ. - -### Using BQ -To configure BQ for the Faiss engine, define a `knn_vector` field and specify the `mode` as `on_disk`. This configuration defaults to 1-bit BQ and both `ef_search` and `ef_construction` set to `100`: - -```json -PUT my-vector-index -{ - "settings" : { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector_field": { - "type": "knn_vector", - "dimension": 8, - "space_type": "l2", - "data_type": "float", - "mode": "on_disk" - } - } - } -} -``` -{% include copy-curl.html %} - -To further optimize the configuration, you can specify additional parameters, such as the compression level, and fine-tune the search parameters. For example, you can override the `ef_construction` value or define the compression level, which corresponds to the number of bits used for quantization: - -- **32x compression** for 1-bit quantization -- **16x compression** for 2-bit quantization -- **8x compression** for 4-bit quantization - -This allows for greater control over memory usage and recall performance, providing flexibility to balance between precision and storage efficiency. 
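To make these compression levels concrete, the following back-of-the-envelope estimate uses a hypothetical 1,024-dimensional `float` vector and counts only the quantized vector data, ignoring graph and metadata overhead:

```r
raw fp32 vector: 1,024 * 4 bytes      = 4,096 bytes
1-bit BQ (32x):  1,024 * 1 / 8 bytes  =   128 bytes
2-bit BQ (16x):  1,024 * 2 / 8 bytes  =   256 bytes
4-bit BQ (8x):   1,024 * 4 / 8 bytes  =   512 bytes
```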
- -To specify the compression level, set the `compression_level` parameter: - -```json -PUT my-vector-index -{ - "settings" : { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector_field": { - "type": "knn_vector", - "dimension": 8, - "space_type": "l2", - "data_type": "float", - "mode": "on_disk", - "compression_level": "16x", - "method": { - "name": "hnsw", - "engine": "faiss", - "parameters": { - "ef_construction": 16 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -The following example further fine-tunes the configuration by defining `ef_construction`, `encoder`, and the number of `bits` (which can be `1`, `2`, or `4`): - -```json -PUT my-vector-index -{ - "settings" : { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector_field": { - "type": "knn_vector", - "dimension": 8, - "method": { - "name": "hnsw", - "engine": "faiss", - "space_type": "l2", - "parameters": { - "m": 16, - "ef_construction": 512, - "encoder": { - "name": "binary", - "parameters": { - "bits": 1 - } - } - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -### Search using binary quantized vectors - -You can perform a k-NN search on your index by providing a vector and specifying the number of nearest neighbors (k) to return: - -```json -GET my-vector-index/_search -{ - "size": 2, - "query": { - "knn": { - "my_vector_field": { - "vector": [1.5, 5.5, 1.5, 5.5, 1.5, 5.5, 1.5, 5.5], - "k": 10 - } - } - } -} -``` -{% include copy-curl.html %} - -You can also fine-tune search by providing the `ef_search` and `oversample_factor` parameters. -The `oversample_factor` parameter controls the factor by which the search oversamples the candidate vectors before ranking them. Using a higher oversample factor means that more candidates will be considered before ranking, improving accuracy but also increasing search time. When selecting the `oversample_factor` value, consider the trade-off between accuracy and efficiency. For example, setting the `oversample_factor` to `2.0` will double the number of candidates considered during the ranking phase, which may help achieve better results. - -The following request specifies the `ef_search` and `oversample_factor` parameters: - -```json -GET my-vector-index/_search -{ - "size": 2, - "query": { - "knn": { - "my_vector_field": { - "vector": [1.5, 5.5, 1.5, 5.5, 1.5, 5.5, 1.5, 5.5], - "k": 10, - "method_parameters": { - "ef_search": 10 - }, - "rescore": { - "oversample_factor": 10.0 - } - } - } - } -} -``` -{% include copy-curl.html %} - - -#### HNSW memory estimation - -The memory required for the Hierarchical Navigable Small World (HNSW) graph can be estimated as `1.1 * (dimension + 8 * m)` bytes/vector, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph. - -As an example, assume that you have 1 million vectors with a dimension of 256 and an `m` of 16. The following sections provide memory requirement estimations for various compression values. - -##### 1-bit quantization (32x compression) - -In 1-bit quantization, each dimension is represented using 1 bit, equivalent to a 32x compression factor. The memory requirement can be estimated as follows: - -```r -Memory = 1.1 * ((256 * 1 / 8) + 8 * 16) * 1,000,000 - ~= 0.176 GB -``` - -##### 2-bit quantization (16x compression) - -In 2-bit quantization, each dimension is represented using 2 bits, equivalent to a 16x compression factor. 
The memory requirement can be estimated as follows: - -```r -Memory = 1.1 * ((256 * 2 / 8) + 8 * 16) * 1,000,000 - ~= 0.211 GB -``` - -##### 4-bit quantization (8x compression) - -In 4-bit quantization, each dimension is represented using 4 bits, equivalent to an 8x compression factor. The memory requirement can be estimated as follows: - -```r -Memory = 1.1 * ((256 * 4 / 8) + 8 * 16) * 1,000,000 - ~= 0.282 GB -``` diff --git a/_search-plugins/knn/settings.md b/_search-plugins/knn/settings.md deleted file mode 100644 index 9d90c6461d..0000000000 --- a/_search-plugins/knn/settings.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -layout: default -title: Settings -parent: k-NN search -nav_order: 40 ---- - -# k-NN settings - -The k-NN plugin adds several new cluster settings. To learn more about static and dynamic settings, see [Configuring OpenSearch]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/). - -## Cluster settings - -The following table lists all available cluster-level k-NN settings. For more information about cluster settings, see [Configuring OpenSearch]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#updating-cluster-settings-using-the-api) and [Updating cluster settings using the API]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#updating-cluster-settings-using-the-api). - -Setting | Static/Dynamic | Default | Description -:--- | :--- | :--- | :--- -`knn.plugin.enabled`| Dynamic | `true` | Enables or disables the k-NN plugin. -`knn.algo_param.index_thread_qty` | Dynamic | `1` | The number of threads used for native library and Lucene library (for OpenSearch version 2.19 and later) index creation. Keeping this value low reduces the CPU impact of the k-NN plugin but also reduces indexing performance. -`knn.cache.item.expiry.enabled` | Dynamic | `false` | Whether to remove native library indexes that have not been accessed for a certain duration from memory. -`knn.cache.item.expiry.minutes` | Dynamic | `3h` | If enabled, the amount of idle time before a native library index is removed from memory. -`knn.circuit_breaker.unset.percentage` | Dynamic | `75` | The native memory usage threshold for the circuit breaker. Memory usage must be lower than this percentage of `knn.memory.circuit_breaker.limit` in order for `knn.circuit_breaker.triggered` to remain `false`. -`knn.circuit_breaker.triggered` | Dynamic | `false` | True when memory usage exceeds the `knn.circuit_breaker.unset.percentage` value. -`knn.memory.circuit_breaker.limit` | Dynamic | `50%` | The native memory limit for native library indexes. At the default value, if a machine has 100 GB of memory and the JVM uses 32 GB, then the k-NN plugin uses 50% of the remaining 68 GB (34 GB). If memory usage exceeds this value, then the plugin removes the native library indexes used least recently. -`knn.memory.circuit_breaker.enabled` | Dynamic | `true` | Whether to enable the k-NN memory circuit breaker. -`knn.model.index.number_of_shards`| Dynamic | `1` | The number of shards to use for the model system index, which is the OpenSearch index that stores the models used for approximate nearest neighbor (ANN) search. -`knn.model.index.number_of_replicas`| Dynamic | `1` | The number of replica shards to use for the model system index. Generally, in a multi-node cluster, this value should be at least 1 in order to increase stability. -`knn.model.cache.size.limit` | Dynamic | `10%` | The model cache limit cannot exceed 25% of the JVM heap. 
-`knn.faiss.avx2.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx2.so` library and load the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#simd-optimization-for-the-faiss-engine). -`knn.faiss.avx512.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx512.so` library and load the `libopensearchknn_faiss_avx2.so` library or the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#simd-optimization-for-the-faiss-engine). - -## Index settings - -The following table lists all available index-level k-NN settings. All settings are static. For information about updating static index-level settings, see [Updating a static index setting]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index-settings/#updating-a-static-index-setting). - -Setting | Default | Description -:--- | :--- | :--- -`index.knn.advanced.filtered_exact_search_threshold`| `null` | The filtered ID threshold value used to switch to exact search during filtered ANN search. If the number of filtered IDs in a segment is lower than this setting's value, then exact search will be performed on the filtered IDs. -`index.knn.algo_param.ef_search` | `100` | `ef` (or `efSearch`) represents the size of the dynamic list for the nearest neighbors used during a search. Higher `ef` values lead to a more accurate but slower search. `ef` cannot be set to a value lower than the number of queried nearest neighbors, `k`. `ef` can take any value between `k` and the size of the dataset. \ No newline at end of file diff --git a/_search-plugins/vector-search.md b/_search-plugins/vector-search.md deleted file mode 100644 index 5b6fc7f371..0000000000 --- a/_search-plugins/vector-search.md +++ /dev/null @@ -1,283 +0,0 @@ ---- -layout: default -title: Vector search -nav_order: 22 -has_children: false -has_toc: false ---- - -# Vector search - -OpenSearch is a comprehensive search platform that supports a variety of data types, including vectors. OpenSearch vector database functionality is seamlessly integrated with its generic database function. - -In OpenSearch, you can generate vector embeddings, store those embeddings in an index, and use them for vector search. Choose one of the following options: - -- Generate embeddings using a library of your choice before ingesting them into OpenSearch. Once you ingest vectors into an index, you can perform a vector similarity search on the vector space. For more information, see [Working with embeddings generated outside of OpenSearch](#working-with-embeddings-generated-outside-of-opensearch). -- Automatically generate embeddings within OpenSearch. To use embeddings for semantic search, the ingested text (the corpus) and the query need to be embedded using the same model. [Neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/) packages this functionality, eliminating the need to manage the internal details. For more information, see [Generating vector embeddings within OpenSearch](#generating-vector-embeddings-in-opensearch). 
- -## Working with embeddings generated outside of OpenSearch - -After you generate vector embeddings, upload them to an OpenSearch index and search the index using vector search. For a complete example, see [Example](#example). - -### k-NN index - -To build a vector database and use vector search, you must specify your index as a [k-NN index]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/) when creating it by setting `index.knn` to `true`: - -```json -PUT test-index -{ - "settings": { - "index": { - "knn": true, - "knn.algo_param.ef_search": 100 - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 1024, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "faiss", - "parameters": { - "ef_construction": 128, - "m": 24 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -### k-NN vector - -You must designate the field that will store vectors as a [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) field type. OpenSearch supports vectors of up to 16,000 dimensions, each of which is represented as a 32-bit or 16-bit float. - -To save storage space, you can use `byte` or `binary` vectors. For more information, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors) and [Binary vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). - -### k-NN vector search - -Vector search finds the vectors in your database that are most similar to the query vector. OpenSearch supports the following search methods: - -- [Approximate search](#approximate-search) (approximate k-NN, or ANN): Returns approximate nearest neighbors to the query vector. Usually, approximate search algorithms sacrifice indexing speed and search accuracy in exchange for performance benefits such as lower latency, smaller memory footprints, and more scalable search. For most use cases, approximate search is the best option. - -- Exact search (exact k-NN): A brute-force, exact k-NN search of vector fields. OpenSearch supports the following types of exact search: - - [Exact k-NN with scoring script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/): Using the k-NN scoring script, you can apply a filter to an index before executing the nearest neighbor search. - - [Painless extensions]({{site.url}}{{site.baseurl}}/search-plugins/knn/painless-functions/): Adds the distance functions as Painless extensions that you can use in more complex combinations. You can use this method to perform a brute-force, exact k-NN search of an index, which also supports pre-filtering. - -### Approximate search - -OpenSearch supports several algorithms for approximate vector search, each with its own advantages. For complete documentation, see [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). For more information about the search methods and engines, see [Method definitions]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#method-definitions). For method recommendations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#choosing-the-right-method). 
- -To use approximate vector search, specify one of the following search methods (algorithms) in the `method` parameter: - -- Hierarchical Navigable Small World (HNSW) -- Inverted File System (IVF) - -Additionally, specify the engine (library) that implements this method in the `engine` parameter: - -- [Non-Metric Space Library (NMSLIB)](https://github.com/nmslib/nmslib) -- [Facebook AI Similarity Search (Faiss)](https://github.com/facebookresearch/faiss) -- Lucene - -The following table lists the combinations of search methods and libraries supported by the k-NN engine for approximate vector search. - -Method | Engine -:--- | :--- -HNSW | Faiss, Lucene, NMSLIB (deprecated) -IVF | Faiss - -### Engine recommendations - -In general, select Faiss for large-scale use cases. Lucene is a good option for smaller deployments and offers benefits like smart filtering, where the optimal filtering strategy—pre-filtering, post-filtering, or exact k-NN—is automatically applied depending on the situation. The following table summarizes the differences between each option. - -| | NMSLIB/HNSW | Faiss/HNSW | Faiss/IVF | Lucene/HNSW | -|:---|:---|:---|:---|:---| -| Max dimensions | 16,000 | 16,000 | 16,000 | 16,000 | -| Filter | Post-filter | Post-filter | Post-filter | Filter during search | -| Training required | No | No | Yes | No | -| Similarity metrics | `l2`, `innerproduct`, `cosinesimil`, `l1`, `linf` | `l2`, `innerproduct` | `l2`, `innerproduct` | `l2`, `cosinesimil` | -| Number of vectors | Tens of billions | Tens of billions | Tens of billions | Less than 10 million | -| Indexing latency | Low | Low | Lowest | Low | -| Query latency and quality | Low latency and high quality | Low latency and high quality | Low latency and low quality | High latency and high quality | -| Vector compression | Flat | Flat
Product quantization | Flat<br>Product quantization | Flat |
| Memory consumption | High | High<br>Low with PQ | Medium<br>
Low with PQ | High | - -### Example - -In this example, you'll create a k-NN index, add data to the index, and search the data. - -#### Step 1: Create a k-NN index - -First, create an index that will store sample hotel data. Set `index.knn` to `true` and specify the `location` field as a `knn_vector`: - -```json -PUT /hotels-index -{ - "settings": { - "index": { - "knn": true, - "knn.algo_param.ef_search": 100, - "number_of_shards": 1, - "number_of_replicas": 0 - } - }, - "mappings": { - "properties": { - "location": { - "type": "knn_vector", - "dimension": 2, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "lucene", - "parameters": { - "ef_construction": 100, - "m": 16 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -#### Step 2: Add data to your index - -Next, add data to your index. Each document represents a hotel. The `location` field in each document contains a vector specifying the hotel's location: - -```json -POST /_bulk -{ "index": { "_index": "hotels-index", "_id": "1" } } -{ "location": [5.2, 4.4] } -{ "index": { "_index": "hotels-index", "_id": "2" } } -{ "location": [5.2, 3.9] } -{ "index": { "_index": "hotels-index", "_id": "3" } } -{ "location": [4.9, 3.4] } -{ "index": { "_index": "hotels-index", "_id": "4" } } -{ "location": [4.2, 4.6] } -{ "index": { "_index": "hotels-index", "_id": "5" } } -{ "location": [3.3, 4.5] } -``` -{% include copy-curl.html %} - -#### Step 3: Search your data - -Now search for hotels closest to the pin location `[5, 4]`. This location is labeled `Pin` in the following image. Each hotel is labeled with its document number. - -![Hotels on a coordinate plane]({{site.url}}{{site.baseurl}}/images/k-nn-search-hotels.png/) - -To search for the top three closest hotels, set `k` to `3`: - -```json -POST /hotels-index/_search -{ - "size": 3, - "query": { - "knn": { - "location": { - "vector": [ - 5, - 4 - ], - "k": 3 - } - } - } -} -``` -{% include copy-curl.html %} - -The response contains the hotels closest to the specified pin location: - -```json -{ - "took": 1093, - "timed_out": false, - "_shards": { - "total": 1, - "successful": 1, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total": { - "value": 3, - "relation": "eq" - }, - "max_score": 0.952381, - "hits": [ - { - "_index": "hotels-index", - "_id": "2", - "_score": 0.952381, - "_source": { - "location": [ - 5.2, - 3.9 - ] - } - }, - { - "_index": "hotels-index", - "_id": "1", - "_score": 0.8333333, - "_source": { - "location": [ - 5.2, - 4.4 - ] - } - }, - { - "_index": "hotels-index", - "_id": "3", - "_score": 0.72992706, - "_source": { - "location": [ - 4.9, - 3.4 - ] - } - } - ] - } -} -``` - -### Vector search with filtering - -For information about vector search with filtering, see [k-NN search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). - -## Generating vector embeddings in OpenSearch - -[Neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/) encapsulates the infrastructure needed to perform semantic vector searches. After you integrate an inference (embedding) service, neural search functions like lexical search, accepting a textual query and returning relevant documents. - -When you index your data, neural search transforms text into vector embeddings and indexes both the text and its vector embeddings in a vector index. When you use a neural query during search, neural search converts the query text into vector embeddings and uses vector search to return the results. 
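To make this concrete, the following is a minimal sketch of a `neural` query: OpenSearch converts `query_text` into an embedding using the specified model and then runs a vector search against the vector field. The index name, vector field, and `model_id` shown here are hypothetical placeholders:

```json
GET /my-nlp-index/_search
{
  "query": {
    "neural": {
      "passage_embedding": {
        "query_text": "wild west",
        "model_id": "aVeif4oB5Vm0Tdw8zYO2",
        "k": 5
      }
    }
  }
}
```
{% include copy-curl.html %}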
- -### Choosing a model - -The first step in setting up neural search is choosing a model. You can upload a model to your OpenSearch cluster, use one of the pretrained models provided by OpenSearch, or connect to an externally hosted model. For more information, see [Integrating ML models]({{site.url}}{{site.baseurl}}/ml-commons-plugin/integrating-ml-models/). - -### Neural search tutorial - -For a step-by-step tutorial, see [Neural search tutorial]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/). - -### Search methods - -Choose one of the following search methods to use your model for neural search: - -- [Semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/): Uses dense retrieval based on text embedding models to search text data. - -- [Hybrid search]({{site.url}}{{site.baseurl}}/search-plugins/hybrid-search/): Combines lexical and neural search to improve search relevance. - -- [Multimodal search]({{site.url}}{{site.baseurl}}/search-plugins/multimodal-search/): Uses neural search with multimodal embedding models to search text and image data. - -- [Neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/): Uses neural search with sparse retrieval based on sparse embedding models to search text data. - -- [Conversational search]({{site.url}}{{site.baseurl}}/search-plugins/conversational-search/): With conversational search, you can ask questions in natural language, receive a text response, and ask additional clarifying questions. From d977108995fef3f5a2262a84eba5228b2c9901e9 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Fri, 14 Feb 2025 09:16:17 -0500 Subject: [PATCH 15/32] More restructuring Signed-off-by: Fanit Kolchina --- .../knn-memory-optimized.md | 855 ++++++++++++++++++ .../knn-methods-engines.md | 365 ++++++++ .../supported-field-types/knn-spaces.md | 96 ++ .../supported-field-types/knn-vector.md | 840 +---------------- _query-dsl/specialized/kNN.md | 88 +- _query-dsl/specialized/neural.md | 8 +- _sass/_home.scss | 19 +- _vector-search/api.md | 2 +- .../index.md => creating-vector-index.md} | 25 +- .../creating-vector-index/method.md | 283 ------ .../creating-vector-index/vector-field.md | 59 -- .../auto-generated-embeddings.md | 10 +- _vector-search/getting-started/index.md | 4 +- .../pre-generated-embeddings.md | 21 +- .../tutorials/neural-search-tutorial.md | 7 +- .../tutorials/semantic-search-byte-vectors.md | 2 +- _vector-search/index.md | 24 +- _vector-search/ingesting-data.md | 2 +- .../conversational-search.md | 5 +- .../ml-powered-search/hybrid-search.md | 4 + .../ml-powered-search/multimodal-search.md | 4 + .../ml-powered-search/neural-sparse-search.md | 1 + .../neural-sparse-with-pipelines.md | 6 +- .../neural-sparse-with-raw-vectors.md | 4 + .../ml-powered-search/semantic-search.md | 6 +- .../ml-powered-search/text-chunking.md | 4 + .../optimizing-performance/index.md | 32 - .../performance-tuning.md | 225 ----- .../binary-quantization.md | 7 +- .../disk-based-vector-search.md | 23 +- .../faiss-16-bit-quantization.md | 13 +- .../faiss-product-quantization.md | 13 +- _vector-search/optimizing-storage/index.md | 22 + .../knn-vector-quantization.md | 16 +- .../lucene-scalar-quantization.md | 11 +- _vector-search/performance-tuning-indexing.md | 154 ++++ _vector-search/performance-tuning-search.md | 56 ++ _vector-search/performance-tuning.md | 73 ++ _vector-search/searching-data.md | 10 + _vector-search/settings.md | 4 +- .../radial-search-knn.md | 2 +- .../approximate-knn.md | 123 +-- 
.../vector-search-techniques/index.md | 2 +- .../knn-score-script.md | 10 +- .../painless-functions.md | 4 +- 45 files changed, 1862 insertions(+), 1682 deletions(-) create mode 100644 _field-types/supported-field-types/knn-memory-optimized.md create mode 100644 _field-types/supported-field-types/knn-methods-engines.md create mode 100644 _field-types/supported-field-types/knn-spaces.md rename _vector-search/{creating-vector-index/index.md => creating-vector-index.md} (79%) delete mode 100644 _vector-search/creating-vector-index/method.md delete mode 100644 _vector-search/creating-vector-index/vector-field.md delete mode 100644 _vector-search/optimizing-performance/index.md delete mode 100644 _vector-search/optimizing-performance/performance-tuning.md rename _vector-search/{optimizing-performance => optimizing-storage}/binary-quantization.md (96%) rename _vector-search/{optimizing-performance => optimizing-storage}/disk-based-vector-search.md (79%) rename _vector-search/{optimizing-performance => optimizing-storage}/faiss-16-bit-quantization.md (90%) rename _vector-search/{optimizing-performance => optimizing-storage}/faiss-product-quantization.md (86%) create mode 100644 _vector-search/optimizing-storage/index.md rename _vector-search/{optimizing-performance => optimizing-storage}/knn-vector-quantization.md (60%) rename _vector-search/{optimizing-performance => optimizing-storage}/lucene-scalar-quantization.md (78%) create mode 100644 _vector-search/performance-tuning-indexing.md create mode 100644 _vector-search/performance-tuning-search.md create mode 100644 _vector-search/performance-tuning.md diff --git a/_field-types/supported-field-types/knn-memory-optimized.md b/_field-types/supported-field-types/knn-memory-optimized.md new file mode 100644 index 0000000000..39bb7e2c63 --- /dev/null +++ b/_field-types/supported-field-types/knn-memory-optimized.md @@ -0,0 +1,855 @@ +--- +layout: default +title: Memory-optimized vectors +parent: k-NN vector +grand_parent: Supported field types +nav_order: 30 +--- + +# Memory-optimized vectors + +Vector search operations can be memory-intensive, particularly when dealing with large-scale deployments. OpenSearch provides several strategies to optimize memory usage while maintaining search performance. You can choose between different workload modes that prioritize either low latency or low cost, apply various compression levels to reduce memory footprint, and use alternative vector representations like byte or binary vectors. These optimization techniques allow you to balance memory consumption, search performance, and cost based on your specific use case requirements. + +## Vector workload modes + +Vector search requires balancing between search performance and operational costs. While in-memory search provides the lowest latency, [disk-based search]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/disk-based-vector-search/) offers a more cost-effective approach by reducing memory usage, though with slightly higher search latency. To choose between these approaches, use the `mode` mapping parameter in your `knn_vector` field configuration. This parameter sets appropriate default values for k-NN parameters based on your priority: either low latency or low cost. For additional optimization, you can override these default parameter values in your k-NN field mapping. + +OpenSearch supports the following vector workload modes. 
| Mode | Default engine | Description |
|:---|:---|:---|
| `in_memory` (Default) | `faiss` | Prioritizes low-latency search. This mode uses the `faiss` engine without any quantization applied. It is configured with the default parameter values for vector search in OpenSearch. |
| `on_disk` | `faiss` | Prioritizes low-cost vector search while maintaining strong recall. By default, the `on_disk` mode uses quantization and rescoring to execute a two-pass approach to retrieve the top neighbors. The `on_disk` mode supports only `float` vector types. |

To create a vector index that uses the `on_disk` mode for low-cost search, send the following request:

```json
PUT test-index
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 3,
        "space_type": "l2",
        "mode": "on_disk"
      }
    }
  }
}
```
{% include copy-curl.html %}

### Compression levels

The `compression_level` mapping parameter selects a quantization encoder that reduces vector memory consumption by the given factor. The following table lists the available `compression_level` values.

| Compression level | Supported engines                             |
|:------------------|:---------------------------------------------|
| `1x`              | `faiss`, `lucene`, and `nmslib` (deprecated) |
| `2x`              | `faiss`                                       |
| `4x`              | `lucene`                                      |
| `8x`              | `faiss`                                       |
| `16x`             | `faiss`                                       |
| `32x`             | `faiss`                                       |

For example, if a `compression_level` of `32x` is passed for a `float32` index of 768-dimensional vectors, the per-vector memory is reduced from `4 * 768 = 3072` bytes to `3072 / 32 = 96` bytes. Internally, binary quantization (which maps a `float` to a `bit`) may be used to achieve this compression.

If you set the `compression_level` parameter, then you cannot specify an `encoder` in the `method` mapping. Compression levels greater than `1x` are only supported for `float` vector types.
{: .note}

The following table lists the default `compression_level` values for the available workload modes.

| Mode | Default compression level |
|:------------------|:-------------------------------|
| `in_memory` | `1x` |
| `on_disk` | `32x` |

To create a vector field with a `compression_level` of `16x`, specify the `compression_level` parameter in the mappings. This parameter overrides the default compression level for the `on_disk` mode from `32x` to `16x`, producing higher recall and accuracy at the expense of a larger memory footprint:

```json
PUT test-index
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 3,
        "space_type": "l2",
        "mode": "on_disk",
        "compression_level": "16x"
      }
    }
  }
}
```
{% include copy-curl.html %}

## Byte vectors

By default, k-NN vectors are `float` vectors, in which each dimension is 4 bytes. If you want to save storage space, you can use `byte` vectors with the `faiss` or `lucene` engine. In a `byte` vector, each dimension is a signed 8-bit integer in the [-128, 127] range.

Byte vectors are supported only for the `lucene` and `faiss` engines. They are not supported for the `nmslib` engine.
{: .note}

In [k-NN benchmarking tests](https://github.com/opensearch-project/opensearch-benchmark-workloads/tree/main/vectorsearch), the use of `byte` rather than `float` vectors resulted in a significant reduction in storage and memory usage as well as improved indexing throughput and reduced query latency.
Additionally, precision on recall was not greatly affected (note that recall can depend on various factors, such as the [quantization technique](#quantization-techniques) and data distribution).

When using `byte` vectors, expect some loss of precision in the recall compared to using `float` vectors. Byte vectors are useful in large-scale applications and use cases that prioritize a reduced memory footprint in exchange for a minimal loss of recall.
{: .important}

When using `byte` vectors with the `faiss` engine, we recommend using [SIMD optimization]({{site.url}}{{site.baseurl}}/vector-search/performance-tuning/#simd-optimization-for-the-faiss-engine), which helps to significantly reduce search latencies and improve indexing throughput.
{: .important}

Introduced in k-NN plugin version 2.9, the optional `data_type` parameter defines the data type of a vector. The default value of this parameter is `float`.

To use a `byte` vector, set the `data_type` parameter to `byte` when creating mappings for an index:

### Example: HNSW

The following example creates a byte vector index with the `lucene` engine and `hnsw` algorithm:

```json
PUT test-index
{
  "settings": {
    "index": {
      "knn": true,
      "knn.algo_param.ef_search": 100
    }
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 3,
        "data_type": "byte",
        "space_type": "l2",
        "method": {
          "name": "hnsw",
          "engine": "lucene",
          "parameters": {
            "ef_construction": 100,
            "m": 16
          }
        }
      }
    }
  }
}
```
{% include copy-curl.html %}

After creating the index, ingest documents as usual. Make sure each dimension in the vector is in the supported [-128, 127] range:

```json
PUT test-index/_doc/1
{
  "my_vector": [-126, 28, 127]
}
```
{% include copy-curl.html %}

```json
PUT test-index/_doc/2
{
  "my_vector": [100, -128, 0]
}
```
{% include copy-curl.html %}

When querying, be sure to use a `byte` vector:

```json
GET test-index/_search
{
  "size": 2,
  "query": {
    "knn": {
      "my_vector": {
        "vector": [26, -120, 99],
        "k": 2
      }
    }
  }
}
```
{% include copy-curl.html %}

### Example: IVF

The `ivf` method requires a training step that creates and trains the model used to initialize the native library index during segment creation. For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model).

First, create an index that will contain byte vector training data. Specify the `faiss` engine and `ivf` algorithm and make sure that the `dimension` matches the dimension of the model you want to create:

```json
PUT train-index
{
  "mappings": {
    "properties": {
      "train-field": {
        "type": "knn_vector",
        "dimension": 4,
        "data_type": "byte"
      }
    }
  }
}
```
{% include copy-curl.html %}

Next, ingest training data containing byte vectors into the training index:

```json
PUT _bulk
{ "index": { "_index": "train-index", "_id": "1" } }
{ "train-field": [127, 100, 0, -120] }
{ "index": { "_index": "train-index", "_id": "2" } }
{ "train-field": [2, -128, -10, 50] }
{ "index": { "_index": "train-index", "_id": "3" } }
{ "train-field": [13, -100, 5, 126] }
{ "index": { "_index": "train-index", "_id": "4" } }
{ "train-field": [5, 100, -6, -125] }
```
{% include copy-curl.html %}

Then, create and train the model named `byte-vector-model`.
The model will be trained using the training data from the `train-field` in the `train-index`. Specify the `byte` data type:

```json
POST _plugins/_knn/models/byte-vector-model/_train
{
  "training_index": "train-index",
  "training_field": "train-field",
  "dimension": 4,
  "description": "model with byte data",
  "data_type": "byte",
  "method": {
    "name": "ivf",
    "engine": "faiss",
    "space_type": "l2",
    "parameters": {
      "nlist": 1,
      "nprobes": 1
    }
  }
}
```
{% include copy-curl.html %}

To check the model training status, call the Get Model API:

```json
GET _plugins/_knn/models/byte-vector-model?filter_path=state
```
{% include copy-curl.html %}

Once the training is complete, the `state` changes to `created`.

Next, create an index that will initialize its native library indexes using the trained model:

```json
PUT test-byte-ivf
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "model_id": "byte-vector-model"
      }
    }
  }
}
```
{% include copy-curl.html %}

Ingest the data containing the byte vectors that you want to search into the created index:

```json
PUT _bulk?refresh=true
{"index": {"_index": "test-byte-ivf", "_id": "1"}}
{"my_vector": [7, 10, 15, -120]}
{"index": {"_index": "test-byte-ivf", "_id": "2"}}
{"my_vector": [10, -100, 120, -108]}
{"index": {"_index": "test-byte-ivf", "_id": "3"}}
{"my_vector": [1, -2, 5, -50]}
{"index": {"_index": "test-byte-ivf", "_id": "4"}}
{"my_vector": [9, -7, 45, -78]}
{"index": {"_index": "test-byte-ivf", "_id": "5"}}
{"my_vector": [80, -70, 127, -128]}
```
{% include copy-curl.html %}

Finally, search the data. Be sure to provide a byte vector in the k-NN vector field:

```json
GET test-byte-ivf/_search
{
  "size": 2,
  "query": {
    "knn": {
      "my_vector": {
        "vector": [100, -120, 50, -45],
        "k": 2
      }
    }
  }
}
```
{% include copy-curl.html %}

### Memory estimation

In the best-case scenario, byte vectors require 25% of the memory required by 32-bit vectors.

#### HNSW memory estimation

The memory required for Hierarchical Navigable Small World (HNSW) is estimated to be `1.1 * (dimension + 8 * m)` bytes/vector, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph.

As an example, assume that you have 1 million vectors with a `dimension` of `256` and an `m` of `16`. The memory requirement can be estimated as follows:

```r
1.1 * (256 + 8 * 16) * 1,000,000 ~= 0.39 GB
```

#### IVF memory estimation

The memory required for IVF is estimated to be `1.1 * ((dimension * num_vectors) + (4 * nlist * dimension))` bytes, where `nlist` is the number of buckets to partition vectors into.

As an example, assume that you have 1 million vectors with a `dimension` of `256` and an `nlist` of `128`. The memory requirement can be estimated as follows:

```r
1.1 * ((256 * 1,000,000) + (4 * 128 * 256)) ~= 0.27 GB
```


### Quantization techniques

If your vectors are of the type `float`, you need to first convert them to the `byte` type before ingesting the documents. This conversion is accomplished by _quantizing the dataset_---reducing the precision of its vectors. There are many quantization techniques, such as scalar quantization or product quantization (PQ), which is used in the Faiss engine. The choice of quantization technique depends on the type of data you're using and can affect the accuracy of recall values.
The following sections describe the scalar quantization algorithms that were used to quantize the [k-NN benchmarking test](https://github.com/opensearch-project/opensearch-benchmark-workloads/tree/main/vectorsearch) data for the [L2](#scalar-quantization-for-the-l2-space-type) and [cosine similarity](#scalar-quantization-for-the-cosine-similarity-space-type) space types. The provided pseudocode is for illustration purposes only.

#### Scalar quantization for the L2 space type

The following example pseudocode illustrates the scalar quantization technique used for the benchmarking tests on Euclidean datasets with the L2 space type. Euclidean distance is shift invariant. If you shift both $$x$$ and $$y$$ by the same $$z$$, then the distance remains the same ($$\lVert x-y\rVert =\lVert (x-z)-(y-z)\rVert$$).

```python
import numpy as np

# Random dataset (example of creating a random dataset)
dataset = np.random.uniform(-300, 300, (100, 10))
# Random query set (example of creating a random query set)
queryset = np.random.uniform(-350, 350, (100, 10))
# Number of values
B = 256

# INDEXING:
# Get min and max
dataset_min = np.min(dataset)
dataset_max = np.max(dataset)
# Shift coordinates to be non-negative
dataset -= dataset_min
# Normalize into [0, 1]
dataset *= 1. / (dataset_max - dataset_min)
# Bucket into 256 values
dataset = np.floor(dataset * (B - 1)) - int(B / 2)

# QUERYING:
# Clip (if the query set range is outside of the dataset range)
queryset = queryset.clip(dataset_min, dataset_max)
# Shift coordinates to be non-negative
queryset -= dataset_min
# Normalize
queryset *= 1. / (dataset_max - dataset_min)
# Bucket into 256 values
queryset = np.floor(queryset * (B - 1)) - int(B / 2)
```
{% include copy.html %}

#### Scalar quantization for the cosine similarity space type

The following example pseudocode illustrates the scalar quantization technique used for the benchmarking tests on angular datasets with the cosine similarity space type. Cosine similarity is not shift invariant ($$cos(x, y) \neq cos(x-z, y-z)$$).

The following pseudocode is for positive numbers:

```python
# For positive numbers

import numpy as np
from math import floor

# INDEXING and QUERYING:

# Get max of train dataset
max = np.max(dataset)
min = 0
B = 127

# Normalize into [0, 1]
val = (val - min) / (max - min)
val = (val * B)

# Get integer and fraction values
int_part = floor(val)
frac_part = val - int_part

if 0.5 < frac_part:
    bval = int_part + 1
else:
    bval = int_part

return Byte(bval)  # Byte() denotes casting the result to a signed 8-bit integer
```
{% include copy.html %}

The following pseudocode is for negative numbers:

```python
# For negative numbers

import numpy as np
from math import floor

# INDEXING and QUERYING:

# Get min of train dataset
min = 0
max = -np.min(dataset)
B = 128

# Normalize into [0, 1]
val = (val - min) / (max - min)
val = (val * B)

# Get integer and fraction values
int_part = floor(val)
frac_part = val - int_part

if 0.5 < frac_part:
    bval = int_part + 1
else:
    bval = int_part

return Byte(bval)  # Byte() denotes casting the result to a signed 8-bit integer
```
{% include copy.html %}

## Binary vectors

You can reduce memory costs by a factor of 32 by switching from float to binary vectors. Using binary vector indexes can lower operational costs while maintaining high recall performance, making large-scale deployment more economical and efficient.

Binary format is available for the following k-NN search types:

- [Approximate k-NN]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/): Supports binary vectors only for the Faiss engine with the HNSW and IVF algorithms.
+- [Script score k-NN]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/): Enables the use of binary vectors in script scoring. +- [Painless extensions]({{site.url}}{{site.baseurl}}/search-plugins/knn/painless-functions/): Allows the use of binary vectors with Painless scripting extensions. + +### Requirements + +There are several requirements for using binary vectors in the OpenSearch k-NN plugin: + +- The `data_type` of the binary vector index must be `binary`. +- The `space_type` of the binary vector index must be `hamming`. +- The `dimension` of the binary vector index must be a multiple of 8. +- You must convert your binary data into 8-bit signed integers (`int8`) in the [-128, 127] range. For example, the binary sequence of 8 bits `0, 1, 1, 0, 0, 0, 1, 1` must be converted into its equivalent byte value of `99` to be used as a binary vector input. + +### Example: HNSW + +To create a binary vector index with the Faiss engine and HNSW algorithm, send the following request: + +```json +PUT /test-binary-hnsw +{ + "settings": { + "index": { + "knn": true + } + }, + "mappings": { + "properties": { + "my_vector": { + "type": "knn_vector", + "dimension": 8, + "data_type": "binary", + "space_type": "hamming", + "method": { + "name": "hnsw", + "engine": "faiss" + } + } + } + } +} +``` +{% include copy-curl.html %} + +Then ingest some documents containing binary vectors: + +```json +PUT _bulk +{"index": {"_index": "test-binary-hnsw", "_id": "1"}} +{"my_vector": [7], "price": 4.4} +{"index": {"_index": "test-binary-hnsw", "_id": "2"}} +{"my_vector": [10], "price": 14.2} +{"index": {"_index": "test-binary-hnsw", "_id": "3"}} +{"my_vector": [15], "price": 19.1} +{"index": {"_index": "test-binary-hnsw", "_id": "4"}} +{"my_vector": [99], "price": 1.2} +{"index": {"_index": "test-binary-hnsw", "_id": "5"}} +{"my_vector": [80], "price": 16.5} +``` +{% include copy-curl.html %} + +When querying, be sure to use a binary vector: + +```json +GET /test-binary-hnsw/_search +{ + "size": 2, + "query": { + "knn": { + "my_vector": { + "vector": [9], + "k": 2 + } + } + } +} +``` +{% include copy-curl.html %} + +The response contains the two vectors closest to the query vector: + +

<details markdown="block">
  <summary>
    Response
  </summary>
  {: .text-delta}

```json
{
  "took": 8,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 0.5,
    "hits": [
      {
        "_index": "test-binary-hnsw",
        "_id": "2",
        "_score": 0.5,
        "_source": {
          "my_vector": [
            10
          ],
          "price": 14.2
        }
      },
      {
        "_index": "test-binary-hnsw",
        "_id": "5",
        "_score": 0.25,
        "_source": {
          "my_vector": [
            80
          ],
          "price": 16.5
        }
      }
    ]
  }
}
```
</details>
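If your source data is a sequence of bits rather than signed bytes, you must pack each group of 8 bits into a signed 8-bit integer before ingestion, as described in [Requirements](#requirements). The following Python sketch shows one way to do this; it assumes NumPy is available, and the helper name is illustrative:

```python
import numpy as np

def bits_to_int8(bits):
    """Pack a bit sequence (length divisible by 8) into signed 8-bit integers."""
    packed = np.packbits(np.array(bits, dtype=np.uint8))  # one uint8 per 8 bits
    return packed.view(np.int8).tolist()  # reinterpret as signed bytes in [-128, 127]

# The 8-bit sequence from the requirements above corresponds to the byte value 99.
print(bits_to_int8([0, 1, 1, 0, 0, 0, 1, 1]))  # [99]
```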
+ +### Example: IVF + +The IVF method requires a training step that creates and trains the model used to initialize the native library index during segment creation. For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model). + +First, create an index that will contain binary vector training data. Specify the Faiss engine and IVF algorithm and make sure that the `dimension` matches the dimension of the model you want to create: + +```json +PUT train-index +{ + "mappings": { + "properties": { + "train-field": { + "type": "knn_vector", + "dimension": 8, + "data_type": "binary" + } + } + } +} +``` +{% include copy-curl.html %} + +Ingest training data containing binary vectors into the training index: + +
<details markdown="block">
  <summary>
    Bulk ingest request
  </summary>
  {: .text-delta}

```json
PUT _bulk
{ "index": { "_index": "train-index", "_id": "1" } }
{ "train-field": [1] }
{ "index": { "_index": "train-index", "_id": "2" } }
{ "train-field": [2] }
{ "index": { "_index": "train-index", "_id": "3" } }
{ "train-field": [3] }
{ "index": { "_index": "train-index", "_id": "4" } }
{ "train-field": [4] }
{ "index": { "_index": "train-index", "_id": "5" } }
{ "train-field": [5] }
{ "index": { "_index": "train-index", "_id": "6" } }
{ "train-field": [6] }
{ "index": { "_index": "train-index", "_id": "7" } }
{ "train-field": [7] }
{ "index": { "_index": "train-index", "_id": "8" } }
{ "train-field": [8] }
{ "index": { "_index": "train-index", "_id": "9" } }
{ "train-field": [9] }
{ "index": { "_index": "train-index", "_id": "10" } }
{ "train-field": [10] }
{ "index": { "_index": "train-index", "_id": "11" } }
{ "train-field": [11] }
{ "index": { "_index": "train-index", "_id": "12" } }
{ "train-field": [12] }
{ "index": { "_index": "train-index", "_id": "13" } }
{ "train-field": [13] }
{ "index": { "_index": "train-index", "_id": "14" } }
{ "train-field": [14] }
{ "index": { "_index": "train-index", "_id": "15" } }
{ "train-field": [15] }
{ "index": { "_index": "train-index", "_id": "16" } }
{ "train-field": [16] }
{ "index": { "_index": "train-index", "_id": "17" } }
{ "train-field": [17] }
{ "index": { "_index": "train-index", "_id": "18" } }
{ "train-field": [18] }
{ "index": { "_index": "train-index", "_id": "19" } }
{ "train-field": [19] }
{ "index": { "_index": "train-index", "_id": "20" } }
{ "train-field": [20] }
{ "index": { "_index": "train-index", "_id": "21" } }
{ "train-field": [21] }
{ "index": { "_index": "train-index", "_id": "22" } }
{ "train-field": [22] }
{ "index": { "_index": "train-index", "_id": "23" } }
{ "train-field": [23] }
{ "index": { "_index": "train-index", "_id": "24" } }
{ "train-field": [24] }
{ "index": { "_index": "train-index", "_id": "25" } }
{ "train-field": [25] }
{ "index": { "_index": "train-index", "_id": "26" } }
{ "train-field": [26] }
{ "index": { "_index": "train-index", "_id": "27" } }
{ "train-field": [27] }
{ "index": { "_index": "train-index", "_id": "28" } }
{ "train-field": [28] }
{ "index": { "_index": "train-index", "_id": "29" } }
{ "train-field": [29] }
{ "index": { "_index": "train-index", "_id": "30" } }
{ "train-field": [30] }
{ "index": { "_index": "train-index", "_id": "31" } }
{ "train-field": [31] }
{ "index": { "_index": "train-index", "_id": "32" } }
{ "train-field": [32] }
{ "index": { "_index": "train-index", "_id": "33" } }
{ "train-field": [33] }
{ "index": { "_index": "train-index", "_id": "34" } }
{ "train-field": [34] }
{ "index": { "_index": "train-index", "_id": "35" } }
{ "train-field": [35] }
{ "index": { "_index": "train-index", "_id": "36" } }
{ "train-field": [36] }
{ "index": { "_index": "train-index", "_id": "37" } }
{ "train-field": [37] }
{ "index": { "_index": "train-index", "_id": "38" } }
{ "train-field": [38] }
{ "index": { "_index": "train-index", "_id": "39" } }
{ "train-field": [39] }
{ "index": { "_index": "train-index", "_id": "40" } }
{ "train-field": [40] }
```
{% include copy-curl.html %}
</details>
Then, create and train the model named `test-binary-model`. The model will be trained using the training data from the `train-field` in the `train-index`. Specify the `binary` data type and `hamming` space type:

```json
POST _plugins/_knn/models/test-binary-model/_train
{
  "training_index": "train-index",
  "training_field": "train-field",
  "dimension": 8,
  "description": "model with binary data",
  "data_type": "binary",
  "space_type": "hamming",
  "method": {
    "name": "ivf",
    "engine": "faiss",
    "parameters": {
      "nlist": 16,
      "nprobes": 1
    }
  }
}
```
{% include copy-curl.html %}

To check the model training status, call the Get Model API:

```json
GET _plugins/_knn/models/test-binary-model?filter_path=state
```
{% include copy-curl.html %}

Once the training is complete, the `state` changes to `created`.

Next, create an index that will initialize its native library indexes using the trained model:

```json
PUT test-binary-ivf
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "model_id": "test-binary-model"
      }
    }
  }
}
```
{% include copy-curl.html %}

Ingest the data containing the binary vectors that you want to search into the created index:

```json
PUT _bulk?refresh=true
{"index": {"_index": "test-binary-ivf", "_id": "1"}}
{"my_vector": [7], "price": 4.4}
{"index": {"_index": "test-binary-ivf", "_id": "2"}}
{"my_vector": [10], "price": 14.2}
{"index": {"_index": "test-binary-ivf", "_id": "3"}}
{"my_vector": [15], "price": 19.1}
{"index": {"_index": "test-binary-ivf", "_id": "4"}}
{"my_vector": [99], "price": 1.2}
{"index": {"_index": "test-binary-ivf", "_id": "5"}}
{"my_vector": [80], "price": 16.5}
```
{% include copy-curl.html %}

Finally, search the data. Be sure to provide a binary vector in the k-NN vector field:

```json
GET test-binary-ivf/_search
{
  "size": 2,
  "query": {
    "knn": {
      "my_vector": {
        "vector": [8],
        "k": 2
      }
    }
  }
}
```
{% include copy-curl.html %}

The response contains the two vectors closest to the query vector:
<details markdown="block">
  <summary>
    Response
  </summary>
  {: .text-delta}

```json
{
  "took": 7,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 0.5,
    "hits": [
      {
        "_index": "test-binary-ivf",
        "_id": "2",
        "_score": 0.5,
        "_source": {
          "my_vector": [
            10
          ],
          "price": 14.2
        }
      },
      {
        "_index": "test-binary-ivf",
        "_id": "3",
        "_score": 0.25,
        "_source": {
          "my_vector": [
            15
          ],
          "price": 19.1
        }
      }
    ]
  }
}
```
</details>
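As a sanity check, you can reproduce these scores by hand. For the `hamming` space type, the score is `1 / (1 + d)`, where `d` is the number of differing bits between the query vector and a document vector. A short illustrative Python sketch:

```python
def hamming_score(a: int, b: int) -> float:
    """OpenSearch hamming score: 1 / (1 + number of differing bits)."""
    d = bin(a ^ b).count("1")  # XOR, then count set bits (non-negative values only)
    return 1 / (1 + d)

print(hamming_score(8, 10))  # 0.5  -> document 2 above
print(hamming_score(8, 15))  # 0.25 -> document 3 above
```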
+ +### Memory estimation + +Use the following formulas to estimate the amount of memory required for binary vectors. + +#### HNSW memory estimation + +The memory required for HNSW can be estimated using the following formula, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph: + +```r +1.1 * (dimension / 8 + 8 * m) bytes/vector +``` + +#### IVF memory estimation + +The memory required for IVF can be estimated using the following formula, where `nlist` is the number of buckets to partition vectors into: + +```r +1.1 * (((dimension / 8) * num_vectors) + (nlist * dimension / 8)) +``` diff --git a/_field-types/supported-field-types/knn-methods-engines.md b/_field-types/supported-field-types/knn-methods-engines.md new file mode 100644 index 0000000000..481fcd9672 --- /dev/null +++ b/_field-types/supported-field-types/knn-methods-engines.md @@ -0,0 +1,365 @@ +--- +layout: default +title: Methods and engines +parent: k-NN vector +grand_parent: Supported field types +nav_order: 20 +--- + +# Methods and engines + +A _method_ defines the algorithm used for organizing vector data at indexing time and searching it at search time in [approximate k-NN search]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/approximate-knn/). + +OpenSearch supports the following methods: + +- **Hierarchical Navigable Small World (HNSW)** creates a hierarchical graph structure of connections between vectors. For more information about the algorithm, see [Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs](https://arxiv.org/abs/1603.09320). +- **Inverted File Index (IVF)** organizes vectors into buckets based on clustering and, during search, searches only a subset of the buckets. + +An _engine_ is the library that implements these methods. Different engines can implement the same method, sometimes with varying optimizations or characteristics. For example, HNSW is implemented by all supported engines, each with its own advantages. + +OpenSearch supports the following engines: +- [**Lucene**](#lucene-engine): The native search library, offering an HNSW implementation with efficient filtering capabilities +- [**Faiss**](#faiss-engine) (Facebook AI Similarity Search): A comprehensive library implementing both HNSW and IVF methods, with additional vector compression options +- [**NMSLIB**](#nmslib-engine-deprecated) (Non-Metric Space Library): A legacy implementation of HNSW (now deprecated) + +## Method definition example + +A method definition contains the following components: + +- The `name` of the method (for example, `hnsw` or `ivf`) +- The `space_type` the method is built for (for example, `l2` or `cosinesimil`) +- The `engine` that will implement the method (for example, `faiss` or `lucene`) +- A map of `parameters` specific to that implementation + +The following example configures an `hnsw` method with an `l2` space type, a `faiss` engine, and the method-specific parameters: + +```json +PUT test-index +{ + "settings": { + "index": { + "knn": true, + "knn.algo_param.ef_search": 100 + } + }, + "mappings": { + "properties": { + "my_vector1": { + "type": "knn_vector", + "dimension": 1024, + "method": { + "name": "hnsw", + "space_type": "l2", + "engine": "faiss", + "parameters": { + "ef_construction": 128, + "m": 24 + } + } + } + } + } +} +``` +{% include copy-curl.html %} + +Not every method/engine combination supports each of the spaces. 
For a list of supported spaces, see the specific engine section.
{: .note}

## Common parameters

The following parameters are common to all method definitions.

Mapping parameter | Required | Default | Updatable | Description
:--- | :--- | :--- | :--- | :---
`name` | Yes | N/A | No | The nearest neighbor method. Valid values are `hnsw` and `ivf`. Not every engine combination supports each of the methods. For a list of supported methods, see the specific engine section.
`space_type` | No | `l2` | No | The vector space used to calculate the distance between vectors. Valid values are `l1`, `l2`, `linf`, `cosinesimil`, `innerproduct`, `hamming`, and `hammingbit`. Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the specific engine section. Note: This value can also be specified at the top level of the mapping. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/).
`engine` | No | `faiss` | No | The approximate k-NN library to use for indexing and search. Valid values are `faiss`, `lucene`, and `nmslib` (deprecated).
`parameters` | No | `null` | No | The parameters used for the nearest neighbor method. For more information, see the specific engine section.

## Lucene engine

The Lucene engine provides a native implementation of vector search directly within Lucene. It offers efficient filtering capabilities and is well-suited for smaller deployments.

### Supported methods

The Lucene engine supports the following method.

Method name | Requires training | Supported spaces
:--- | :--- |:---
[`hnsw`](#hnsw-parameters) | No | `l2`, `cosinesimil`, `innerproduct` (supported in OpenSearch 2.13 and later)

#### HNSW parameters

The HNSW method supports the following parameters.

Parameter name | Required | Default | Updatable | Description
:--- | :--- | :--- | :--- | :---
`ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed.<br>Note: Lucene uses the term `beam_width` internally, but OpenSearch documentation uses `ef_construction` for consistency.
`m` | No | 16 | No | The number of bidirectional links created for each new element. Impacts memory consumption significantly. Keep between 2 and 100.<br>Note: Lucene uses the term `max_connections` internally, but OpenSearch documentation uses `m` for consistency.

The Lucene HNSW implementation ignores `ef_search` and dynamically sets it to the value of `k` in the search request. Therefore, there is no need to configure `ef_search` when using the Lucene engine.
{: .note}

An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`).
{: .note}

### Example configuration

```json
"method": {
    "name": "hnsw",
    "engine": "lucene",
    "parameters": {
        "m": 2048,
        "ef_construction": 245
    }
}
```

## Faiss engine

The Faiss engine provides advanced vector indexing capabilities with support for multiple methods and encoding options to optimize memory usage and search performance.

### Supported methods

The Faiss engine supports the following methods.

Method name | Requires training | Supported spaces
:--- | :--- |:---
[`hnsw`](#hnsw-parameters-1) | No | `l2`, `innerproduct` (not available when [PQ](#pq-parameters) is used), `hamming`
[`ivf`](#ivf-parameters) | Yes | `l2`, `innerproduct`, `hamming` (supported for binary vectors in OpenSearch version 2.16 and later; for more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized#binary-vectors))


#### HNSW parameters

The `hnsw` method supports the following parameters.

Parameter name | Required | Default | Updatable | Description
:--- | :--- | :--- | :--- | :---
`ef_search` | No | 100 | No | The size of the dynamic list used during k-NN searches. Higher values result in more accurate but slower searches.
`ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed.
`m` | No | 16 | No | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between `2` and `100`.
`encoder` | No | flat | No | Encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index, at the expense of search accuracy.

An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`).
{: .note}

#### IVF parameters

The IVF method supports the following parameters.

Parameter name | Required | Default | Updatable | Description
:--- | :--- | :--- | :--- | :---
`nlist` | No | 4 | No | Number of buckets to partition vectors into. Higher values may increase accuracy but increase memory and training latency.
`nprobes` | No | 1 | No | Number of buckets to search during query. Higher values increase accuracy but slow searches.
`encoder` | No | flat | No | Encoder definition for encoding vectors.

For more information about these parameters, see the [Faiss documentation](https://github.com/facebookresearch/faiss/wiki/Faiss-indexes).

### IVF training requirements

The IVF algorithm requires a training step. To create an index that uses IVF, you need to train a model with the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model), passing the IVF method definition. IVF requires that, at a minimum, there are `nlist` training data points, but we recommend [that you use more than this](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index#how-big-is-the-dataset). Training data can be composed of either the same data that is going to be ingested or a separate dataset.
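As an illustration, a training request for an IVF model might look as follows; the model name, training index, field, and dimension are hypothetical placeholders, and the Train API documentation linked above describes all available options:

```json
POST /_plugins/_knn/models/my-ivf-model/_train
{
  "training_index": "train-index",
  "training_field": "train-field",
  "dimension": 4,
  "method": {
    "name": "ivf",
    "engine": "faiss",
    "space_type": "l2",
    "parameters": {
      "nlist": 4,
      "nprobes": 2
    }
  }
}
```
{% include copy-curl.html %}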
### Supported encoders

You can use encoders to reduce the memory footprint of a vector index at the expense of search accuracy.

OpenSearch currently supports the following encoders in the Faiss library.

Encoder name | Requires training | Description
:--- | :--- | :---
`flat` (Default) | No | Encode vectors as floating-point arrays. This encoding does not reduce memory footprint.
[`pq`](#pq-parameters) | Yes | An abbreviation for _product quantization_, it is a lossy compression technique that uses clustering to encode a vector into a fixed size of bytes, with the goal of minimizing the drop in k-NN search accuracy. At a high level, vectors are broken up into `m` subvectors, and then each subvector is represented by a `code_size` code obtained from a code book produced during training. For more information about product quantization, see [this blog post](https://medium.com/dotstar/understanding-faiss-part-2-79d90b1e5388).
[`sq`](#sq-parameters) | No | An abbreviation for _scalar quantization_. Starting with OpenSearch version 2.13, you can use the `sq` encoder to quantize 32-bit floating-point vectors into 16-bit floats. In version 2.13, the built-in `sq` encoder is the SQFP16 Faiss encoder. The encoder reduces memory footprint with a minimal loss of precision and improves performance by using SIMD optimization (using AVX2 on x86 architecture or Neon on ARM64 architecture). For more information, see [Faiss scalar quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/faiss-16-bit-quantization/).

#### PQ parameters

The `pq` encoder supports the following parameters.

Parameter name | Required | Default | Updatable | Description
:--- | :--- | :--- | :--- | :---
`m` | No | `1` | No | Determines the number of subvectors into which to break the vector. Subvectors are encoded independently of each other. The vector dimension must be divisible by `m`. Maximum value is 1,024.
`code_size` | No | `8` | No | Determines the number of bits into which to encode a subvector. Maximum value is `8`. For `ivf`, this value must be less than or equal to `8`. For `hnsw`, this value must be `8`.

The `hnsw` method supports the `pq` encoder for OpenSearch versions 2.10 and later. The `code_size` parameter of a `pq` encoder with the `hnsw` method must be **8**.
{: .important}

#### SQ parameters

The `sq` encoder supports the following parameters.

Parameter name | Required | Default | Updatable | Description
:--- | :--- | :--- | :--- | :---
`type` | No | `fp16` | No | The type of scalar quantization to be used to encode 32-bit float vectors into the corresponding type. As of OpenSearch 2.13, only the `fp16` encoder type is supported. For the `fp16` encoder, vector values must be in the [-65504.0, 65504.0] range.
`clip` | No | `false` | No | If `true`, then any vector values outside of the supported range for the specified vector type are rounded so that they are in the range. If `false`, then the request is rejected if any vector values are outside of the supported range. Setting `clip` to `true` may decrease recall.

For more information and examples, see [Using Faiss scalar quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/faiss-16-bit-quantization/).
+ +### Example configurations + +The following example uses the `ivf` method without specifying an encoder (by default, OpenSearch uses the `flat` encoder): + +```json +"method": { + "name":"ivf", + "engine":"faiss", + "parameters":{ + "nlist": 4, + "nprobes": 2 + } +} +``` + +The following example uses the `ivf` method with a `pq` encoder: + +```json +"method": { + "name":"ivf", + "engine":"faiss", + "parameters":{ + "encoder":{ + "name":"pq", + "parameters":{ + "code_size": 8, + "m": 8 + } + } + } +} +``` + +The following example uses the `hnsw` method without specifying an encoder (by default, OpenSearch uses the `flat` encoder): + +```json +"method": { + "name":"hnsw", + "engine":"faiss", + "parameters":{ + "ef_construction": 256, + "m": 8 + } +} +``` + +The following example uses the `ivf` method with an `sq` encoder of type `fp16`: + +```json +"method": { + "name":"ivf", + "engine":"faiss", + "parameters":{ + "encoder": { + "name": "sq", + "parameters": { + "type": "fp16", + "clip": false + } + }, + "nprobes": 2 + } +} +``` + +The following example uses the `hnsw` method with an `sq` encoder of type `fp16` with `clip` enabled: + +```json +"method": { + "name":"hnsw", + "engine":"faiss", + "parameters":{ + "encoder": { + "name": "sq", + "parameters": { + "type": "fp16", + "clip": true + } + }, + "ef_construction": 256, + "m": 8 + } +} +``` + +## NMSLIB engine (deprecated) + +The Non-Metric Space Library (NMSLIB) engine was one of the first vector search implementations in OpenSearch. While still supported, it is deprecated in favor of the Faiss and Lucene engines. + +### Supported methods + +The NMSLIB engine supports the following methods. + +Method name | Requires training | Supported spaces +:--- | :--- | :--- +[`hnsw`](#hnsw-parameters-2) | No | `l2`, `innerproduct`, `cosinesimil`, `l1`, `linf` + +#### HNSW parameters + +The HNSW method supports the following parameters. + +Parameter name | Required | Default | Updatable | Description +:--- | :--- | :--- | :--- | :--- +`ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed. +`m` | No | 16 | No | The number of bidirectional links created for each new element. Impacts memory consumption significantly. Keep between 2 and 100. + +For NMSLIB (deprecated), *ef_search* is set in the [index settings]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings). +{: .note} + +An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). +{: .note} + +### Example configuration + +```json +"method": { + "name": "hnsw", + "engine": "nmslib", + "space_type": "l2", + "parameters": { + "ef_construction": 100, + "m": 16 + } +} +``` + +## Choosing the right method + +There are several options to choose from when building your `knn_vector` field. To determine the correct methods and parameters, you should first understand the requirements of your workload and what trade-offs you are willing to make. Factors to consider are (1) query latency, (2) query quality, (3) memory limits, and (4) indexing latency. + +If memory is not a concern, HNSW offers a strong query latency/query quality trade-off. + +If you want to use less memory and increase indexing speed as compared to HNSW while maintaining similar query quality, you should evaluate IVF. + +If memory is a concern, consider adding a PQ encoder to your HNSW or IVF index. Because PQ is a lossy encoding, query quality will drop. 
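For reference, a method definition sketch that combines HNSW with a `pq` encoder might look as follows; the parameter values are illustrative (choose a PQ `m` that divides your vector dimension), and note that `code_size` must be `8` when `pq` is used with `hnsw`:

```json
"method": {
  "name": "hnsw",
  "engine": "faiss",
  "parameters": {
    "encoder": {
      "name": "pq",
      "parameters": {
        "code_size": 8,
        "m": 8
      }
    },
    "ef_construction": 256,
    "m": 8
  }
}
```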
You can reduce the memory footprint by a factor of 2, with a minimal loss in search quality, by using the [`fp16` encoder]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/faiss-16-bit-quantization/). If your vector values fall within the [-128, 127] byte range, we recommend using the [byte quantizer]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#byte-vectors) to reduce the memory footprint by a factor of 4. To learn more about vector quantization options, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/).

## Memory estimation

In a typical OpenSearch cluster, a certain portion of RAM is reserved for the JVM heap. OpenSearch allocates native library indexes to a portion of the remaining RAM. This portion's size is determined by the `circuit_breaker_limit` cluster setting. By default, the limit is set to 50%.

Having a replica doubles the total number of vectors.
{: .note }

For information about using memory estimation with vector quantization, see the [vector quantization documentation]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/).
{: .note }

### HNSW memory estimation

The memory required for HNSW is estimated to be `1.1 * (4 * dimension + 8 * m)` bytes/vector.

As an example, assume you have a million vectors with a `dimension` of 256 and an `m` of 16. The memory requirement can be estimated as follows:

```r
1.1 * (4 * 256 + 8 * 16) * 1,000,000 ~= 1.267 GB
```

### IVF memory estimation

The memory required for IVF is estimated to be `1.1 * (((4 * dimension) * num_vectors) + (4 * nlist * dimension))` bytes.

As an example, assume you have a million vectors with a `dimension` of `256` and an `nlist` of `128`. The memory requirement can be estimated as follows:

```r
1.1 * (((4 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 1.126 GB
```
\ No newline at end of file
diff --git a/_field-types/supported-field-types/knn-spaces.md b/_field-types/supported-field-types/knn-spaces.md
new file mode 100644
index 0000000000..1a8f693c31
--- /dev/null
+++ b/_field-types/supported-field-types/knn-spaces.md
@@ -0,0 +1,96 @@
---
layout: default
title: Spaces
parent: k-NN vector
grand_parent: Supported field types
nav_order: 10
has_math: true
---

# Spaces

In vector search, a _space_ defines how the distance (or similarity) between two vectors is calculated. The choice of space affects how nearest neighbors are determined during search operations.
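As a quick illustration (using the `l2` space type defined in the table that follows), the distance between two vectors is the sum of squared differences, which OpenSearch then converts into a relevance score of `1 / (1 + d)`:

```python
def l2_distance(x, y):
    """Squared Euclidean (l2) distance, as used by the l2 space type."""
    return sum((xi - yi) ** 2 for xi, yi in zip(x, y))

d = l2_distance([5.2, 3.9], [5.0, 4.0])  # 0.05
print(1 / (1 + d))                       # ~0.952: the corresponding OpenSearch score
```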
The space type is specified when creating an index:

- You can specify the space type at the top level of the field mapping:

  ```json
  PUT /test-index
  {
    "settings": {
      "index": {
        "knn": true
      }
    },
    "mappings": {
      "properties": {
        "my_vector1": {
          "type": "knn_vector",
          "dimension": 3,
          "space_type": "l2"
        }
      }
    }
  }
  ```
  {% include copy-curl.html %}

- Alternatively, you can specify the space type within the `method` object if defining a method:

  ```json
  PUT test-index
  {
    "settings": {
      "index": {
        "knn": true,
        "knn.algo_param.ef_search": 100
      }
    },
    "mappings": {
      "properties": {
        "my_vector1": {
          "type": "knn_vector",
          "dimension": 1024,
          "method": {
            "name": "hnsw",
            "space_type": "l2",
            "engine": "nmslib",
            "parameters": {
              "ef_construction": 128,
              "m": 24
            }
          }
        }
      }
    }
  }
  ```
  {% include copy-curl.html %}

## Distance calculation

A space defines the function used to measure the distance between two points in order to determine the k-nearest neighbors. From the k-NN perspective, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a higher score equates to a better result. OpenSearch supports the following spaces.

Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the specific engine section in the [method documentation]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/).
{: .note}

| Space type | Search type | Distance function ($$d$$) | OpenSearch score |
| :--- | :--- | :--- | :--- |
| `l1` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = \sum_{i=1}^n \lvert x_i - y_i \rvert $$ | $$ score = {1 \over {1 + d} } $$ |
| `l2` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = \sum_{i=1}^n (x_i - y_i)^2 $$ | $$ score = {1 \over 1 + d } $$ |
| `linf` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = max(\lvert x_i - y_i \rvert) $$ | $$ score = {1 \over 1 + d } $$ |
| `cosinesimil` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = 1 - cos { \theta } = 1 - {\mathbf{x} \cdot \mathbf{y} \over \lVert \mathbf{x}\rVert \cdot \lVert \mathbf{y}\rVert}$$$$ = 1 - {\sum_{i=1}^n x_i y_i \over \sqrt{\sum_{i=1}^n x_i^2} \cdot \sqrt{\sum_{i=1}^n y_i^2}}$$, where $$\lVert \mathbf{x}\rVert$$ and $$\lVert \mathbf{y}\rVert$$ represent the norms of vectors $$\mathbf{x}$$ and $$\mathbf{y}$$, respectively. | $$ score = {2 - d \over 2} $$ |
| `innerproduct` (supported for Lucene in OpenSearch version 2.13 and later) | Approximate | **NMSLIB** and **Faiss**:<br>$$ d(\mathbf{x}, \mathbf{y}) = - {\mathbf{x} \cdot \mathbf{y}} = - \sum_{i=1}^n x_i y_i $$<br><br>**Lucene**:<br>$$ d(\mathbf{x}, \mathbf{y}) = {\mathbf{x} \cdot \mathbf{y}} = \sum_{i=1}^n x_i y_i $$ | **NMSLIB** and **Faiss**:<br>$$ \text{If } d \ge 0, score = {1 \over 1 + d }$$<br>$$\text{If } d < 0, score = -d + 1$$<br><br>**Lucene:**<br>$$ \text{If } d > 0, score = d + 1 $$<br>$$\text{If } d \le 0, score = {1 \over 1 + (-1 \cdot d) }$$ |
| `innerproduct` (supported for Lucene in OpenSearch version 2.13 and later) | Exact | $$ d(\mathbf{x}, \mathbf{y}) = - {\mathbf{x} \cdot \mathbf{y}} = - \sum_{i=1}^n x_i y_i $$ | $$ \text{If } d \ge 0, score = {1 \over 1 + d }$$<br>
$$\text{If} d < 0, score = −d + 1$$ | +| `hamming` (supported for binary vectors in OpenSearch version 2.16 and later) | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = \text{countSetBits}(\mathbf{x} \oplus \mathbf{y})$$ | $$ score = {1 \over 1 + d } $$ | +| `hammingbit` (supported for binary and long vectors) | Exact | $$ d(\mathbf{x}, \mathbf{y}) = \text{countSetBits}(\mathbf{x} \oplus \mathbf{y})$$ | $$ score = {1 \over 1 + d } $$ | + +The cosine similarity formula does not include the `1 -` prefix. However, because similarity search libraries equate lower scores with closer results, they return `1 - cosineSimilarity` for the cosine similarity space---this is why `1 -` is included in the distance function. +{: .note } + +With cosine similarity, it is not valid to pass a zero vector (`[0, 0, ...]`) as input. This is because the magnitude of such a vector is 0, which raises a `divide by 0` exception in the corresponding formula. Requests containing the zero vector will be rejected, and a corresponding exception will be thrown. +{: .note } + +The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized#binary-vectors). +{: .note} diff --git a/_field-types/supported-field-types/knn-vector.md b/_field-types/supported-field-types/knn-vector.md index 4a6e32a658..6c22c8b0f0 100644 --- a/_field-types/supported-field-types/knn-vector.md +++ b/_field-types/supported-field-types/knn-vector.md @@ -2,7 +2,7 @@ layout: default title: k-NN vector nav_order: 20 -has_children: false +has_children: true parent: Supported field types has_math: true --- @@ -11,14 +11,14 @@ has_math: true **Introduced 1.0** {: .label .label-purple } -The `knn_vector` data type allows you to ingest vectors into an OpenSearch index and perform different kinds of vector search. The `knn_vector` field is highly configurable and can serve many different vector workloads. In general, a `knn_vector` field can be built either by providing a method definition or specifying a model ID. +The `knn_vector` data type allows you to ingest vectors into an OpenSearch index and perform different kinds of vector search. The `knn_vector` field is highly configurable and can serve many different vector workloads. In general, a `knn_vector` field can be built either by [providing a method definition](#method-definitions) or [specifying a model ID](#model-ids). ## Example For example, to map `my_vector` as a `knn_vector`, use the following request: ```json -PUT test-index +PUT /test-index { "settings": { "index": { @@ -30,97 +30,7 @@ PUT test-index "my_vector": { "type": "knn_vector", "dimension": 3, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "faiss" - } - } - } - } -} -``` -{% include copy-curl.html %} - -## Vector workload modes - -Vector search involves trade-offs between low-latency and low-cost search. Specify the `mode` mapping parameter of the `knn_vector` type to indicate which search mode you want to prioritize. The `mode` dictates the default values for k-NN parameters. You can further fine-tune your index by overriding the default parameter values in the k-NN field mapping. - -The following modes are currently supported. - -| Mode | Default engine | Description | -|:---|:---|:---| -| `in_memory` (Default) | `faiss` | Prioritizes low-latency search. This mode uses the `faiss` engine without any quantization applied. 
It is configured with the default parameter values for vector search in OpenSearch. | -| `on_disk` | `faiss` | Prioritizes low-cost vector search while maintaining strong recall. By default, the `on_disk` mode uses quantization and rescoring to execute a two-pass approach to retrieve the top neighbors. The `on_disk` mode supports only `float` vector types. | - -To create a vector index that uses the `on_disk` mode for low-cost search, send the following request: - -```json -PUT test-index -{ - "settings": { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector": { - "type": "knn_vector", - "dimension": 3, - "space_type": "l2", - "mode": "on_disk" - } - } - } -} -``` -{% include copy-curl.html %} - -## Compression levels - -The `compression_level` mapping parameter selects a quantization encoder that reduces vector memory consumption by the given factor. The following table lists the available `compression_level` values. - -| Compression level | Supported engines | -|:------------------|:---------------------------------------------| -| `1x` | `faiss`, `lucene`, and `nmslib` (deprecated) | -| `2x` | `faiss` | -| `4x` | `lucene` | -| `8x` | `faiss` | -| `16x` | `faiss` | -| `32x` | `faiss` | - -For example, if a `compression_level` of `32x` is passed for a `float32` index of 768-dimensional vectors, the per-vector memory is reduced from `4 * 768 = 3072` bytes to `3072 / 32 = 846` bytes. Internally, binary quantization (which maps a `float` to a `bit`) may be used to achieve this compression. - -If you set the `compression_level` parameter, then you cannot specify an `encoder` in the `method` mapping. Compression levels greater than `1x` are only supported for `float` vector types. -{: .note} - -The following table lists the default `compression_level` values for the available workload modes. - -| Mode | Default compression level | -|:------------------|:-------------------------------| -| `in_memory` | `1x` | -| `on_disk` | `32x` | - - -To create a vector field with a `compression_level` of `16x`, specify the `compression_level` parameter in the mappings. This parameter overrides the default compression level for the `on_disk` mode from `32x` to `16x`, producing higher recall and accuracy at the expense of a larger memory footprint: - -```json -PUT test-index -{ - "settings": { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector": { - "type": "knn_vector", - "dimension": 3, - "space_type": "l2", - "mode": "on_disk", - "compression_level": "16x" + "space_type": "l2" } } } @@ -130,7 +40,7 @@ PUT test-index ## Method definitions -[Method definitions]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/) are used when the underlying [approximate k-NN]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) algorithm does not require training. For example, the following `knn_vector` field specifies that NMSLIB's implementation of HNSW should be used for approximate k-NN search. During indexing, NMSLIB will build the corresponding HNSW segment files. +[Method definitions]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) are used when the underlying [approximate k-NN]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) algorithm does not require training. For example, the following `knn_vector` field specifies that Faiss implementation of HNSW should be used for approximate k-NN search. 
During indexing, Faiss builds the corresponding HNSW segment files: ```json "my_vector": { @@ -139,7 +49,7 @@ PUT test-index "space_type": "l2", "method": { "name": "hnsw", - "engine": "nmslib", + "engine": "faiss", "parameters": { "ef_construction": 100, "m": 16 @@ -168,741 +78,3 @@ However, if you intend to use Painless scripting or a k-NN score script, you onl } ``` -## Byte vectors - -By default, k-NN vectors are `float` vectors, in which each dimension is 4 bytes. If you want to save storage space, you can use `byte` vectors with the `faiss` or `lucene` engine. In a `byte` vector, each dimension is a signed 8-bit integer in the [-128, 127] range. - -Byte vectors are supported only for the `lucene` and `faiss` engines. They are not supported for the `nmslib` engine. -{: .note} - -In [k-NN benchmarking tests](https://github.com/opensearch-project/opensearch-benchmark-workloads/tree/main/vectorsearch), the use of `byte` rather than `float` vectors resulted in a significant reduction in storage and memory usage as well as improved indexing throughput and reduced query latency. Additionally, precision on recall was not greatly affected (note that recall can depend on various factors, such as the [quantization technique](#quantization-techniques) and data distribution). - -When using `byte` vectors, expect some loss of precision in the recall compared to using `float` vectors. Byte vectors are useful in large-scale applications and use cases that prioritize a reduced memory footprint in exchange for a minimal loss of recall. -{: .important} - -When using `byte` vectors with the `faiss` engine, we recommend using [SIMD optimization]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/vector-field/#simd-optimization-for-the-faiss-engine), which helps to significantly reduce search latencies and improve indexing throughput. -{: .important} - -Introduced in k-NN plugin version 2.9, the optional `data_type` parameter defines the data type of a vector. The default value of this parameter is `float`. - -To use a `byte` vector, set the `data_type` parameter to `byte` when creating mappings for an index: - -### Example: HNSW - -The following example creates a byte vector index with the `lucene` engine and `hnsw` algorithm: - -```json -PUT test-index -{ - "settings": { - "index": { - "knn": true, - "knn.algo_param.ef_search": 100 - } - }, - "mappings": { - "properties": { - "my_vector": { - "type": "knn_vector", - "dimension": 3, - "data_type": "byte", - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "lucene", - "parameters": { - "ef_construction": 100, - "m": 16 - } - } - } - } - } -} -``` -{% include copy-curl.html %} - -After creating the index, ingest documents as usual. Make sure each dimension in the vector is in the supported [-128, 127] range: - -```json -PUT test-index/_doc/1 -{ - "my_vector": [-126, 28, 127] -} -``` -{% include copy-curl.html %} - -```json -PUT test-index/_doc/2 -{ - "my_vector": [100, -128, 0] -} -``` -{% include copy-curl.html %} - -When querying, be sure to use a `byte` vector: - -```json -GET test-index/_search -{ - "size": 2, - "query": { - "knn": { - "my_vector": { - "vector": [26, -120, 99], - "k": 2 - } - } - } -} -``` -{% include copy-curl.html %} - -### Example: IVF - -The `ivf` method requires a training step that creates and trains the model used to initialize the native library index during segment creation. 
For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model).
-
-First, create an index that will contain byte vector training data. Specify the `faiss` engine and `ivf` algorithm and make sure that the `dimension` matches the dimension of the model you want to create:
-
-```json
-PUT train-index
-{
-  "mappings": {
-    "properties": {
-      "train-field": {
-        "type": "knn_vector",
-        "dimension": 4,
-        "data_type": "byte"
-      }
-    }
-  }
-}
-```
-{% include copy-curl.html %}
-
-Next, ingest training data containing byte vectors into the training index:
-
-```json
-PUT _bulk
-{ "index": { "_index": "train-index", "_id": "1" } }
-{ "train-field": [127, 100, 0, -120] }
-{ "index": { "_index": "train-index", "_id": "2" } }
-{ "train-field": [2, -128, -10, 50] }
-{ "index": { "_index": "train-index", "_id": "3" } }
-{ "train-field": [13, -100, 5, 126] }
-{ "index": { "_index": "train-index", "_id": "4" } }
-{ "train-field": [5, 100, -6, -125] }
-```
-{% include copy-curl.html %}
-
-Then, create and train the model named `byte-vector-model`. The model will be trained using the training data from the `train-field` in the `train-index`. Specify the `byte` data type:
-
-```json
-POST _plugins/_knn/models/byte-vector-model/_train
-{
-  "training_index": "train-index",
-  "training_field": "train-field",
-  "dimension": 4,
-  "description": "model with byte data",
-  "data_type": "byte",
-  "method": {
-    "name": "ivf",
-    "engine": "faiss",
-    "space_type": "l2",
-    "parameters": {
-      "nlist": 1,
-      "nprobes": 1
-    }
-  }
-}
-```
-{% include copy-curl.html %}
-
-To check the model training status, call the Get Model API:
-
-```json
-GET _plugins/_knn/models/byte-vector-model?filter_path=state
-```
-{% include copy-curl.html %}
-
-Once the training is complete, the `state` changes to `created`.
-
-Next, create an index that will initialize its native library indexes using the trained model:
-
-```json
-PUT test-byte-ivf
-{
-  "settings": {
-    "index": {
-      "knn": true
-    }
-  },
-  "mappings": {
-    "properties": {
-      "my_vector": {
-        "type": "knn_vector",
-        "model_id": "byte-vector-model"
-      }
-    }
-  }
-}
-```
-{% include copy-curl.html %}
-
-Ingest the data containing the byte vectors that you want to search into the created index:
-
-```json
-PUT _bulk?refresh=true
-{"index": {"_index": "test-byte-ivf", "_id": "1"}}
-{"my_vector": [7, 10, 15, -120]}
-{"index": {"_index": "test-byte-ivf", "_id": "2"}}
-{"my_vector": [10, -100, 120, -108]}
-{"index": {"_index": "test-byte-ivf", "_id": "3"}}
-{"my_vector": [1, -2, 5, -50]}
-{"index": {"_index": "test-byte-ivf", "_id": "4"}}
-{"my_vector": [9, -7, 45, -78]}
-{"index": {"_index": "test-byte-ivf", "_id": "5"}}
-{"my_vector": [80, -70, 127, -128]}
-```
-{% include copy-curl.html %}
-
-Finally, search the data. Be sure to provide a byte vector in the k-NN vector field:
-
-```json
-GET test-byte-ivf/_search
-{
-  "size": 2,
-  "query": {
-    "knn": {
-      "my_vector": {
-        "vector": [100, -120, 50, -45],
-        "k": 2
-      }
-    }
-  }
-}
-```
-{% include copy-curl.html %}
-
-### Memory estimation
-
-In the best-case scenario, byte vectors require 25% of the memory required by 32-bit vectors.
-
-#### HNSW memory estimation
-
-The memory required for Hierarchical Navigable Small Worlds (HNSW) is estimated to be `1.1 * (dimension + 8 * m)` bytes/vector, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph.
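To make the arithmetic concrete, here is a minimal Python sketch (illustrative only; the function name is ours, not part of any OpenSearch API) that evaluates this estimate. It reproduces the figure in the example that follows:

```python
def hnsw_byte_memory_gib(dimension: int, m: int, num_vectors: int) -> float:
    """Estimated native memory for a byte-vector HNSW graph,
    using the 1.1 * (dimension + 8 * m) bytes-per-vector estimate."""
    return 1.1 * (dimension + 8 * m) * num_vectors / 1024**3

# 1 million 256-dimensional byte vectors with m = 16:
print(round(hnsw_byte_memory_gib(256, 16, 1_000_000), 2))  # ~0.39
```
{% include copy.html %}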
-
-As an example, assume that you have 1 million vectors with a dimension of 256 and an `m` of 16. The memory requirement can be estimated as follows:
-
-```r
-1.1 * (256 + 8 * 16) * 1,000,000 ~= 0.39 GB
-```
-
-#### IVF memory estimation
-
-The memory required for IVF is estimated to be `1.1 * ((dimension * num_vectors) + (4 * nlist * dimension))` bytes, where `nlist` is the number of buckets to partition vectors into.
-
-As an example, assume that you have 1 million vectors with a dimension of 256 and an `nlist` of 128. The memory requirement can be estimated as follows:
-
-```r
-1.1 * ((256 * 1,000,000) + (4 * 128 * 256)) ~= 0.27 GB
-```
-
-
-### Quantization techniques
-
-If your vectors are of the type `float`, you need to first convert them to the `byte` type before ingesting the documents. This conversion is accomplished by _quantizing the dataset_---reducing the precision of its vectors. There are many quantization techniques, such as scalar quantization or product quantization (PQ), which is used in the Faiss engine. The choice of quantization technique depends on the type of data you're using and can affect the accuracy of recall values. The following sections describe the scalar quantization algorithms that were used to quantize the [k-NN benchmarking test](https://github.com/opensearch-project/opensearch-benchmark-workloads/tree/main/vectorsearch) data for the [L2](#scalar-quantization-for-the-l2-space-type) and [cosine similarity](#scalar-quantization-for-the-cosine-similarity-space-type) space types. The provided pseudocode is for illustration purposes only.
-
-#### Scalar quantization for the L2 space type
-
-The following example pseudocode illustrates the scalar quantization technique used for the benchmarking tests on Euclidean datasets with the L2 space type. Euclidean distance is shift invariant. If you shift both $$x$$ and $$y$$ by the same $$z$$, then the distance remains the same ($$\lVert x-y\rVert =\lVert (x-z)-(y-z)\rVert$$).
-
-```python
-# Random dataset (Example to create a random dataset)
-dataset = np.random.uniform(-300, 300, (100, 10))
-# Random query set (Example to create a random queryset)
-queryset = np.random.uniform(-350, 350, (100, 10))
-# Number of values
-B = 256
-
-# INDEXING:
-# Get min and max
-dataset_min = np.min(dataset)
-dataset_max = np.max(dataset)
-# Shift coordinates to be non-negative
-dataset -= dataset_min
-# Normalize into [0, 1]
-dataset *= 1. / (dataset_max - dataset_min)
-# Bucket into 256 values
-dataset = np.floor(dataset * (B - 1)) - int(B / 2)
-
-# QUERYING:
-# Clip (if queryset range is out of dataset range)
-queryset = queryset.clip(dataset_min, dataset_max)
-# Shift coordinates to be non-negative
-queryset -= dataset_min
-# Normalize
-queryset *= 1. / (dataset_max - dataset_min)
-# Bucket into 256 values
-queryset = np.floor(queryset * (B - 1)) - int(B / 2)
-```
-{% include copy.html %}
-
-#### Scalar quantization for the cosine similarity space type
-
-The following example pseudocode illustrates the scalar quantization technique used for the benchmarking tests on angular datasets with the cosine similarity space type. Cosine similarity is not shift invariant ($$cos(x, y) \neq cos(x-z, y-z)$$).
-
-The following pseudocode is for positive numbers:
-
-```python
-# For Positive Numbers
-
-# INDEXING and QUERYING:
-
-# Get Max of train dataset
-max = np.max(dataset)
-min = 0
-B = 127
-
-# Normalize into [0,1]
-val = (val - min) / (max - min)
-val = (val * B)
-
-# Get int and fraction values
-int_part = floor(val)
-frac_part = val - int_part
-
-if 0.5 < frac_part:
-    bval = int_part + 1
-else:
-    bval = int_part
-
-return Byte(bval)
-```
-{% include copy.html %}
-
-The following pseudocode is for negative numbers:
-
-```python
-# For Negative Numbers
-
-# INDEXING and QUERYING:
-
-# Get Min of train dataset
-min = 0
-max = -np.min(dataset)
-B = 128
-
-# Normalize into [0,1]
-val = (val - min) / (max - min)
-val = (val * B)
-
-# Get int and fraction values
-int_part = floor(val)
-frac_part = val - int_part
-
-if 0.5 < frac_part:
-    bval = int_part + 1
-else:
-    bval = int_part
-
-return Byte(bval)
-```
-{% include copy.html %}
-
-## Binary vectors
-
-You can reduce memory costs by a factor of 32 by switching from float to binary vectors.
-Using binary vector indexes can lower operational costs while maintaining high recall performance, making large-scale deployment more economical and efficient.
-
-Binary format is available for the following k-NN search types:
-
-- [Approximate k-NN]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/): Supports binary vectors only for the Faiss engine with the HNSW and IVF algorithms.
-- [Script score k-NN]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/): Enables the use of binary vectors in script scoring.
-- [Painless extensions]({{site.url}}{{site.baseurl}}/search-plugins/knn/painless-functions/): Allows the use of binary vectors with Painless scripting extensions.
-
-### Requirements
-
-There are several requirements for using binary vectors in the OpenSearch k-NN plugin:
-
-- The `data_type` of the binary vector index must be `binary`.
-- The `space_type` of the binary vector index must be `hamming`.
-- The `dimension` of the binary vector index must be a multiple of 8.
-- You must convert your binary data into 8-bit signed integers (`int8`) in the [-128, 127] range. For example, the binary sequence of 8 bits `0, 1, 1, 0, 0, 0, 1, 1` must be converted into its equivalent byte value of `99` to be used as a binary vector input; the sketch following this list shows the conversion.
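The following minimal Python sketch (illustrative only; `bits_to_signed_bytes` is a hypothetical helper, not an OpenSearch or client API) performs this conversion and reproduces the `99` example from the last requirement above:

```python
def bits_to_signed_bytes(bits):
    """Pack a bit sequence (length must be a multiple of 8) into
    signed 8-bit integers in the [-128, 127] range."""
    if len(bits) % 8 != 0:
        raise ValueError("dimension must be a multiple of 8")
    out = []
    for i in range(0, len(bits), 8):
        value = int("".join(str(b) for b in bits[i:i + 8]), 2)
        # Wrap unsigned 0..255 into the signed int8 range.
        out.append(value - 256 if value > 127 else value)
    return out

print(bits_to_signed_bytes([0, 1, 1, 0, 0, 0, 1, 1]))  # [99]
```
{% include copy.html %}

Bit patterns whose high bit is set wrap into the negative part of the range; for example, `1, 0, 0, 0, 0, 0, 0, 0` becomes `-128`.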
- -### Example: HNSW - -To create a binary vector index with the Faiss engine and HNSW algorithm, send the following request: - -```json -PUT /test-binary-hnsw -{ - "settings": { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector": { - "type": "knn_vector", - "dimension": 8, - "data_type": "binary", - "space_type": "hamming", - "method": { - "name": "hnsw", - "engine": "faiss" - } - } - } - } -} -``` -{% include copy-curl.html %} - -Then ingest some documents containing binary vectors: - -```json -PUT _bulk -{"index": {"_index": "test-binary-hnsw", "_id": "1"}} -{"my_vector": [7], "price": 4.4} -{"index": {"_index": "test-binary-hnsw", "_id": "2"}} -{"my_vector": [10], "price": 14.2} -{"index": {"_index": "test-binary-hnsw", "_id": "3"}} -{"my_vector": [15], "price": 19.1} -{"index": {"_index": "test-binary-hnsw", "_id": "4"}} -{"my_vector": [99], "price": 1.2} -{"index": {"_index": "test-binary-hnsw", "_id": "5"}} -{"my_vector": [80], "price": 16.5} -``` -{% include copy-curl.html %} - -When querying, be sure to use a binary vector: - -```json -GET /test-binary-hnsw/_search -{ - "size": 2, - "query": { - "knn": { - "my_vector": { - "vector": [9], - "k": 2 - } - } - } -} -``` -{% include copy-curl.html %} - -The response contains the two vectors closest to the query vector: - -
- - Response - - {: .text-delta} - -```json -{ - "took": 8, - "timed_out": false, - "_shards": { - "total": 1, - "successful": 1, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total": { - "value": 2, - "relation": "eq" - }, - "max_score": 0.5, - "hits": [ - { - "_index": "test-binary-hnsw", - "_id": "2", - "_score": 0.5, - "_source": { - "my_vector": [ - 10 - ], - "price": 14.2 - } - }, - { - "_index": "test-binary-hnsw", - "_id": "5", - "_score": 0.25, - "_source": { - "my_vector": [ - 80 - ], - "price": 16.5 - } - } - ] - } -} -``` -
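For reference, the following minimal Python sketch (illustrative only) shows how the `hamming` space type's distance and score from the space type table are computed for two single-byte vectors:

```python
def hamming_score(x: int, y: int) -> float:
    """d = countSetBits(x XOR y); score = 1 / (1 + d).
    Masking keeps the XOR within 8 bits so signed byte
    values such as -128 also work."""
    d = bin((x ^ y) & 0xFF).count("1")
    return 1 / (1 + d)

# 0b00000111 and 0b00001010 differ in 3 bit positions:
print(hamming_score(7, 10))  # 0.25
```
{% include copy.html %}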
- -### Example: IVF - -The IVF method requires a training step that creates and trains the model used to initialize the native library index during segment creation. For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model). - -First, create an index that will contain binary vector training data. Specify the Faiss engine and IVF algorithm and make sure that the `dimension` matches the dimension of the model you want to create: - -```json -PUT train-index -{ - "mappings": { - "properties": { - "train-field": { - "type": "knn_vector", - "dimension": 8, - "data_type": "binary" - } - } - } -} -``` -{% include copy-curl.html %} - -Ingest training data containing binary vectors into the training index: - -
- - Bulk ingest request - - {: .text-delta} - -```json -PUT _bulk -{ "index": { "_index": "train-index", "_id": "1" } } -{ "train-field": [1] } -{ "index": { "_index": "train-index", "_id": "2" } } -{ "train-field": [2] } -{ "index": { "_index": "train-index", "_id": "3" } } -{ "train-field": [3] } -{ "index": { "_index": "train-index", "_id": "4" } } -{ "train-field": [4] } -{ "index": { "_index": "train-index", "_id": "5" } } -{ "train-field": [5] } -{ "index": { "_index": "train-index", "_id": "6" } } -{ "train-field": [6] } -{ "index": { "_index": "train-index", "_id": "7" } } -{ "train-field": [7] } -{ "index": { "_index": "train-index", "_id": "8" } } -{ "train-field": [8] } -{ "index": { "_index": "train-index", "_id": "9" } } -{ "train-field": [9] } -{ "index": { "_index": "train-index", "_id": "10" } } -{ "train-field": [10] } -{ "index": { "_index": "train-index", "_id": "11" } } -{ "train-field": [11] } -{ "index": { "_index": "train-index", "_id": "12" } } -{ "train-field": [12] } -{ "index": { "_index": "train-index", "_id": "13" } } -{ "train-field": [13] } -{ "index": { "_index": "train-index", "_id": "14" } } -{ "train-field": [14] } -{ "index": { "_index": "train-index", "_id": "15" } } -{ "train-field": [15] } -{ "index": { "_index": "train-index", "_id": "16" } } -{ "train-field": [16] } -{ "index": { "_index": "train-index", "_id": "17" } } -{ "train-field": [17] } -{ "index": { "_index": "train-index", "_id": "18" } } -{ "train-field": [18] } -{ "index": { "_index": "train-index", "_id": "19" } } -{ "train-field": [19] } -{ "index": { "_index": "train-index", "_id": "20" } } -{ "train-field": [20] } -{ "index": { "_index": "train-index", "_id": "21" } } -{ "train-field": [21] } -{ "index": { "_index": "train-index", "_id": "22" } } -{ "train-field": [22] } -{ "index": { "_index": "train-index", "_id": "23" } } -{ "train-field": [23] } -{ "index": { "_index": "train-index", "_id": "24" } } -{ "train-field": [24] } -{ "index": { "_index": "train-index", "_id": "25" } } -{ "train-field": [25] } -{ "index": { "_index": "train-index", "_id": "26" } } -{ "train-field": [26] } -{ "index": { "_index": "train-index", "_id": "27" } } -{ "train-field": [27] } -{ "index": { "_index": "train-index", "_id": "28" } } -{ "train-field": [28] } -{ "index": { "_index": "train-index", "_id": "29" } } -{ "train-field": [29] } -{ "index": { "_index": "train-index", "_id": "30" } } -{ "train-field": [30] } -{ "index": { "_index": "train-index", "_id": "31" } } -{ "train-field": [31] } -{ "index": { "_index": "train-index", "_id": "32" } } -{ "train-field": [32] } -{ "index": { "_index": "train-index", "_id": "33" } } -{ "train-field": [33] } -{ "index": { "_index": "train-index", "_id": "34" } } -{ "train-field": [34] } -{ "index": { "_index": "train-index", "_id": "35" } } -{ "train-field": [35] } -{ "index": { "_index": "train-index", "_id": "36" } } -{ "train-field": [36] } -{ "index": { "_index": "train-index", "_id": "37" } } -{ "train-field": [37] } -{ "index": { "_index": "train-index", "_id": "38" } } -{ "train-field": [38] } -{ "index": { "_index": "train-index", "_id": "39" } } -{ "train-field": [39] } -{ "index": { "_index": "train-index", "_id": "40" } } -{ "train-field": [40] } -``` -{% include copy-curl.html %} -
-
-Then, create and train the model named `test-binary-model`. The model will be trained using the training data from the `train-field` in the `train-index`. Specify the `binary` data type and `hamming` space type:
-
-```json
-POST _plugins/_knn/models/test-binary-model/_train
-{
-  "training_index": "train-index",
-  "training_field": "train-field",
-  "dimension": 8,
-  "description": "model with binary data",
-  "data_type": "binary",
-  "space_type": "hamming",
-  "method": {
-    "name": "ivf",
-    "engine": "faiss",
-    "parameters": {
-      "nlist": 16,
-      "nprobes": 1
-    }
-  }
-}
-```
-{% include copy-curl.html %}
-
-To check the model training status, call the Get Model API:
-
-```json
-GET _plugins/_knn/models/test-binary-model?filter_path=state
-```
-{% include copy-curl.html %}
-
-Once the training is complete, the `state` changes to `created`.
-
-Next, create an index that will initialize its native library indexes using the trained model:
-
-```json
-PUT test-binary-ivf
-{
-  "settings": {
-    "index": {
-      "knn": true
-    }
-  },
-  "mappings": {
-    "properties": {
-      "my_vector": {
-        "type": "knn_vector",
-        "model_id": "test-binary-model"
-      }
-    }
-  }
-}
-```
-{% include copy-curl.html %}
-
-Ingest the data containing the binary vectors that you want to search into the created index:
-
-```json
-PUT _bulk?refresh=true
-{"index": {"_index": "test-binary-ivf", "_id": "1"}}
-{"my_vector": [7], "price": 4.4}
-{"index": {"_index": "test-binary-ivf", "_id": "2"}}
-{"my_vector": [10], "price": 14.2}
-{"index": {"_index": "test-binary-ivf", "_id": "3"}}
-{"my_vector": [15], "price": 19.1}
-{"index": {"_index": "test-binary-ivf", "_id": "4"}}
-{"my_vector": [99], "price": 1.2}
-{"index": {"_index": "test-binary-ivf", "_id": "5"}}
-{"my_vector": [80], "price": 16.5}
-```
-{% include copy-curl.html %}
-
-Finally, search the data. Be sure to provide a binary vector in the k-NN vector field:
-
-```json
-GET test-binary-ivf/_search
-{
-  "size": 2,
-  "query": {
-    "knn": {
-      "my_vector": {
-        "vector": [8],
-        "k": 2
-      }
-    }
-  }
-}
-```
-{% include copy-curl.html %}
-
-The response contains the two vectors closest to the query vector:
-
-<details markdown="block">
-
-  Response
-
-  {: .text-delta}
-
-```json
-{
-  "took": 7,
-  "timed_out": false,
-  "_shards": {
-    "total": 1,
-    "successful": 1,
-    "skipped": 0,
-    "failed": 0
-  },
-  "hits": {
-    "total": {
-      "value": 2,
-      "relation": "eq"
-    },
-    "max_score": 0.5,
-    "hits": [
-      {
-        "_index": "test-binary-ivf",
-        "_id": "2",
-        "_score": 0.5,
-        "_source": {
-          "my_vector": [
-            10
-          ],
-          "price": 14.2
-        }
-      },
-      {
-        "_index": "test-binary-ivf",
-        "_id": "3",
-        "_score": 0.25,
-        "_source": {
-          "my_vector": [
-            15
-          ],
-          "price": 19.1
-        }
-      }
-    ]
-  }
-}
-```
diff --git a/_query-dsl/specialized/kNN.md b/_query-dsl/specialized/kNN.md index ef3500a359..28c546f210 100644 --- a/_query-dsl/specialized/kNN.md +++ b/_query-dsl/specialized/kNN.md @@ -2,12 +2,12 @@ layout: default title: k-NN parent: Specialized queries -nav_order: 55 +nav_order: 10 --- # k-NN query -Use the `knn` query for vector field search in [vector search]({{site.url}}{{site.baseurl}}/vector-search/). The query can use either raw vectors or automatically generate vectors from text using a machine learning model. +Use the `knn` query for searching fields containing raw vectors in [vector search]({{site.url}}{{site.baseurl}}/vector-search/). ## Request body fields @@ -31,15 +31,15 @@ Field | Data type | Required/Optional | Description `k` | Integer | Optional | The number of nearest neighbors to return. Required if `max_distance` or `min_score` is not specified. `max_distance` | Float | Optional | The maximum distance threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/). `min_score` | Float | Optional | The minimum score threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/). -`filter` | Object | Optional | A filter to apply to the k-NN search. For more information, see [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/vector-search-with-filters/). -`method_parameters` | Object | Optional | Additional parameters for fine-tuning the search:
- `ef_search`: Number of vectors to examine (for `hnsw` method)
- `nprobes`: Number of buckets to examine (for `ivf` method). For more information, see [Specifying method parameters in the query](#specifying-method-parameters-in-the-query). -`rescore` | Object | Optional | Parameters for configuring rescoring functionality:
- `oversample_factor`: Controls the oversampling of candidate vectors before ranking. For more information, see [Rescoring results](#rescoring-results). +`filter` | Object | Optional | A filter to apply to the k-NN search. For more information, see [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/filter-search-knn/). **Important**: Filter can only be used with the `faiss` or `lucene` engines. +`method_parameters` | Object | Optional | Additional parameters for fine-tuning the search:
- `ef_search` (Integer): The number of vectors to examine (for `hnsw` method)
- `nprobes` (Integer): The number of buckets to examine (for `ivf` method). For more information, see [Specifying method parameters in the query](#specifying-method-parameters-in-the-query). +`rescore` | Object or Boolean | Optional | Parameters for configuring rescoring functionality:
- `oversample_factor` (Float): Controls the oversampling of candidate vectors before ranking. Valid values are in the `[1.0, 100.0]` range. Default is `1.0` (no rescoring). To use the default `oversample_factor` of `1.0`, set `rescore` to `true`. For more information, see [Rescoring results](#rescoring-results). `expand_nested_docs` | Boolean | Optional | When `true`, retrieves scores for all nested field documents within each parent document. Used with nested queries. For more information, see [Vector search with nested fields]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/nested-search-knn/). ## Example request ```json -GET /my-nlp-index/_search +GET /my-vector-index/_search { "query": { "knn": { @@ -56,7 +56,7 @@ GET /my-nlp-index/_search ## Example request: Nested fields ```json -GET my-knn-index-1/_search +GET /my-vector-index/_search { "_source": false, "query": { @@ -87,7 +87,7 @@ GET my-knn-index-1/_search The following example shows a radial search performed with `max_distance`: ```json -GET knn-index-test/_search +GET /my-vector-index/_search { "query": { "knn": { @@ -110,7 +110,7 @@ GET knn-index-test/_search The following example shows a radial search performed with `min_score`: ```json -GET knn-index-test/_search +GET /my-vector-index/_search { "query": { "knn": { @@ -129,7 +129,7 @@ GET knn-index-test/_search Starting with version 2.16, you can provide `method_parameters` in a search request: ```json -GET my-knn-index-1/_search +GET /my-vector-index/_search { "size": 2, "query": { @@ -181,7 +181,7 @@ The `oversample_factor` parameter controls the factor by which the search oversa The following request specifies the `ef_search` and `oversample_factor` parameters: ```json -GET my-vector-index/_search +GET /my-vector-index/_search { "size": 2, "query": { @@ -200,4 +200,68 @@ GET my-vector-index/_search } } ``` -{% include copy-curl.html %} \ No newline at end of file +{% include copy-curl.html %} + +## Rescoring quantized results to full precision + +[Disk-based search]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/disk-based-vector-search/) uses [vector quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/) in order to reduce memory usage by compressing vectors, but this compression can impact search accuracy. To improve recall while maintaining the memory savings of quantization, you can use a two-phase search approach. In the first phase, `oversample_factor * k` results are retrieved from an index using quantized vectors and the scores are approximated. In the second phase, the full-precision vectors of those `oversample_factor * k` results are loaded into memory from disk, and scores are recomputed against the full-precision query vector. The results are then reduced to the top k. + +The default rescoring behavior is determined by the `mode` and `compression_level` of the backing k-NN vector field: + +- For `in_memory` mode, no rescoring is applied by default. +- For `on_disk` mode, default rescoring is based on the configured `compression_level`. Each `compression_level` provides a default `oversample_factor`, specified in the following table. 
+ +| Compression level | Default rescore `oversample_factor` | +|:------------------|:----------------------------------| +| `32x` (default) | 3.0 | +| `16x` | 2.0 | +| `8x` | 2.0 | +| `4x` | No default rescoring | +| `2x` | No default rescoring | + +To explicitly apply rescoring, provide the `rescore` parameter in a query on a quantized index and specify the `oversample_factor`: + +```json +GET /my-vector-index/_search +{ + "size": 2, + "query": { + "knn": { + "target-field": { + "vector": [2, 3, 5, 6], + "k": 2, + "rescore" : { + "oversample_factor": 1.2 + } + } + } + } +} +``` +{% include copy-curl.html %} + +Alternatively, set the `rescore` parameter to `true` to use the default `oversample_factor` of `1.0`: + +```json +GET /my-vector-index/_search +{ + "size": 2, + "query": { + "knn": { + "target-field": { + "vector": [2, 3, 5, 6], + "k": 2, + "rescore" : true + } + } + } +} +``` +{% include copy-curl.html %} + +The `oversample_factor` is a floating-point number between 1.0 and 100.0, inclusive. The number of results in the first pass is calculated as `oversample_factor * k` and is guaranteed to be between 100 and 10,000, inclusive. If the calculated number of results is smaller than 100, then the number of results is set to 100. If the calculated number of results is greater than 10,000, then the number of results is set to 10,000. + +Rescoring is only supported for the `faiss` engine. + +Rescoring is not needed if quantization is not used because the scores returned are already fully precise. +{: .note} diff --git a/_query-dsl/specialized/neural.md b/_query-dsl/specialized/neural.md index 5350c742f9..bc3d832e4b 100644 --- a/_query-dsl/specialized/neural.md +++ b/_query-dsl/specialized/neural.md @@ -32,11 +32,9 @@ Field | Data type | Required/Optional | Description `query_image` | String | Optional | A base-64 encoded string that corresponds to the query image from which to generate vector embeddings. You must specify at least one `query_text` or `query_image`. `model_id` | String | Required if the default model ID is not set. For more information, see [Setting a default model on an index or field]({{site.url}}{{site.baseurl}}/search-plugins/neural-text-search/#setting-a-default-model-on-an-index-or-field). | The ID of the model that will be used to generate vector embeddings from the query text. The model must be deployed in OpenSearch before it can be used in neural search. For more information, see [Using custom models within OpenSearch]({{site.url}}{{site.baseurl}}/ml-commons-plugin/using-ml-models/) and [Neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/). `k` | Integer | Optional | The number of results returned by the k-NN search. Only one variable, either `k`, `min_score`, or `max_distance`, can be specified. If a variable is not specified, the default is `k` with a value of `10`. -`min_score` | Float | Optional | The minimum score threshold for the search results. Only one variable, either `k`, `min_score`, or `max_distance`, can be specified. For more information, see [k-NN radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). -`max_distance` | Float | Optional | The maximum distance threshold for the search results. Only one variable, either `k`, `min_score`, or `max_distance`, can be specified. For more information, see [k-NN radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). -`filter` | Object | Optional | A query that can be used to reduce the number of documents considered. 
For more information about filter usage, see [k-NN search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). **Important**: Filter can only be used with the `faiss` or `lucene` engines. -`method_parameters` | Object | Optional | Parameters passed to the k-NN index during search. See [Additional query parameters]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#additional-query-parameters). -`rescore` | Object | Optional | Parameters for configuring rescoring functionality for k-NN indexes built using quantization. See [Rescoring]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#rescoring-quantized-results-using-full-precision). +`min_score` | Float | Optional | The minimum score threshold for the search results. Only one variable, either `k`, `min_score`, or `max_distance`, can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). +`max_distance` | Float | Optional | The maximum distance threshold for the search results. Only one variable, either `k`, `min_score`, or `max_distance`, can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). +`filter` | Object | Optional | A query that can be used to reduce the number of documents considered. For more information about filter usage, see [Vector search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). #### Example request diff --git a/_sass/_home.scss b/_sass/_home.scss index 53645c666d..0a3d1f7dac 100644 --- a/_sass/_home.scss +++ b/_sass/_home.scss @@ -88,14 +88,9 @@ } } -.card { - @extend .home-card; - margin-bottom: 0; -} - -@mixin heading-font { +@mixin heading-font($size: 1.5rem) { @include heading-sans-serif; - font-size: 1.5rem; + font-size: $size; font-weight: 700; color: $blue-dk-300; } @@ -107,6 +102,14 @@ margin: 1rem 0 1.5rem 0; } +.card { + @extend .home-card; + margin-bottom: 0; + .heading { + @include heading-font(1.2rem); + } +} + .heading-main { @include heading-font; margin: 0; @@ -170,7 +173,7 @@ } .list-heading { - @include heading-font; + @include heading-font (1.2rem); margin: 0 0 0.75rem 0; font-size: 1.2rem; color: $blue-dk-300; diff --git a/_vector-search/api.md b/_vector-search/api.md index a4aeba633f..1980b9a1c8 100644 --- a/_vector-search/api.md +++ b/_vector-search/api.md @@ -351,7 +351,7 @@ Request parameter | Description `max_training_vector_count` | The maximum number of vectors from the training index to be used for training. Defaults to all the vectors in the index. Optional. `search_size` | The training data is pulled from the training index using scroll queries. This parameter defines the number of results to return per scroll query. Default is `10000`. Optional. `description` | A user-provided description of the model. Optional. -`method` | The configuration of the approximate k-NN method used for search operations. For more information about the available methods, see [vector index method definitions]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/). The method requires training to be valid. +`method` | The configuration of the approximate k-NN method used for search operations. For more information about the available methods, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). The method requires training to be valid. 
`space_type` | The space type for which this model is trained, for example, Euclidean or cosine. Note: This value can also be set in the `method` parameter. #### Usage diff --git a/_vector-search/creating-vector-index/index.md b/_vector-search/creating-vector-index.md similarity index 79% rename from _vector-search/creating-vector-index/index.md rename to _vector-search/creating-vector-index.md index be1a511e7e..acd1b5b834 100644 --- a/_vector-search/creating-vector-index/index.md +++ b/_vector-search/creating-vector-index.md @@ -2,7 +2,6 @@ layout: default title: Creating a vector index nav_order: 20 -has_children: true redirect_from: - /vector-search/creating-a-vector-db/ - /search-plugins/knn/knn-index/ @@ -28,15 +27,7 @@ PUT /test-index "my_vector1": { "type": "knn_vector", "dimension": 3, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "lucene", - "parameters": { - "ef_construction": 128, - "m": 24 - } - } + "space_type": "l2" } } } @@ -51,16 +42,16 @@ Regardless of the type of vector search, the following elements are part of crea Set `index.knn` to `true` in the index settings to enable k-NN search functionality. 2. **Define a vector field**: - Specify the field that will store the vector data. + Specify the field that will store the vector data. When defining a `knn_vector` field in OpenSearch, you can select from different data types to balance storage requirements and performance. By default, k-NN vectors are float vectors, but you can also opt for byte or binary vectors for more efficient storage. For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/). 3. **Specify dimension**: Set the `dimension` property to match the size of the vectors used. 4. **Choose a space type**: - Select a distance metric for similarity comparisons, such as `l2` (Euclidean distance) or `cosinesimil`. + Select a distance metric for similarity comparisons, such as `l2` (Euclidean distance) or `cosinesimil`. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). -5. **Select a method**: - Configure the indexing method, such as HNSW or IVF, to optimize vector search performance. +5. (Advanced) **Select a method**: + Optionally, configure the indexing method, such as HNSW or IVF, to optimize vector search performance. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). 
To create a vector index, choose one of the following options: @@ -155,9 +146,9 @@ PUT /my-semantic-search-index ``` {% include copy-curl.html %} -## Next steps +## Related articles - [Ingesting data into a vector index]({{site.url}}{{site.baseurl}}/vector-search/searching-data/) -- [Vector data types]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/vector-field/) -- [Supported methods]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/) +- [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) +- [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) - [k-NN vector field type]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) \ No newline at end of file diff --git a/_vector-search/creating-vector-index/method.md b/_vector-search/creating-vector-index/method.md deleted file mode 100644 index 43a0a1ac25..0000000000 --- a/_vector-search/creating-vector-index/method.md +++ /dev/null @@ -1,283 +0,0 @@ ---- -layout: default -title: Supported methods -parent: Creating a vector index -nav_order: 20 ---- - -# Supported methods - -A _method_ definition refers to the underlying configuration of the [approximate k-NN]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/approximate-knn/) algorithm you want to use. Method definitions are used to either create a `knn_vector` field (when the method does not require training) or [create a model during training]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) that can then be used to [create a `knn_vector` field]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model). - -A method definition will always contain the name of the method, the space_type the method is built for, the engine -(the library) to use, and a map of parameters. - -Mapping parameter | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`name` | Yes | N/A | No | The identifier for the nearest neighbor method. -`space_type` | No | `l2` | No | The vector space used to calculate the distance between vectors. Note: This value can also be specified at the top level of the mapping. -`engine` | No | `faiss` | No | The approximate k-NN library to use for indexing and search. The available libraries are `faiss`, `lucene`, and `nmslib` (deprecated). -`parameters` | No | `null` | No | The parameters used for the nearest neighbor method. - -## Supported NMSLIB methods - -Method name | Requires training | Supported spaces | Description -:--- | :--- | :--- | :--- -`hnsw` | No | `l2`, `innerproduct`, `cosinesimil`, `l1`, `linf` | Hierarchical proximity graph approach to approximate k-NN search. For more details on the algorithm, see this [abstract](https://arxiv.org/abs/1603.09320). - -### HNSW parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed. -`m` | No | 16 | No | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100. - -For NMSLIB (deprecated), *ef_search* is set in the [index settings]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings). 
-{: .note} - -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). -{: .note} - -## Supported Faiss methods - -Method name | Requires training | Supported spaces | Description -:--- | :--- |:---| :--- -`hnsw` | No | `l2`, `innerproduct`, `hamming` | Hierarchical proximity graph approach to approximate k-NN search. -`ivf` | Yes | `l2`, `innerproduct`, `hamming` | Stands for _inverted file index_. Bucketing approach where vectors are assigned different buckets based on clustering and, during search, only a subset of the buckets is searched. - -For HNSW, `innerproduct` is not available when PQ is used. -{: .note} - -The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). -{: .note} - -### HNSW parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`ef_search` | No | 100 | No | The size of the dynamic list used during k-NN searches. Higher values result in more accurate but slower searches. -`ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed. -`m` | No | 16 | No | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100. -`encoder` | No | flat | No | Encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index, at the expense of search accuracy. - -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` and `ef_search` values (`512`). -{: .note} - -### IVF parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`nlist` | No | 4 | No | Number of buckets to partition vectors into. Higher values may lead to more accurate searches at the expense of memory and training latency. For more information about choosing the right value, refer to [Guidelines to choose an index](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index). -`nprobes` | No | 1 | No | Number of buckets to search during query. Higher values lead to more accurate but slower searches. -`encoder` | No | flat | No | Encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index, at the expense of search accuracy. - -For more information about setting these parameters, refer to the [Faiss documentation](https://github.com/facebookresearch/faiss/wiki/Faiss-indexes). - -### IVF training requirements - -The IVF algorithm requires a training step. To create an index that uses IVF, you need to train a model with the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model), passing the IVF method definition. IVF requires that, at a minimum, there are `nlist` training data points, but we recommend [that you use more than this](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index#how-big-is-the-dataset). Training data can be composed of either the same data that is going to be ingested or a separate dataset. 
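As a rough way to sanity-check training set size before calling the Train API, here is a minimal Python sketch (illustrative only; the function is ours, and the 39x-per-centroid target mirrors the warning threshold used by Faiss k-means clustering rather than an OpenSearch requirement):

```python
def check_ivf_training_size(num_train_vectors: int, nlist: int) -> None:
    """Sanity-check IVF training data size.

    The hard minimum (at least nlist points) comes from this section;
    the 39x-per-centroid target is an assumption borrowed from the
    warning threshold in Faiss k-means clustering.
    """
    if num_train_vectors < nlist:
        raise ValueError(f"IVF needs at least nlist={nlist} training vectors")
    if num_train_vectors < 39 * nlist:
        print(f"warning: fewer than {39 * nlist} training vectors; "
              "centroid quality may suffer")

check_ivf_training_size(num_train_vectors=10_000, nlist=128)
```
{% include copy.html %}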
- -## Supported Lucene methods - -Method name | Requires training | Supported spaces | Description -:--- | :--- |:--------------------------------------------------------------------------------| :--- -`hnsw` | No | `l2`, `cosinesimil`, `innerproduct` (supported in OpenSearch 2.13 and later) | Hierarchical proximity graph approach to approximate k-NN search. - -### HNSW parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed.
The Lucene engine uses the proprietary term "beam_width" to describe this function, which corresponds directly to "ef_construction". To be consistent throughout the OpenSearch documentation, we retain the term "ef_construction" for this parameter. -`m` | No | 16 | No | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between 2 and 100.
The Lucene engine uses the proprietary term "max_connections" to describe this function, which corresponds directly to "m". To be consistent throughout OpenSearch documentation, we retain the term "m" to label this parameter. - -Lucene HNSW implementation ignores `ef_search` and dynamically sets it to the value of "k" in the search request. Therefore, there is no need to make settings for `ef_search` when using the Lucene engine. -{: .note} - -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). -{: .note} - -```json -"method": { - "name":"hnsw", - "engine":"lucene", - "parameters":{ - "m":2048, - "ef_construction": 245 - } -} -``` - -## Supported Faiss encoders - -You can use encoders to reduce the memory footprint of a vector index at the expense of search accuracy. OpenSearch currently supports the `flat`, `pq`, and `sq` encoders in the Faiss library. - -The following example method definition specifies the `hnsw` method and a `pq` encoder: - -```json -"method": { - "name":"hnsw", - "engine":"faiss", - "parameters":{ - "encoder":{ - "name":"pq", - "parameters":{ - "code_size": 8, - "m": 8 - } - } - } -} -``` - -The `hnsw` method supports the `pq` encoder for OpenSearch versions 2.10 and later. The `code_size` parameter of a `pq` encoder with the `hnsw` method must be **8**. -{: .important} - -Encoder name | Requires training | Description -:--- | :--- | :--- -`flat` (Default) | No | Encode vectors as floating-point arrays. This encoding does not reduce memory footprint. -`pq` | Yes | An abbreviation for _product quantization_, it is a lossy compression technique that uses clustering to encode a vector into a fixed size of bytes, with the goal of minimizing the drop in k-NN search accuracy. At a high level, vectors are broken up into `m` subvectors, and then each subvector is represented by a `code_size` code obtained from a code book produced during training. For more information about product quantization, see [this blog post](https://medium.com/dotstar/understanding-faiss-part-2-79d90b1e5388). -`sq` | No | An abbreviation for _scalar quantization_. Starting with OpenSearch version 2.13, you can use the `sq` encoder to quantize 32-bit floating-point vectors into 16-bit floats. In version 2.13, the built-in `sq` encoder is the SQFP16 Faiss encoder. The encoder reduces memory footprint with a minimal loss of precision and improves performance by using SIMD optimization (using AVX2 on x86 architecture or Neon on ARM64 architecture). For more information, see [Faiss scalar quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/faiss-16-bit-quantization/). - -### PQ parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`m` | No | 1 | No | Determines the number of subvectors into which to break the vector. Subvectors are encoded independently of each other. This vector dimension must be divisible by `m`. Maximum value is 1,024. -`code_size` | No | 8 | No | Determines the number of bits into which to encode a subvector. Maximum value is 8. For IVF, this value must be less than or equal to 8. For HNSW, this value can only be 8. - -### SQ parameters - -Parameter name | Required | Default | Updatable | Description -:--- | :--- | :-- | :--- | :--- -`type` | No | `fp16` | No | The type of scalar quantization to be used to encode 32-bit float vectors into the corresponding type. As of OpenSearch 2.13, only the `fp16` encoder type is supported. 
For the `fp16` encoder, vector values must be in the [-65504.0, 65504.0] range. -`clip` | No | `false` | No | If `true`, then any vector values outside of the supported range for the specified vector type are rounded so that they are in the range. If `false`, then the request is rejected if any vector values are outside of the supported range. Setting `clip` to `true` may decrease recall. - -For more information and examples, see [Using Faiss scalar quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/faiss-16-bit-quantization/). - -### Examples - -The following example uses the `ivf` method without specifying an encoder (by default, OpenSearch uses the `flat` encoder): - -```json -"method": { - "name":"ivf", - "engine":"faiss", - "parameters":{ - "nlist": 4, - "nprobes": 2 - } -} -``` - -The following example uses the `ivf` method with a `pq` encoder: - -```json -"method": { - "name":"ivf", - "engine":"faiss", - "parameters":{ - "encoder":{ - "name":"pq", - "parameters":{ - "code_size": 8, - "m": 8 - } - } - } -} -``` - -The following example uses the `hnsw` method without specifying an encoder (by default, OpenSearch uses the `flat` encoder): - -```json -"method": { - "name":"hnsw", - "engine":"faiss", - "parameters":{ - "ef_construction": 256, - "m": 8 - } -} -``` - -The following example uses the `hnsw` method with an `sq` encoder of type `fp16` with `clip` enabled: - -```json -"method": { - "name":"hnsw", - "engine":"faiss", - "parameters":{ - "encoder": { - "name": "sq", - "parameters": { - "type": "fp16", - "clip": true - } - }, - "ef_construction": 256, - "m": 8 - } -} -``` - -The following example uses the `ivf` method with an `sq` encoder of type `fp16`: - -```json -"method": { - "name":"ivf", - "engine":"faiss", - "parameters":{ - "encoder": { - "name": "sq", - "parameters": { - "type": "fp16", - "clip": false - } - }, - "nprobes": 2 - } -} -``` - -## Choosing the right method - -There are several options to choose from when building your `knn_vector` field. To determine the correct methods and parameters, you should first understand the requirements of your workload and what trade-offs you are willing to make. Factors to consider are (1) query latency, (2) query quality, (3) memory limits, and (4) indexing latency. - -If memory is not a concern, HNSW offers a strong query latency/query quality trade-off. - -If you want to use less memory and increase indexing speed as compared to HNSW while maintaining similar query quality, you should evaluate IVF. - -If memory is a concern, consider adding a PQ encoder to your HNSW or IVF index. Because PQ is a lossy encoding, query quality will drop. - -You can reduce the memory footprint by a factor of 2, with a minimal loss in search quality, by using the [`fp_16` encoder]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/faiss-16-bit-quantization/). If your vector dimensions are within the [-128, 127] byte range, we recommend using the [byte quantizer]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#byte-vectors) to reduce the memory footprint by a factor of 4. To learn more about vector quantization options, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/). - -## Memory estimation - -In a typical OpenSearch cluster, a certain portion of RAM is reserved for the JVM heap. OpenSearch allocates native library indexes to a portion of the remaining RAM. 
This portion's size is determined by the `circuit_breaker_limit` cluster setting. By default, the limit is set to 50%. - -Having a replica doubles the total number of vectors. -{: .note } - -For information about using memory estimation with vector quantization, see the [vector quantization documentation]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/knn-vector-quantization/). -{: .note } - -### HNSW memory estimation - -The memory required for HNSW is estimated to be `1.1 * (4 * dimension + 8 * M)` bytes/vector. - -As an example, assume you have a million vectors with a dimension of 256 and M of 16. The memory requirement can be estimated as follows: - -``` -1.1 * (4 * 256 + 8 * 16) * 1,000,000 ~= 1.267 GB -``` - -### IVF memory estimation - -The memory required for IVF is estimated to be `1.1 * (((4 * dimension) * num_vectors) + (4 * nlist * d))` bytes. - -As an example, assume you have a million vectors with a dimension of 256 and `nlist` of 128. The memory requirement can be estimated as follows: - -``` -1.1 * (((4 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 1.126 GB - -``` diff --git a/_vector-search/creating-vector-index/vector-field.md b/_vector-search/creating-vector-index/vector-field.md deleted file mode 100644 index 0af15e8cd5..0000000000 --- a/_vector-search/creating-vector-index/vector-field.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -layout: default -title: Vector data types -parent: Creating a vector index -nav_order: 10 ---- - -# Vector data types - -The `knn_vector` data type allows you to ingest vectors into an OpenSearch index and perform different kinds of vector search. The `knn_vector` field is highly configurable and can serve many different k-NN workloads. For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/). - -When defining a `knn_vector` field in OpenSearch, you can select from different data types to balance storage requirements and performance. By default, k-NN vectors are float vectors, but you can also opt for byte or binary vectors for more efficient storage. - -## Float vectors - -Float is the default type for `knn_vector` fields. Each dimension is stored as a 4-byte floating-point number. - -## Byte vectors - -Starting with OpenSearch version 2.17, you can use `byte` vectors with the `faiss` and `lucene` engines to reduce the amount of required memory and storage space. Each dimension is stored as a signed 8-bit integer, significantly reducing storage space. For more information, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors). - -## Binary vectors - -Starting with OpenSearch version 2.16, you can use `binary` vectors with the `faiss` engine to reduce the amount of required storage space. For more information, see [Binary vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). - -## SIMD optimization for the Faiss engine - -Starting with version 2.13, the k-NN plugin supports [Single Instruction Multiple Data (SIMD)](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) processing if the underlying hardware supports SIMD instructions (AVX2 on x64 architecture and Neon on ARM64 architecture). SIMD is supported by default on Linux machines only for the Faiss engine. SIMD architecture helps boost overall performance by improving indexing throughput and reducing search latency. 
Starting with version 2.18, the k-NN plugin supports AVX-512 SIMD instructions on x64 architecture. Starting with version 2.19, the k-NN plugin supports advanced AVX-512 SIMD instructions on x64 architecture for Intel Sapphire Rapids or a newer-generation processor, improving the performance of Hamming distance computation. - -SIMD optimization is applicable only if the vector dimension is a multiple of 8. -{: .note} - - -### x64 architecture - - -For x64 architecture, the following versions of the Faiss library are built and shipped with the artifact: - -- `libopensearchknn_faiss_avx512_spr.so`: The Faiss library containing advanced AVX-512 SIMD instructions for newer-generation processors, available on public clouds such as AWS for c/m/r 7i or newer instances. -- `libopensearchknn_faiss_avx512.so`: The Faiss library containing AVX-512 SIMD instructions. -- `libopensearchknn_faiss_avx2.so`: The Faiss library containing AVX2 SIMD instructions. -- `libopensearchknn_faiss.so`: The non-optimized Faiss library without SIMD instructions. - -When using the Faiss library, the performance ranking is as follows: advanced AVX-512 > AVX-512 > AVX2 > no optimization. -{: .note } - -If your hardware supports advanced AVX-512(spr), the k-NN plugin loads the `libopensearchknn_faiss_avx512_spr.so` library at runtime. - -If your hardware supports AVX-512, OpenSearch loads the `libopensearchknn_faiss_avx512.so` library at runtime. - -If your hardware supports AVX2 but doesn't support AVX-512, Open loads the `libopensearchknn_faiss_avx2.so` library at runtime. - -To disable the advanced AVX-512 (for Sapphire Rapids or newer-generation processors), AVX-512, and AVX2 SIMD instructions and load the non-optimized Faiss library (`libopensearchknn_faiss.so`), specify the `knn.faiss.avx512_spr.disabled`, `knn.faiss.avx512.disabled`, and `knn.faiss.avx2.disabled` static settings as `true` in `opensearch.yml` (by default, all of these are `false`). - -Note that to update a static setting, you must stop the cluster, change the setting, and restart the cluster. For more information, see [Static settings]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#static-settings). - -### ARM64 architecture - -For the ARM64 architecture, only one performance-boosting Faiss library (`libopensearchknn_faiss.so`) is built and shipped. The library contains Neon SIMD instructions and cannot be disabled. 
\ No newline at end of file
diff --git a/_vector-search/getting-started/auto-generated-embeddings.md b/_vector-search/getting-started/auto-generated-embeddings.md
index afcdfe5e56..38592940d4 100644
--- a/_vector-search/getting-started/auto-generated-embeddings.md
+++ b/_vector-search/getting-started/auto-generated-embeddings.md
@@ -134,12 +134,7 @@ PUT /my-nlp-index
       "passage_embedding": {
         "type": "knn_vector",
         "dimension": 768,
-        "method": {
-          "engine": "lucene",
-          "space_type": "l2",
-          "name": "hnsw",
-          "parameters": {}
-        }
+        "space_type": "l2"
       },
       "text": {
         "type": "text"
@@ -255,4 +250,5 @@ The response contains the matching documents:
 
 ## Next steps
 
-- Learn about configuring semantic and hybrid search in the [Getting started with semantic and hybrid search]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/neural-search-tutorial/)
\ No newline at end of file
+- Learn about configuring semantic and hybrid search in the [Getting started with semantic and hybrid search]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/neural-search-tutorial/)
+- Learn about the supported types of ML-powered search in [ML-powered search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/)
\ No newline at end of file
diff --git a/_vector-search/getting-started/index.md b/_vector-search/getting-started/index.md
index 45ac08d2cd..b147d57b5b 100644
--- a/_vector-search/getting-started/index.md
+++ b/_vector-search/getting-started/index.md
@@ -22,7 +22,7 @@ pre_items:
     description: "Generate embeddings outside of OpenSearch using your favorite embedding utility."
   - heading: "Create an OpenSearch index"
     description: "Create an OpenSearch index to upload your embeddings."
-    link: "/vector-search/creating-vector-index/index/#pre-generated-embeddings-or-raw-vectors"
+    link: "/vector-search/creating-vector-index/#pre-generated-embeddings-or-raw-vectors"
   - heading: "Ingest embeddings"
     description: "Ingest your embeddings into the index."
     link: "/vector-search/ingesting-data/#raw-vector-ingestion"
@@ -35,7 +35,7 @@ auto_items:
     link: "/ml-commons-plugin/integrating-ml-models/"
   - heading: "Create an OpenSearch index"
     description: "Create an OpenSearch index to upload your text."
-    link: "/vector-search/creating-vector-index/index/#auto-generated-embeddings"
+    link: "/vector-search/creating-vector-index/#auto-generated-embeddings"
   - heading: "Ingest text"
     description: "Ingest your text into the index."
link: "/vector-search/ingesting-data/#auto-generated-embeddings" diff --git a/_vector-search/getting-started/pre-generated-embeddings.md b/_vector-search/getting-started/pre-generated-embeddings.md index 0be364c174..18978a6f07 100644 --- a/_vector-search/getting-started/pre-generated-embeddings.md +++ b/_vector-search/getting-started/pre-generated-embeddings.md @@ -24,10 +24,7 @@ PUT /hotels-index { "settings": { "index": { - "knn": true, - "knn.algo_param.ef_search": 100, - "number_of_shards": 1, - "number_of_replicas": 0 + "knn": true } }, "mappings": { @@ -35,15 +32,7 @@ PUT /hotels-index "location": { "type": "knn_vector", "dimension": 2, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "lucene", - "parameters": { - "ef_construction": 100, - "m": 16 - } - } + "space_type": "l2" } } } @@ -152,4 +141,8 @@ The response contains the hotels closest to the specified pin location: ] } } -``` \ No newline at end of file +``` + +## Next steps + +- [Vector search techniques]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/) \ No newline at end of file diff --git a/_vector-search/getting-started/tutorials/neural-search-tutorial.md b/_vector-search/getting-started/tutorials/neural-search-tutorial.md index 12090db3bd..55cf583296 100644 --- a/_vector-search/getting-started/tutorials/neural-search-tutorial.md +++ b/_vector-search/getting-started/tutorials/neural-search-tutorial.md @@ -365,12 +365,7 @@ PUT /my-nlp-index "passage_embedding": { "type": "knn_vector", "dimension": 768, - "method": { - "engine": "lucene", - "space_type": "l2", - "name": "hnsw", - "parameters": {} - } + "space_type": "l2" }, "text": { "type": "text" diff --git a/_vector-search/getting-started/tutorials/semantic-search-byte-vectors.md b/_vector-search/getting-started/tutorials/semantic-search-byte-vectors.md index b33de5b91a..bf0b7d162b 100644 --- a/_vector-search/getting-started/tutorials/semantic-search-byte-vectors.md +++ b/_vector-search/getting-started/tutorials/semantic-search-byte-vectors.md @@ -10,7 +10,7 @@ redirect_from: # Semantic search using byte-quantized vectors -This tutorial shows you how to build a semantic search using the [Cohere Embed model](https://docs.cohere.com/reference/embed) and byte-quantized vectors. For more information about using byte-quantized vectors, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#byte-vectors). +This tutorial shows you how to build a semantic search using the [Cohere Embed model](https://docs.cohere.com/reference/embed) and byte-quantized vectors. For more information about using byte-quantized vectors, see [Byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#byte-vectors). The Cohere Embed v3 model supports several `embedding_types`. For this tutorial, you'll use the `INT8` type to encode byte-quantized vectors. diff --git a/_vector-search/index.md b/_vector-search/index.md index ffd897b297..573eb7c289 100644 --- a/_vector-search/index.md +++ b/_vector-search/index.md @@ -19,9 +19,12 @@ raw_steps: - heading: "Specialized vector search" description: "Learn about specialized vector search use cases, such as filtering, nested field search, and radial search." link: "/vector-search/specialized-operations/" - - heading: "Optimizing vector search performance" + - heading: "Optimizing vector storage" description: "Learn about optimizing vector search reduce memory usage and improve performance." 
-    link: "/vector-search/optimizing-performance/"
+    link: "/vector-search/optimizing-storage/"
+  - heading: "Performance tuning"
+    description: "Learn about tuning vector search to improve indexing and search performance."
+    link: "/vector-search/performance-tuning/"
 ml_steps:
   - heading: "Auto-generated embeddings quickstart"
     description: "Follow a quickstart tutorial for text-to-embedding search."
     link: "/vector-search/getting-started/auto-generated-embeddings/"
@@ -32,9 +35,12 @@ ml_steps:
   - heading: "ML-powered search"
     description: "Learn about many ML-powered search options that OpenSearch provides."
     link: "/vector-search/ml-powered-search/"
-  - heading: "Optimizing vector search performance"
-    description: "Learn about optimizing vector search reduce memory usage and improve performance."
-    link: "/vector-search/optimizing-performance/"
+  - heading: "Optimizing vector storage"
+    description: "Learn about optimizing vector storage to reduce memory usage."
+    link: "/vector-search/optimizing-storage/"
+  - heading: "Performance tuning"
+    description: "Learn about tuning vector search to improve indexing and search performance."
+    link: "/vector-search/performance-tuning/"
 ---
 
 # Vector search
 
@@ -43,6 +49,10 @@ Traditional lexical search, based on term frequency models like BM25, is effecti
 
 OpenSearch combines traditional search, analytics, and vector search into a single, unified solution. Its vector database capabilities simplify the development of artificial intelligence (AI) applications by reducing the effort required to manage and integrate AI-generated assets. You can bring your models, vectors, and metadata into OpenSearch to enable vector, lexical, and hybrid search and analytics, all with built-in performance and scalability.
 
+## Using OpenSearch as a vector database
+
+OpenSearch provides an integrated vector database that can support AI systems by serving as a knowledge base. This benefits AI applications like generative AI and natural language search by providing a long-term memory of AI-generated outputs. These outputs can be used to enhance information retrieval and analytics, improve efficiency and stability, and give generative AI models a broader and deeper pool of data from which to draw more accurate responses to queries.
+
 ## Key features
 
 OpenSearch vector search supports the following key features:
@@ -53,12 +63,6 @@ OpenSearch vector search supports the following key features:
 - **Memory-efficient search**: Optimize memory usage through various quantization techniques and efficient indexing methods, making vector search practical even with large-scale deployments.
 - **Hybrid search capabilities**: Combine traditional keyword search with vector-based semantic search to use the strengths of both approaches, improving search relevance and accuracy.
 
-## Using OpenSearch as a vector database
-
-OpenSearch provides an integrated  vector database that can support AI systems by serving as a knowledge base. This benefits AI applications like generative AI and natural language search by providing a long-term memory of AI-generated outputs. These outputs can be used to enhance information retrieval and analytics, improve efficiency and stability, and give generative AI models a broader and deeper pool of data from which to draw more accurate responses to queries.
- - - [Get started]({{site.url}}{{site.baseurl}}/vector-search/getting-started/){: .btn-dark-blue} diff --git a/_vector-search/ingesting-data.md b/_vector-search/ingesting-data.md index 635485c44c..5df1bb1326 100644 --- a/_vector-search/ingesting-data.md +++ b/_vector-search/ingesting-data.md @@ -74,7 +74,7 @@ POST /my-semantic-search-index/_doc The pipeline automatically generates and stores the embeddings in the `passage_embedding` field. -## Next steps +## Related articles - [Searching vector data]({{site.url}}{{site.baseurl}}/vector-search/searching-data/) - [Bulk API]({{site.url}}{{site.baseurl}}/api-reference/document-apis/bulk/) diff --git a/_vector-search/ml-powered-search/conversational-search.md b/_vector-search/ml-powered-search/conversational-search.md index ddf0e921ad..823a3bc587 100644 --- a/_vector-search/ml-powered-search/conversational-search.md +++ b/_vector-search/ml-powered-search/conversational-search.md @@ -443,7 +443,4 @@ The response contains both messages: ## Next steps -- To learn more about connecting to models on external platforms, see [Connectors]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/connectors/). -- For supported APIs, see [Memory APIs]({{site.url}}{{site.baseurl}}/ml-commons-plugin/api/memory-apis/index/). -- To learn more about search pipelines and processors, see [Search pipelines]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/index/). -- For available OpenSearch queries, see [Query DSL]({{site.url}}{{site.baseurl}}/query-dsl/). \ No newline at end of file +- Explore our [tutorials]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/) to learn how to build ML-powered search applications. \ No newline at end of file diff --git a/_vector-search/ml-powered-search/hybrid-search.md b/_vector-search/ml-powered-search/hybrid-search.md index ed9f0139e5..9828db8270 100644 --- a/_vector-search/ml-powered-search/hybrid-search.md +++ b/_vector-search/ml-powered-search/hybrid-search.md @@ -1417,3 +1417,7 @@ The response excludes the first five entries and displays the remaining results: } } ``` + +## Next steps + +- Explore our [tutorials]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/) to learn how to build ML-powered search applications. \ No newline at end of file diff --git a/_vector-search/ml-powered-search/multimodal-search.md b/_vector-search/ml-powered-search/multimodal-search.md index 53c9a6f865..b07ac9c6d2 100644 --- a/_vector-search/ml-powered-search/multimodal-search.md +++ b/_vector-search/ml-powered-search/multimodal-search.md @@ -133,3 +133,7 @@ GET /my-nlp-index/_search {% include copy-curl.html %} To eliminate passing the model ID with each neural query request, you can set a default model on a vector index or a field. To learn more, see [Setting a default model on an index or field]({{site.url}}{{site.baseurl}}/search-plugins/neural-text-search/##setting-a-default-model-on-an-index-or-field). + +## Next steps + +- Explore our [tutorials]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/) to learn how to build ML-powered search applications. 
\ No newline at end of file diff --git a/_vector-search/ml-powered-search/neural-sparse-search.md b/_vector-search/ml-powered-search/neural-sparse-search.md index ca9299d560..30d66325ec 100644 --- a/_vector-search/ml-powered-search/neural-sparse-search.md +++ b/_vector-search/ml-powered-search/neural-sparse-search.md @@ -62,3 +62,4 @@ For information about `two_phase_search_pipeline`, see [Neural sparse query two- - Learn more about how sparse encoding models work and explore OpenSearch neural sparse search benchmarks in [Improving document retrieval with sparse semantic encoders](https://opensearch.org/blog/improving-document-retrieval-with-sparse-semantic-encoders/). - Learn the fundamentals of neural sparse search and its efficiency in [A deep dive into faster semantic sparse retrieval in OpenSearch 2.12](https://opensearch.org/blog/A-deep-dive-into-faster-semantic-sparse-retrieval-in-OS-2.12/). +- Explore our [tutorials]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/) to learn how to build ML-powered search applications. diff --git a/_vector-search/ml-powered-search/neural-sparse-with-pipelines.md b/_vector-search/ml-powered-search/neural-sparse-with-pipelines.md index e11946e741..71e4284065 100644 --- a/_vector-search/ml-powered-search/neural-sparse-with-pipelines.md +++ b/_vector-search/ml-powered-search/neural-sparse-with-pipelines.md @@ -513,4 +513,8 @@ For OpenSearch versions earlier than 2.15, a throttling exception will be return } ``` -To mitigate throttling exceptions, decrease the maximum number of connections specified in the `max_connection` setting in the connector's [`client_config`]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/blueprints/#configuration-parameters) object. Doing so will prevent the maximum number of concurrent connections from exceeding the threshold of the remote service. You can also modify the retry settings to avoid a request spike during ingestion. \ No newline at end of file +To mitigate throttling exceptions, decrease the maximum number of connections specified in the `max_connection` setting in the connector's [`client_config`]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/blueprints/#configuration-parameters) object. Doing so will prevent the maximum number of concurrent connections from exceeding the threshold of the remote service. You can also modify the retry settings to avoid a request spike during ingestion. + +## Next steps + +- Explore our [tutorials]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/) to learn how to build ML-powered search applications. \ No newline at end of file diff --git a/_vector-search/ml-powered-search/neural-sparse-with-raw-vectors.md b/_vector-search/ml-powered-search/neural-sparse-with-raw-vectors.md index 4c4d604464..dabde80279 100644 --- a/_vector-search/ml-powered-search/neural-sparse-with-raw-vectors.md +++ b/_vector-search/ml-powered-search/neural-sparse-with-raw-vectors.md @@ -100,3 +100,7 @@ GET my-nlp-index/_search ## Accelerating neural sparse search To learn more about improving retrieval time for neural sparse search, see [Accelerating neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/#accelerating-neural-sparse-search). + +## Next steps + +- Explore our [tutorials]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/) to learn how to build ML-powered search applications. 
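+
+As a quick sketch of the acceleration approach linked above, the following requests create a two-phase search pipeline and set it as the index default. This is illustrative rather than a complete walkthrough: the pipeline name `two_phase_search_pipeline` is an example, `my-nlp-index` is the index used earlier on this page, and the `neural_sparse_two_phase_processor` options are left at their defaults (see the linked documentation for the full parameter list):
+
+```json
+PUT /_search/pipeline/two_phase_search_pipeline
+{
+  "request_processors": [
+    {
+      "neural_sparse_two_phase_processor": {
+        "tag": "neural-sparse",
+        "enabled": true
+      }
+    }
+  ]
+}
+```
+{% include copy-curl.html %}
+
+```json
+PUT /my-nlp-index/_settings
+{
+  "index.search.default_pipeline": "two_phase_search_pipeline"
+}
+```
+{% include copy-curl.html %}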
diff --git a/_vector-search/ml-powered-search/semantic-search.md b/_vector-search/ml-powered-search/semantic-search.md index 63f9380b49..ce33b93896 100644 --- a/_vector-search/ml-powered-search/semantic-search.md +++ b/_vector-search/ml-powered-search/semantic-search.md @@ -299,4 +299,8 @@ The response contains both documents: ] } } -``` \ No newline at end of file +``` + +## Next steps + +- Explore our [tutorials]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/) to learn how to build ML-powered search applications. \ No newline at end of file diff --git a/_vector-search/ml-powered-search/text-chunking.md b/_vector-search/ml-powered-search/text-chunking.md index 2214d16b5d..8cf969e6eb 100644 --- a/_vector-search/ml-powered-search/text-chunking.md +++ b/_vector-search/ml-powered-search/text-chunking.md @@ -117,3 +117,7 @@ GET testindex/_search } ``` {% include copy-curl.html %} + +## Next steps + +- Explore our [tutorials]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/) to learn how to build ML-powered search applications. diff --git a/_vector-search/optimizing-performance/index.md b/_vector-search/optimizing-performance/index.md deleted file mode 100644 index 0c1cb6f82c..0000000000 --- a/_vector-search/optimizing-performance/index.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -layout: default -title: Optimizing vector search performance -nav_order: 60 -has_children: true -has_toc: false -redirect_from: - - /vector-search/optimizing-performance/ -storage_cards: - - heading: "Vector quantization" - description: "Reduce vector storage space by quantizing vectors." - link: "/vector-search/optimizing-performance/knn-vector-quantization/" - - heading: "Disk-based vector search" - description: "Uses binary quantization to reduce operational costs of vector workloads." - link: "/vector-search/optimizing-performance/disk-based-vector-search/" -performance_cards: - - heading: "Performance tuning" - description: "Improve indexing and search performance for approximate k-NN (ANN)." - link: "/vector-search/optimizing-performance/performance-tuning/" ---- - -# Optimizing vector search performance - -Vector search operations can be resource-intensive, especially when dealing with large-scale vector datasets. OpenSearch provides several optimization techniques to reduce memory usage and enhance search performance. - -## Optimizing vector storage - -{% include cards.html cards=page.storage_cards %} - -## Optimizing performance - -{% include cards.html cards=page.performance_cards %} \ No newline at end of file diff --git a/_vector-search/optimizing-performance/performance-tuning.md b/_vector-search/optimizing-performance/performance-tuning.md deleted file mode 100644 index d5516cdfe6..0000000000 --- a/_vector-search/optimizing-performance/performance-tuning.md +++ /dev/null @@ -1,225 +0,0 @@ ---- -layout: default -title: Performance tuning -nav_order: 30 -parent: Optimizing vector search performance -redirect_from: - - /search-plugins/knn/performance-tuning/ ---- - -# Performance tuning - -This topic provides performance tuning recommendations to improve indexing and search performance for approximate k-NN (ANN). From a high level, k-NN works according to these principles: -* Native library indexes are created per knn_vector field / (Lucene) segment pair. -* Queries execute on segments sequentially inside the shard (same as any other OpenSearch query). -* Each native library index in the segment returns <=k neighbors. 
-* The coordinator node picks up final size number of neighbors from the neighbors returned by each shard. - -This topic also provides recommendations for comparing approximate k-NN to exact k-NN with score script. - -## Indexing performance tuning - -Take any of the following steps to improve indexing performance, especially when you plan to index a large number of vectors at once. - -### Disable the refresh interval - -Either disable the refresh interval (default = 1 sec) or set a long duration for the refresh interval to avoid creating multiple small segments: - - ```json - PUT //_settings - { - "index" : { - "refresh_interval" : "-1" - } - } - ``` - -Make sure to reenable `refresh_interval` after indexing is complete. - -### Disable replicas (no OpenSearch replica shard) - - Set replicas to `0` to prevent duplicate construction of native library indexes in both primary and replica shards. When you enable replicas after indexing completes, the serialized native library indexes are copied directly. If you have no replicas, losing nodes might cause data loss, so it's important that the data be stored elsewhere so that this initial load can be retried in the event of an issue. - -### Increase the number of indexing threads - -If your hardware has multiple cores, you can allow multiple threads in native library index construction by speeding up the indexing process. Determine the number of threads to allot with the [knn.algo_param.index_thread_qty]({{site.url}}{{site.baseurl}}/search-plugins/knn/settings#cluster-settings) setting. - -Monitor CPU utilization and choose the correct number of threads. Because native library index construction is costly, choosing more threads then you need can cause additional CPU load. - - -### (Expert level) Disable vector field storage in the source field - -The `_source` field contains the original JSON document body that was passed at index time. This field is not indexed and is not searchable but is stored so that it can be returned when executing fetch requests such as `get` and `search`. When using vector fields within the source, you can remove the vector field to save disk space, as shown in the following example where the `location` vector is excluded: - - ```json - PUT //_mappings - { - "_source": { - "excludes": ["location"] - }, - "properties": { - "location": { - "type": "knn_vector", - "dimension": 2, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "faiss" - } - } - } - } - ``` - - -Disabling the `_source` field can cause certain features to become unavailable, such as the `update`, `update_by_query`, and `reindex` APIs and the ability to debug queries or aggregations by using the original document at index time. - -In OpenSearch 2.15 or later, you can further improve indexing speed and reduce disk space by removing the vector field from the `_recovery_source`, as shown in the following example: - - ```json - PUT //_mappings - { - "_source": { - "excludes": ["location"], - "recovery_source_excludes": ["location"] - }, - "properties": { - "location": { - "type": "knn_vector", - "dimension": 2, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "faiss" - } - } - } - } - ``` - -This is an expert-level setting. Disabling the `_recovery_source` may lead to failures during peer-to-peer recovery. 
Before disabling the `_recovery_source`, check with your OpenSearch cluster admin to determine whether your cluster performs regular flushes before starting the peer-to-peer recovery of shards prior to disabling the `_recovery_source`. -{: .warning} - -### (Expert level) Build vector data structures on demand - -This approach is recommended only for workloads that involve a single initial bulk upload and will be used exclusively for search after force merging to a single segment. - -During indexing, vector search builds a specialized data structure for a `knn_vector` field to enable efficient approximate k-NN search. However, these structures are rebuilt during [force merge]({{site.url}}{{site.baseurl}}/api-reference/index-apis/force-merge/) on vector indexes. To optimize indexing speed, follow these steps: - -1. **Disable vector data structure creation**: Disable vector data structure creation for new segments by setting [`index.knn.advanced.approximate_threshold`]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings) to `-1`. - - To specify the setting at index creation, send the following request: - - ```json - PUT /test-index/ - { - "settings": { - "index.knn.advanced.approximate_threshold": "-1" - } - } - ``` - {% include copy-curl.html %} - - To specify the setting after index creation, send the following request: - - ```json - PUT /test-index/_settings - { - "index.knn.advanced.approximate_threshold": "-1" - } - ``` - {% include copy-curl.html %} - -1. **Perform bulk indexing**: Index data in [bulk]({{site.url}}{{site.baseurl}}/api-reference/document-apis/bulk/) without performing any searches during ingestion: - - ```json - POST _bulk - { "index": { "_index": "test-index", "_id": "1" } } - { "my_vector1": [1.5, 2.5], "price": 12.2 } - { "index": { "_index": "test-index", "_id": "2" } } - { "my_vector1": [2.5, 3.5], "price": 7.1 } - ``` - {% include copy-curl.html %} - - If searches are performed while vector data structures are disabled, they will run using exact k-NN search. - -1. **Reenable vector data structure creation**: Once indexing is complete, enable vector data structure creation by setting `index.knn.advanced.approximate_threshold` to `0`: - - ```json - PUT /test-index/_settings - { - "index.knn.advanced.approximate_threshold": "0" - } - ``` - {% include copy-curl.html %} - - If you do not reset the setting to `0` before the force merge, you will need to reindex your data. - {: .note} - -1. **Force merge segments into one segment**: Perform a force merge and specify `max_num_segments=1` to create the vector data structures only once: - - ```json - POST test-index/_forcemerge?max_num_segments=1 - ``` - {% include copy-curl.html %} - - After the force merge, new search requests will execute approximate k-NN search using the newly created data structures. - -## Search performance tuning - -Take the following steps to improve search performance: - -### Reduce segment count - - To improve search performance, you must keep the number of segments under control. Lucene's IndexSearcher searches over all of the segments in a shard to find the 'size' best results. - - Ideally, having one segment per shard provides the optimal performance with respect to search latency. You can configure an index to have multiple shards to avoid giant shards and achieve more parallelism. - - You can control the number of segments by choosing a larger refresh interval, or during indexing by asking OpenSearch to slow down segment creation by disabling the refresh interval. 
- -### Warm up the index - - Native library indexes are constructed during indexing, but they're loaded into memory during the first search. In Lucene, each segment is searched sequentially (so, for k-NN, each segment returns up to k nearest neighbors of the query point), and the top 'size' number of results based on the score are returned from all the results returned by segments at a shard level (higher score = better result). - - Once a native library index is loaded (native library indexes are loaded outside OpenSearch JVM), OpenSearch caches them in memory. Initial queries are expensive and take a few seconds, while subsequent queries are faster and take milliseconds (assuming the k-NN circuit breaker isn't hit). - - To avoid this latency penalty during your first queries, you can use the warmup API operation on the indexes you want to search: - - ```json - GET /_plugins/_knn/warmup/index1,index2,index3?pretty - { - "_shards" : { - "total" : 6, - "successful" : 6, - "failed" : 0 - } - } - ``` - - The warmup API operation loads all native library indexes for all shards (primary and replica) for the specified indexes into the cache, so there's no penalty to load native library indexes during initial searches. - -This API operation only loads the segments of active indexes into the cache. If a merge or refresh operation finishes after the API runs, or if you add new documents, you need to rerun the API to load those native library indexes into memory. -{: .warning} - - -### Avoid reading stored fields - - If your use case is simply to read the IDs and scores of the nearest neighbors, you can disable reading stored fields, which saves time retrieving the vectors from stored fields. - -### Use `mmap` file I/O - - For the Lucene-based approximate k-NN search, there is no dedicated cache layer that speeds up read/write operations. Instead, the plugin relies on the existing caching mechanism in OpenSearch core. In versions 2.4 and earlier of the Lucene-based approximate k-NN search, read/write operations were based on Java NIO by default, which can be slow, depending on the Lucene version and number of segments per shard. Starting with version 2.5, k-NN enables [`mmap`](https://en.wikipedia.org/wiki/Mmap) file I/O by default when the store type is `hybridfs` (the default store type in OpenSearch). This leads to fast file I/O operations and improves the overall performance of both data ingestion and search. The two file extensions specific to vector values that use `mmap` are `.vec` and `.vem`. For more information about these file extensions, see [the Lucene documentation](https://lucene.apache.org/core/9_0_0/core/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsFormat.html). - - The `mmap` file I/O uses the system file cache rather than memory allocated for the Java heap, so no additional allocation is required. To change the default list of extensions set by the plugin, update the `index.store.hybrid.mmap.extensions` setting at the cluster level using the [Cluster Settings API]({{site.url}}{{site.baseurl}}/api-reference/cluster-api/cluster-settings). **Note**: This is an expert-level setting that requires closing the index before updating the setting and reopening it after the update. - -## Improving recall - -Recall depends on multiple factors like number of vectors, number of dimensions, segments, and so on. Searching over a large number of small segments and aggregating the results leads to better recall than searching over a small number of large segments and aggregating results. 
The larger the native library index, the more chances of losing recall if you're using smaller algorithm parameters. Choosing larger values for algorithm parameters should help solve this issue but sacrifices search latency and indexing time. That being said, it's important to understand your system's requirements for latency and accuracy, and then choose the number of segments you want your index to have based on experimentation. - -The default parameters work on a broader set of use cases, but make sure to run your own experiments on your data sets and choose the appropriate values. For index-level settings, see [Index settings]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings). - -## Approximate nearest neighbor versus score script - -The standard k-NN query and custom scoring option perform differently. Test with a representative set of documents to see if the search results and latencies match your expectations. - -Custom scoring works best if the initial filter reduces the number of documents to no more than 20,000. Increasing shard count can improve latency, but be sure to keep shard size within the [recommended guidelines]({{site.url}}{{site.baseurl}}/intro/#primary-and-replica-shards). diff --git a/_vector-search/optimizing-performance/binary-quantization.md b/_vector-search/optimizing-storage/binary-quantization.md similarity index 96% rename from _vector-search/optimizing-performance/binary-quantization.md rename to _vector-search/optimizing-storage/binary-quantization.md index e2903be2a7..cbda3c29f5 100644 --- a/_vector-search/optimizing-performance/binary-quantization.md +++ b/_vector-search/optimizing-storage/binary-quantization.md @@ -2,7 +2,7 @@ layout: default title: Binary quantization parent: Vector quantization -grand_parent: Optimizing vector search performance +grand_parent: Optimizing vector storage nav_order: 40 has_children: false has_math: true @@ -197,3 +197,8 @@ In 4-bit quantization, each dimension is represented using 4 bits, equivalent to Memory = 1.1 * ((256 * 4 / 8) + 8 * 16) * 1,000,000 ~= 0.282 GB ``` + +## Related articles + +- [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file diff --git a/_vector-search/optimizing-performance/disk-based-vector-search.md b/_vector-search/optimizing-storage/disk-based-vector-search.md similarity index 79% rename from _vector-search/optimizing-performance/disk-based-vector-search.md rename to _vector-search/optimizing-storage/disk-based-vector-search.md index bcf04aedfe..bb7abf912a 100644 --- a/_vector-search/optimizing-performance/disk-based-vector-search.md +++ b/_vector-search/optimizing-storage/disk-based-vector-search.md @@ -2,7 +2,7 @@ layout: default title: Disk-based vector search nav_order: 20 -parent: Optimizing vector search performance +parent: Optimizing vector storage has_children: false redirect_from: - /search-plugins/knn/disk-based-vector-search/ @@ -12,9 +12,9 @@ redirect_from: **Introduced 2.17** {: .label .label-purple} -For low-memory environments, OpenSearch provides _disk-based vector search_, which significantly reduces the operational costs for vector workloads. Disk-based vector search uses [binary quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/binary-quantization/), compressing vectors and thereby reducing the memory requirements. 
This memory optimization provides large memory savings at the cost of slightly increased search latency while still maintaining strong recall. +For low-memory environments, OpenSearch provides _disk-based vector search_, which significantly reduces the operational costs for vector workloads. Disk-based vector search uses [binary quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/binary-quantization/), compressing vectors and thereby reducing the memory requirements. This memory optimization provides large memory savings at the cost of slightly increased search latency while still maintaining strong recall. -To use disk-based vector search, set the [`mode`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#vector-workload-modes) parameter to `on_disk` for your vector field type. This parameter will configure your index to use secondary storage. +To use disk-based vector search, set the [`mode`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#vector-workload-modes) parameter to `on_disk` for your vector field type. This parameter will configure your index to use secondary storage. ## Creating an index for disk-based vector search @@ -43,7 +43,7 @@ PUT my-vector-index ``` {% include copy-curl.html %} -By default, the `on_disk` mode configures the index to use the `faiss` engine and `hnsw` method. The default [`compression_level`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#compression-levels) of `32x` reduces the amount of memory the vectors require by a factor of 32. To preserve the search recall, rescoring is enabled by default. A search on a disk-optimized index runs in two phases: The compressed index is searched first, and then the results are rescored using full-precision vectors loaded from disk. +By default, the `on_disk` mode configures the index to use the `faiss` engine and `hnsw` method. The default [`compression_level`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#compression-levels) of `32x` reduces the amount of memory the vectors require by a factor of 32. To preserve the search recall, rescoring is enabled by default. A search on a disk-optimized index runs in two phases: The compressed index is searched first, and then the results are rescored using full-precision vectors loaded from disk. To reduce the compression level, provide the `compression_level` parameter when creating the index mapping: @@ -71,7 +71,7 @@ PUT my-vector-index ``` {% include copy-curl.html %} -For more information about the `compression_level` parameter, see [Compression levels]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#compression-levels). Note that for `4x` compression, the `lucene` engine will be used. +For more information about the `compression_level` parameter, see [Compression levels]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#compression-levels). Note that for `4x` compression, the `lucene` engine will be used. {: .note} If you need more granular fine-tuning, you can override additional k-NN parameters in the method definition. For example, to improve recall, increase the `ef_construction` parameter value: @@ -136,7 +136,7 @@ POST _bulk ## Search -Search is also performed in the same way as in other index configurations. 
The key difference is that, by default, the `oversample_factor` of the rescore parameter is set to `3.0` (unless you override the `compression_level`). For more information, see [Rescoring quantized results using full precision]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#rescoring-quantized-results-using-full-precision). To perform vector search on a disk-optimized index, provide the search vector: +Search is also performed in the same way as in other index configurations. The key difference is that, by default, the `oversample_factor` of the rescore parameter is set to `3.0` (unless you override the `compression_level`). For more information, see [Rescoring quantized results using full precision]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/#rescoring-quantized-results-to-full-precision). To perform vector search on a disk-optimized index, provide the search vector: ```json GET my-vector-index/_search @@ -181,7 +181,7 @@ GET my-vector-index/_search ## Model-based indexes -For [model-based indexes]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model), you can specify the `on_disk` parameter in the training request in the same way that you would specify it during index creation. By default, `on_disk` mode will use the [Faiss IVF method]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/#supported-faiss-methods) and a compression level of `32x`. To run the training API, send the following request: +For [model-based indexes]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model), you can specify the `on_disk` parameter in the training request in the same way that you would specify it during index creation. By default, `on_disk` mode will use the [Faiss IVF method]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#ivf-parameters) and a compression level of `32x`. To run the training API, send the following request: ```json POST /_plugins/_knn/models/test-model/_train @@ -206,5 +206,10 @@ You can override the `compression_level` for disk-optimized indexes in the same ## Next steps -- For more information about binary quantization, see [Binary quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-performance/binary-quantization/). -- For more information about k-NN vector workload modes, see [Vector workload modes]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#vector-workload-modes). \ No newline at end of file +- For more information about binary quantization, see [Binary quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/binary-quantization/). +- For more information about k-NN vector workload modes, see [Vector workload modes]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#vector-workload-modes). 
+ +## Related articles + +- [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file diff --git a/_vector-search/optimizing-performance/faiss-16-bit-quantization.md b/_vector-search/optimizing-storage/faiss-16-bit-quantization.md similarity index 90% rename from _vector-search/optimizing-performance/faiss-16-bit-quantization.md rename to _vector-search/optimizing-storage/faiss-16-bit-quantization.md index 717c8ea524..c8c6bca779 100644 --- a/_vector-search/optimizing-performance/faiss-16-bit-quantization.md +++ b/_vector-search/optimizing-storage/faiss-16-bit-quantization.md @@ -2,7 +2,7 @@ layout: default title: Faiss 16-bit scalar quantization parent: Vector quantization -grand_parent: Optimizing vector search performance +grand_parent: Optimizing vector storage nav_order: 20 has_children: false has_math: true @@ -12,7 +12,7 @@ has_math: true Starting with version 2.13, OpenSearch supports performing scalar quantization for the Faiss engine within OpenSearch. Within the Faiss engine, a scalar quantizer (SQfp16) performs the conversion between 32-bit and 16-bit vectors. At ingestion time, when you upload 32-bit floating-point vectors to OpenSearch, SQfp16 quantizes them into 16-bit floating-point vectors and stores the quantized vectors in a vector index. -At search time, SQfp16 decodes the vector values back into 32-bit floating-point values for distance computation. The SQfp16 quantization can decrease the memory footprint by a factor of 2. Additionally, it leads to a minimal loss in recall when differences between vector values are large compared to the error introduced by eliminating their two least significant bits. When used with [SIMD optimization]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/vector-field/#simd-optimization-for-the-faiss-engine), SQfp16 quantization can also significantly reduce search latencies and improve indexing throughput. +At search time, SQfp16 decodes the vector values back into 32-bit floating-point values for distance computation. The SQfp16 quantization can decrease the memory footprint by a factor of 2. Additionally, it leads to a minimal loss in recall when differences between vector values are large compared to the error introduced by eliminating their two least significant bits. When used with [SIMD optimization]({{site.url}}{{site.baseurl}}/vector-search/performance-tuning/#simd-optimization-for-the-faiss-engine), SQfp16 quantization can also significantly reduce search latencies and improve indexing throughput. SIMD optimization is not supported on Windows. Using Faiss scalar quantization on Windows can lead to a significant drop in performance, including decreased indexing throughput and increased search latencies. {: .warning} @@ -54,7 +54,7 @@ PUT /test-index ``` {% include copy-curl.html %} -Optionally, you can specify the parameters in `method.parameters.encoder`. For more information about `encoder` object parameters, see [SQ parameters]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/#sq-parameters). +Optionally, you can specify the parameters in `method.parameters.encoder`. For more information about `encoder` object parameters, see [SQ parameters]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#sq-parameters). The `fp16` encoder converts 32-bit vectors into their 16-bit counterparts. 
For this encoder type, the vector values must be in the [-65504.0, 65504.0] range. To define how to handle out-of-range values, the preceding request specifies the `clip` parameter. By default, this parameter is `false`, and any vectors containing out-of-range values are rejected. @@ -151,4 +151,9 @@ As an example, assume that you have 1 million vectors with a dimension of 256 an ```r 1.1 * (((2 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 0.525 GB -``` \ No newline at end of file +``` + +## Related articles + +- [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file diff --git a/_vector-search/optimizing-performance/faiss-product-quantization.md b/_vector-search/optimizing-storage/faiss-product-quantization.md similarity index 86% rename from _vector-search/optimizing-performance/faiss-product-quantization.md rename to _vector-search/optimizing-storage/faiss-product-quantization.md index 57a989e8e7..8b064fd649 100644 --- a/_vector-search/optimizing-performance/faiss-product-quantization.md +++ b/_vector-search/optimizing-storage/faiss-product-quantization.md @@ -2,7 +2,7 @@ layout: default title: Faiss product quantization parent: Vector quantization -grand_parent: Optimizing vector search performance +grand_parent: Optimizing vector storage nav_order: 30 has_children: false has_math: true @@ -10,7 +10,7 @@ has_math: true # Faiss product quantization -Product quantization (PQ) is a technique used to represent a vector using a configurable number of bits. In general, it can be used to achieve a higher level of compression as compared to byte or scalar quantization. PQ works by separating vectors into _m_ subvectors and encoding each subvector with _code_size_ bits. Thus, the total amount of memory for the vector is `m*code_size` bits, plus overhead. For details about the parameters, see [PQ parameters]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/#pq-parameters). PQ is only supported for the _Faiss_ engine and can be used with either the _HNSW_ or _IVF_ approximate nearest neighbor (ANN) algorithms. +Product quantization (PQ) is a technique used to represent a vector using a configurable number of bits. In general, it can be used to achieve a higher level of compression as compared to byte or scalar quantization. PQ works by separating vectors into _m_ subvectors and encoding each subvector with _code_size_ bits. Thus, the total amount of memory for the vector is `m*code_size` bits, plus overhead. For details about the parameters, see [PQ parameters]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#pq-parameters). PQ is only supported for the _Faiss_ engine and can be used with either the _HNSW_ or _IVF_ approximate nearest neighbor (ANN) algorithms. ## Using Faiss product quantization @@ -48,5 +48,10 @@ The memory required for IVF with PQ is estimated to be `1.1*(((pq_code_size / 8) For example, assume that you have 1 million vectors with a dimension of 256, `ivf_nlist` of 512, `pq_m` of 32, `pq_code_size` of 8, and 100 segments. 
The memory requirement can be estimated as follows: ```r -1.1*((8 / 8 * 64 + 24) * 1000000 + 100 * (2^8 * 4 * 256 + 4 * 512 * 256)) ~= 0.171 GB -``` \ No newline at end of file +1.1 * ((8 / 8 * 64 + 24) * 1000000 + 100 * (2^8 * 4 * 256 + 4 * 512 * 256)) ~= 0.171 GB +``` + +## Related articles + +- [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file diff --git a/_vector-search/optimizing-storage/index.md b/_vector-search/optimizing-storage/index.md new file mode 100644 index 0000000000..1892230801 --- /dev/null +++ b/_vector-search/optimizing-storage/index.md @@ -0,0 +1,22 @@ +--- +layout: default +title: Optimizing vector storage +nav_order: 60 +has_children: true +has_toc: false +redirect_from: + - /vector-search/optimizing-storage/ +storage_cards: + - heading: "Vector quantization" + description: "Reduce vector storage space by quantizing vectors." + link: "/vector-search/optimizing-storage/knn-vector-quantization/" + - heading: "Disk-based vector search" + description: "Uses binary quantization to reduce operational costs of vector workloads." + link: "/vector-search/optimizing-storage/disk-based-vector-search/" +--- + +# Optimizing vector storage + +Vector search operations can be resource-intensive, especially when dealing with large-scale vector datasets. OpenSearch provides several optimization techniques to reduce memory usage. + +{% include cards.html cards=page.storage_cards %} \ No newline at end of file diff --git a/_vector-search/optimizing-performance/knn-vector-quantization.md b/_vector-search/optimizing-storage/knn-vector-quantization.md similarity index 60% rename from _vector-search/optimizing-performance/knn-vector-quantization.md rename to _vector-search/optimizing-storage/knn-vector-quantization.md index 8aa62b98fa..3bebfdb6f0 100644 --- a/_vector-search/optimizing-performance/knn-vector-quantization.md +++ b/_vector-search/optimizing-storage/knn-vector-quantization.md @@ -1,7 +1,7 @@ --- layout: default title: Vector quantization -parent: Optimizing vector search performance +parent: Optimizing vector storage nav_order: 10 has_children: true has_toc: false @@ -10,28 +10,28 @@ redirect_from: outside_cards: - heading: "Byte vectors" description: "Quantize vectors into byte vectors." - link: "/field-types/supported-field-types/knn-vector#byte-vectors" + link: "/field-types/supported-field-types/knn-memory-optimized/#byte-vectors" - heading: "Binary vectors" description: "Quantize vectors into binary vectors." - link: "/field-types/supported-field-types/knn-vector#binary-vectors" + link: "/field-types/supported-field-types/knn-memory-optimized/#binary-vectors" inside_cards: - heading: "Lucene scalar quantization" description: "Use built-in scalar quantization for the Lucene engine." - link: "/vector-search/optimizing-performance/lucene-scalar-quantization/" + link: "/vector-search/optimizing-storage/lucene-scalar-quantization/" - heading: "Faiss 16-bit scalar quantization" description: "Use built-in scalar quantization for the Faiss engine." - link: "/vector-search/optimizing-performance/faiss-16-bit-quantization/" + link: "/vector-search/optimizing-storage/faiss-16-bit-quantization/" - heading: "Faiss product quantization" description: "Use built-in product quantization for the Faiss engine." 
- link: "/vector-search/optimizing-performance/faiss-product-quantization/" + link: "/vector-search/optimizing-storage/faiss-product-quantization/" - heading: "Binary quantization" description: "Use built-in binary quantization for the Faiss engine." - link: "/vector-search/optimizing-performance/binary-quantization/" + link: "/vector-search/optimizing-storage/binary-quantization/" --- # Vector quantization -By default, the k-NN plugin supports the indexing and querying of vectors of type `float`, where each dimension of the vector occupies 4 bytes of memory. For use cases that require ingestion on a large scale, keeping `float` vectors can be expensive because OpenSearch needs to construct, load, save, and search graphs (for the native `faiss` and `nmslib` [deprecated] engines). To reduce the memory footprint, you can use vector quantization. +By default, OpenSearch supports the indexing and querying of vectors of type `float`, where each dimension of the vector occupies 4 bytes of memory. For use cases that require ingestion on a large scale, keeping `float` vectors can be expensive because OpenSearch needs to construct, load, save, and search graphs (for the native `faiss` and `nmslib` [deprecated] engines). To reduce the memory footprint, you can use vector quantization. OpenSearch supports many varieties of quantization. In general, the level of quantization will provide a trade-off between the accuracy of the nearest neighbor search and the size of the memory footprint consumed by the vector search. diff --git a/_vector-search/optimizing-performance/lucene-scalar-quantization.md b/_vector-search/optimizing-storage/lucene-scalar-quantization.md similarity index 78% rename from _vector-search/optimizing-performance/lucene-scalar-quantization.md rename to _vector-search/optimizing-storage/lucene-scalar-quantization.md index 50a63031d2..944360b239 100644 --- a/_vector-search/optimizing-performance/lucene-scalar-quantization.md +++ b/_vector-search/optimizing-storage/lucene-scalar-quantization.md @@ -2,7 +2,7 @@ layout: default title: Lucene scalar quantization parent: Vector quantization -grand_parent: Optimizing vector search performance +grand_parent: Optimizing vector storage nav_order: 10 has_children: false has_math: true @@ -10,7 +10,7 @@ has_math: true # Lucene scalar quantization -Starting with version 2.16, Opensupports built-in scalar quantization for the Lucene engine. Unlike [byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#byte-vectors), which require you to quantize vectors before ingesting documents, the Lucene scalar quantizer quantizes input vectors in OpenSearch during ingestion. The Lucene scalar quantizer converts 32-bit floating-point input vectors into 7-bit integer vectors in each segment using the minimum and maximum quantiles computed based on the [`confidence_interval`](#confidence-interval) parameter. During search, the query vector is quantized in each segment using the segment's minimum and maximum quantiles in order to compute the distance between the query vector and the segment's quantized input vectors. +Starting with version 2.16, OpenSearch supports built-in scalar quantization for the Lucene engine. Unlike [byte vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#byte-vectors), which require you to quantize vectors before ingesting documents, the Lucene scalar quantizer quantizes input vectors in OpenSearch during ingestion. 
The Lucene scalar quantizer converts 32-bit floating-point input vectors into 7-bit integer vectors in each segment using the minimum and maximum quantiles computed based on the [`confidence_interval`](#confidence-interval) parameter. During search, the query vector is quantized in each segment using the segment's minimum and maximum quantiles in order to compute the distance between the query vector and the segment's quantized input vectors.
 
 Quantization can decrease the memory footprint by a factor of 4 in exchange for some loss in recall. Additionally, quantization slightly increases disk usage because it requires storing both the raw input vectors and the quantized vectors.
 
@@ -112,4 +112,9 @@ As an example, assume that you have 1 million vectors with a dimension of 256 an
 
 ```r
 1.1 * (256 + 8 * 16) * 1,000,000 ~= 0.4 GB
-```
\ No newline at end of file
+```
+
+## Related articles
+
+- [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/)
+- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/)
\ No newline at end of file
diff --git a/_vector-search/performance-tuning-indexing.md b/_vector-search/performance-tuning-indexing.md
new file mode 100644
index 0000000000..a670eb34ac
--- /dev/null
+++ b/_vector-search/performance-tuning-indexing.md
@@ -0,0 +1,154 @@
+---
+layout: default
+title: Indexing performance tuning
+nav_order: 10
+parent: Performance tuning
+---
+
+# Indexing performance tuning
+
+Take any of the following steps to improve indexing performance, especially when you plan to index a large number of vectors at once.
+
+## Disable the refresh interval
+
+Either disable the refresh interval (default = 1 sec) or set a long duration for the refresh interval to avoid creating multiple small segments:
+
+  ```json
+  PUT //_settings
+  {
+    "index" : {
+      "refresh_interval" : "-1"
+    }
+  }
+  ```
+
+Make sure to reenable `refresh_interval` after indexing is complete.
+
+## Disable replicas (no OpenSearch replica shard)
+
+  Set replicas to `0` to prevent duplicate construction of native library indexes in both primary and replica shards. When you enable replicas after indexing completes, the serialized native library indexes are copied directly. If you have no replicas, losing nodes might cause data loss, so it's important that the data be stored elsewhere so that this initial load can be retried in the event of an issue.
+
+## Increase the number of indexing threads
+
+If your hardware has multiple cores, you can allow multiple threads to be used in native library index construction, thereby speeding up the indexing process. Determine the number of threads to allot with the [knn.algo_param.index_thread_qty]({{site.url}}{{site.baseurl}}/vector-search/settings/#cluster-settings) setting.
+
+Monitor CPU utilization and choose the correct number of threads. Because native library index construction is costly, choosing more threads than you need can cause additional CPU load.
+
+
+## (Expert level) Disable vector field storage in the source field
+
When using vector fields within the source, you can remove the vector field to save disk space, as shown in the following example where the `location` vector is excluded: + + ```json + PUT /<index_name>/_mappings + { + "_source": { + "excludes": ["location"] + }, + "properties": { + "location": { + "type": "knn_vector", + "dimension": 2, + "space_type": "l2", + "method": { + "name": "hnsw", + "engine": "faiss" + } + } + } + } + ``` + + +Disabling the `_source` field can cause certain features to become unavailable, such as the `update`, `update_by_query`, and `reindex` APIs and the ability to debug queries or aggregations by using the original document at index time. + +In OpenSearch 2.15 or later, you can further improve indexing speed and reduce disk space by removing the vector field from the `_recovery_source`, as shown in the following example: + + ```json + PUT /<index_name>/_mappings + { + "_source": { + "excludes": ["location"], + "recovery_source_excludes": ["location"] + }, + "properties": { + "location": { + "type": "knn_vector", + "dimension": 2, + "space_type": "l2", + "method": { + "name": "hnsw", + "engine": "faiss" + } + } + } + } + ``` + +This is an expert-level setting. Disabling the `_recovery_source` may lead to failures during peer-to-peer recovery. Before disabling the `_recovery_source`, check with your OpenSearch cluster admin to determine whether your cluster performs regular flushes before starting the peer-to-peer recovery of shards. +{: .warning} + +## (Expert level) Build vector data structures on demand + +This approach is recommended only for workloads that involve a single initial bulk upload and that are used exclusively for search after force merging to a single segment. + +During indexing, vector search builds a specialized data structure for a `knn_vector` field to enable efficient approximate k-NN search. However, these structures are rebuilt during [force merge]({{site.url}}{{site.baseurl}}/api-reference/index-apis/force-merge/) on vector indexes. To optimize indexing speed, follow these steps: + +1. **Disable vector data structure creation**: Disable vector data structure creation for new segments by setting [`index.knn.advanced.approximate_threshold`]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings) to `-1`. + + To specify the setting at index creation, send the following request: + + ```json + PUT /test-index/ + { + "settings": { + "index.knn.advanced.approximate_threshold": "-1" + } + } + ``` + {% include copy-curl.html %} + + To specify the setting after index creation, send the following request: + + ```json + PUT /test-index/_settings + { + "index.knn.advanced.approximate_threshold": "-1" + } + ``` + {% include copy-curl.html %} + +1. **Perform bulk indexing**: Index data in [bulk]({{site.url}}{{site.baseurl}}/api-reference/document-apis/bulk/) without performing any searches during ingestion: + + ```json + POST _bulk + { "index": { "_index": "test-index", "_id": "1" } } + { "my_vector1": [1.5, 2.5], "price": 12.2 } + { "index": { "_index": "test-index", "_id": "2" } } + { "my_vector1": [2.5, 3.5], "price": 7.1 } + ``` + {% include copy-curl.html %} + + If searches are performed while vector data structures are disabled, they will run using exact k-NN search. + +1.
**Reenable vector data structure creation**: Once indexing is complete, enable vector data structure creation by setting `index.knn.advanced.approximate_threshold` to `0`: + + ```json + PUT /test-index/_settings + { + "index.knn.advanced.approximate_threshold": "0" + } + ``` + {% include copy-curl.html %} + + If you do not reset the setting to `0` before the force merge, you will need to reindex your data. + {: .note} + +1. **Force merge segments into one segment**: Perform a force merge and specify `max_num_segments=1` to create the vector data structures only once: + + ```json + POST test-index/_forcemerge?max_num_segments=1 + ``` + {% include copy-curl.html %} + + After the force merge, new search requests will execute approximate k-NN search using the newly created data structures. \ No newline at end of file diff --git a/_vector-search/performance-tuning-search.md b/_vector-search/performance-tuning-search.md new file mode 100644 index 0000000000..4d6245a359 --- /dev/null +++ b/_vector-search/performance-tuning-search.md @@ -0,0 +1,56 @@ +--- +layout: default +title: Search performance tuning +nav_order: 20 +parent: Performance tuning +--- + +# Search performance tuning + +Take the following steps to improve search performance: + +## Reduce segment count + + To improve search performance, you must keep the number of segments under control. Lucene's IndexSearcher searches over all of the segments in a shard to find the `size` best results. + + Ideally, having one segment per shard provides the optimal performance with respect to search latency. You can configure an index to have multiple shards to avoid giant shards and achieve more parallelism. + + You can control the number of segments either by choosing a larger refresh interval or by disabling the refresh interval during indexing so that fewer segments are created. + +## Warm up the index + + Native library indexes are constructed during indexing, but they're loaded into memory during the first search. In Lucene, each segment is searched sequentially (so, for k-NN, each segment returns up to k nearest neighbors of the query point), and the top `size` results based on the score are returned from all the results returned by segments at a shard level (higher score = better result). + + Once a native library index is loaded (native library indexes are loaded outside the OpenSearch JVM), OpenSearch caches them in memory. Initial queries are expensive and take a few seconds, while subsequent queries are faster and take milliseconds (assuming the k-NN circuit breaker isn't hit). + + To avoid this latency penalty during your first queries, you can use the warmup API operation on the indexes you want to search: + + ```json + GET /_plugins/_knn/warmup/index1,index2,index3?pretty + { + "_shards" : { + "total" : 6, + "successful" : 6, + "failed" : 0 + } + } + ``` + + The warmup API operation loads all native library indexes for all shards (primary and replica) for the specified indexes into the cache, so there's no penalty for loading native library indexes during initial searches. + +This API operation only loads the segments of active indexes into the cache. If a merge or refresh operation finishes after the API runs, or if you add new documents, you need to rerun the API to load those native library indexes into memory.
+{: .warning} + + +## Avoid reading stored fields + + If your use case is simply to read the IDs and scores of the nearest neighbors, you can disable reading stored fields, which saves time retrieving the vectors from stored fields. + +## Use `mmap` file I/O + + For the Lucene-based approximate k-NN search, there is no dedicated cache layer that speeds up read/write operations. Instead, the plugin relies on the existing caching mechanism in OpenSearch core. In versions 2.4 and earlier of the Lucene-based approximate k-NN search, read/write operations were based on Java NIO by default, which can be slow, depending on the Lucene version and number of segments per shard. Starting with version 2.5, k-NN enables [`mmap`](https://en.wikipedia.org/wiki/Mmap) file I/O by default when the store type is `hybridfs` (the default store type in OpenSearch). This leads to fast file I/O operations and improves the overall performance of both data ingestion and search. The two file extensions specific to vector values that use `mmap` are `.vec` and `.vem`. For more information about these file extensions, see [the Lucene documentation](https://lucene.apache.org/core/9_0_0/core/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsFormat.html). + + The `mmap` file I/O uses the system file cache rather than memory allocated for the Java heap, so no additional allocation is required. To change the default list of extensions set by the plugin, update the `index.store.hybrid.mmap.extensions` setting at the cluster level using the [Cluster Settings API]({{site.url}}{{site.baseurl}}/api-reference/cluster-api/cluster-settings/). + + This is an expert-level setting that requires closing the index before updating the setting and reopening it after the update. + {: .important} \ No newline at end of file diff --git a/_vector-search/performance-tuning.md b/_vector-search/performance-tuning.md new file mode 100644 index 0000000000..1d20153711 --- /dev/null +++ b/_vector-search/performance-tuning.md @@ -0,0 +1,73 @@ +--- +layout: default +title: Performance tuning +nav_order: 70 +has_children: true +redirect_from: + - /search-plugins/knn/performance-tuning/ +--- + +# Performance tuning + +This topic provides performance tuning recommendations to improve indexing and search performance for approximate k-NN (ANN). At a high level, k-NN works according to these principles: +* Native library indexes are created per `knn_vector` field/(Lucene) segment pair. +* Queries execute on segments sequentially inside the shard (same as any other OpenSearch query). +* Each native library index in the segment returns at most `k` neighbors. +* The coordinator node selects the final `size` number of neighbors from the neighbors returned by each shard. + +This topic also provides recommendations for comparing approximate k-NN to exact k-NN with score script. + +## Recommendations for engines and cluster node sizing + +Each of the three engines used for approximate k-NN search has its own attributes that make one more sensible to use than the others in a given situation. Use the following information to help determine which engine will best meet your requirements. + +In general, NMSLIB (deprecated) outperforms both Faiss and Lucene when used for search operations. However, to optimize for indexing throughput, Faiss is a good option. For relatively smaller datasets (up to a few million vectors), the Lucene engine demonstrates better latencies and recall.
At the same time, the Lucene engine produces the smallest index of the three engines, which allows you to use smaller AWS instances for data nodes. For further considerations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#choosing-the-right-method) and [Memory estimation]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#memory-estimation). + +When considering cluster node sizing, a general approach is to first establish an even distribution of the index across the cluster. However, there are other considerations. To help make these choices, you can refer to the OpenSearch managed service guidance in the section [Sizing domains](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/sizing-domains.html). + +## Improving recall + +Recall depends on multiple factors, such as the number of vectors, the number of dimensions, and the number of segments. Searching over a large number of small segments and aggregating the results leads to better recall than searching over a small number of large segments and aggregating results. The larger the native library index, the greater the chance of reduced recall if you're using smaller algorithm parameter values. Choosing larger values for algorithm parameters should help solve this issue but sacrifices search latency and indexing time. That being said, it's important to understand your system's requirements for latency and accuracy, and then choose the number of segments you want your index to have based on experimentation. + +The default parameters work well for a broad set of use cases, but make sure to run your own experiments on your datasets and choose the appropriate values. For index-level settings, see [Index settings]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings). + +## Approximate nearest neighbor compared to score script + +The standard k-NN query and custom scoring option perform differently. Test with a representative set of documents to see if the search results and latencies match your expectations. + +Custom scoring works best if the initial filter reduces the number of documents to no more than 20,000. Increasing shard count can improve latency, but be sure to keep shard size within the [recommended guidelines]({{site.url}}{{site.baseurl}}/intro/#primary-and-replica-shards). + +## SIMD optimization for the Faiss engine + +Starting with version 2.13, OpenSearch supports [Single Instruction Multiple Data (SIMD)](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) processing if the underlying hardware supports SIMD instructions (AVX2 on x64 architecture and Neon on ARM64 architecture). SIMD is supported by default on Linux machines only for the Faiss engine. SIMD architecture helps boost overall performance by improving indexing throughput and reducing search latency. Starting with version 2.18, OpenSearch supports AVX-512 SIMD instructions on x64 architecture. Starting with version 2.19, OpenSearch supports advanced AVX-512 SIMD instructions on x64 architecture for Intel Sapphire Rapids or a newer-generation processor, improving the performance of Hamming distance computation. + +SIMD optimization is applicable only if the vector dimension is a multiple of 8.
+{: .note} + + +### x64 architecture + + +For x64 architecture, the following versions of the Faiss library are built and shipped with the artifact: + +- `libopensearchknn_faiss_avx512_spr.so`: The Faiss library containing advanced AVX-512 SIMD instructions for newer-generation processors, available on public clouds such as AWS for c/m/r 7i or newer instances. +- `libopensearchknn_faiss_avx512.so`: The Faiss library containing AVX-512 SIMD instructions. +- `libopensearchknn_faiss_avx2.so`: The Faiss library containing AVX2 SIMD instructions. +- `libopensearchknn_faiss.so`: The non-optimized Faiss library without SIMD instructions. + +When using the Faiss library, the performance ranking is as follows: advanced AVX-512 > AVX-512 > AVX2 > no optimization. +{: .note } + +If your hardware supports advanced AVX-512 (SPR), OpenSearch loads the `libopensearchknn_faiss_avx512_spr.so` library at runtime. + +If your hardware supports AVX-512, OpenSearch loads the `libopensearchknn_faiss_avx512.so` library at runtime. + +If your hardware supports AVX2 but doesn't support AVX-512, OpenSearch loads the `libopensearchknn_faiss_avx2.so` library at runtime. + +To disable the advanced AVX-512 (for Sapphire Rapids or newer-generation processors), AVX-512, and AVX2 SIMD instructions and load the non-optimized Faiss library (`libopensearchknn_faiss.so`), specify the `knn.faiss.avx512_spr.disabled`, `knn.faiss.avx512.disabled`, and `knn.faiss.avx2.disabled` static settings as `true` in `opensearch.yml` (by default, all of these are `false`). + +Note that to update a static setting, you must stop the cluster, change the setting, and restart the cluster. For more information, see [Static settings]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#static-settings). + +### ARM64 architecture + +For the ARM64 architecture, only one performance-boosting Faiss library (`libopensearchknn_faiss.so`) is built and shipped. The library contains Neon SIMD instructions and cannot be disabled. \ No newline at end of file diff --git a/_vector-search/searching-data.md b/_vector-search/searching-data.md index f1d5de90d9..80afa0326a 100644 --- a/_vector-search/searching-data.md +++ b/_vector-search/searching-data.md @@ -55,3 +55,13 @@ GET /my-semantic-search-index/_search } ``` {% include copy-curl.html %} + +## Next steps + +- Learn how to configure semantic and hybrid search in the [Getting started with semantic and hybrid search]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/neural-search-tutorial/) tutorial +- Learn about the supported types of ML-powered search in [ML-powered search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/) + +## Related articles + +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) +- [Neural query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/) diff --git a/_vector-search/settings.md b/_vector-search/settings.md index 83c80d31a9..0406af1d76 100644 --- a/_vector-search/settings.md +++ b/_vector-search/settings.md @@ -27,8 +27,8 @@ Setting | Static/Dynamic | Default | Description `knn.model.index.number_of_shards`| Dynamic | `1` | The number of shards to use for the model system index, which is the OpenSearch index that stores the models used for approximate nearest neighbor (ANN) search. `knn.model.index.number_of_replicas`| Dynamic | `1` | The number of replica shards to use for the model system index. Generally, in a multi-node cluster, this value should be at least 1 in order to increase stability.
`knn.model.cache.size.limit` | Dynamic | `10%` | The model cache limit cannot exceed 25% of the JVM heap. -`knn.faiss.avx2.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx2.so` library and load the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/vector-field/#simd-optimization-for-the-faiss-engine). -`knn.faiss.avx512.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx512.so` library and load the `libopensearchknn_faiss_avx2.so` library or the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/vector-field/#simd-optimization-for-the-faiss-engine). +`knn.faiss.avx2.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx2.so` library and load the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/vector-search/performance-tuning/#simd-optimization-for-the-faiss-engine). +`knn.faiss.avx512.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx512.so` library and load the `libopensearchknn_faiss_avx2.so` library or the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/vector-search/performance-tuning/#simd-optimization-for-the-faiss-engine). ## Index settings diff --git a/_vector-search/specialized-operations/radial-search-knn.md b/_vector-search/specialized-operations/radial-search-knn.md index d2ec6f4257..6aecc44607 100644 --- a/_vector-search/specialized-operations/radial-search-knn.md +++ b/_vector-search/specialized-operations/radial-search-knn.md @@ -34,7 +34,7 @@ You can perform radial search with either Lucene or Faiss engine. The following ## Spaces -For supported spaces, see [Spaces]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#spaces). +For supported spaces, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). ## Examples diff --git a/_vector-search/vector-search-techniques/approximate-knn.md b/_vector-search/vector-search-techniques/approximate-knn.md index a34bf635d9..9872b047b2 100644 --- a/_vector-search/vector-search-techniques/approximate-knn.md +++ b/_vector-search/vector-search-techniques/approximate-knn.md @@ -1,6 +1,6 @@ --- layout: default -title: Approximate vector search +title: Approximate k-NN search nav_order: 15 parent: Vector search techniques has_children: false @@ -9,7 +9,7 @@ redirect_from: - /search-plugins/knn/approximate-knn/ --- -# Approximate vector search +# Approximate k-NN search Standard k-NN search methods compute similarity using a brute-force approach that measures the nearest distance between a query and a number of points, which produces exact results. This works well in many applications. 
However, in the case of extremely large datasets with high dimensionality, this creates a scaling problem that reduces the efficiency of the search. Approximate k-NN search methods can overcome this by employing tools that restructure indexes more efficiently and reduce the dimensionality of searchable vectors. Using this approach requires a sacrifice in accuracy but increases search processing speeds appreciably. @@ -22,14 +22,6 @@ OpenSearch builds a native library index of the vectors for each `knn-vector` fi Because the native library indexes are constructed during indexing, it is not possible to apply a filter on an index and then use this search method. All filters are applied on the results produced by the approximate nearest neighbor search. -## Recommendations for engines and cluster node sizing - -Each of the three engines used for approximate k-NN search has its own attributes that make one more sensible to use than the others in a given situation. Use the following information to help determine which engine will best meet your requirements. - -In general, NMSLIB (deprecated) outperforms both Faiss and Lucene when used for search operations. However, to optimize for indexing throughput, Faiss is a good option. For relatively smaller datasets (up to a few million vectors), the Lucene engine demonstrates better latencies and recall. At the same time, the size of the index is smallest compared to the other engines, which allows it to use smaller AWS instances for data nodes. - -When considering cluster node sizing, a general approach is to first establish an even distribution of the index across the cluster. However, there are other considerations. To help make these choices, you can refer to the OpenSearch managed service guidance in the section [Sizing domains](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/sizing-domains.html). - ## Get started with approximate k-NN To use the approximate search functionality, you must first create a vector index with `index.knn` set to `true`. This setting tells the plugin to create native library indexes for the index. @@ -130,7 +122,7 @@ GET my-knn-index-1/_search ``` {% include copy-curl.html %} -### The number of returned results +## The number of returned results In the preceding query, `k` represents the number of neighbors returned by the search of each graph. You must also include the `size` option, indicating the final number of results that you want the query to return. @@ -150,7 +142,7 @@ The number of results returned by Faiss/NMSLIB differs from the number of result Starting in OpenSearch 2.14, you can use `k`, `min_score`, or `max_distance` for [radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). -### Building a vector index from a model +## Building a vector index from a model For some of the algorithms that OpenSearch supports, the native library index needs to be trained before it can be used. It would be expensive to train every newly created segment, so, instead, the plugin features the concept of a *model* that initializes the native library index during segment creation. You can create a model by calling the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) and passing in the source of the training data and the method definition of the model. Once training is complete, the model is serialized to a k-NN model system index. Then, during indexing, the model is pulled from this index to initialize the segments. 
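For illustration, the following is a minimal sketch of such a Train API call for an `ivf` method. The `my-ivf-model`, `train-index`, and `train-field` names are placeholders, and the `dimension` must match the vectors in your training data:

```json
POST /_plugins/_knn/models/my-ivf-model/_train
{
  "training_index": "train-index",
  "training_field": "train-field",
  "dimension": 4,
  "description": "Example IVF model",
  "method": {
    "name": "ivf",
    "engine": "faiss",
    "space_type": "l2",
    "parameters": {
      "nlist": 4,
      "nprobes": 2
    }
  }
}
```
{% include copy-curl.html %}

After training completes, a `knn_vector` field can reference the model by its `model_id` in the index mapping instead of specifying a `method` directly.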
@@ -262,110 +254,3 @@ POST _bulk {% include copy-curl.html %} After data is ingested, it can be searched in the same way as any other `knn_vector` field. - -### Rescoring quantized results using full precision - -Quantization can be used to significantly reduce the memory footprint of a vector index. For more information about quantization, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization). Because some vector representation is lost during quantization, the computed distances will be approximate. This causes the overall recall of the search to decrease. - -To improve recall while maintaining the memory savings of quantization, you can use a two-phase search approach. In the first phase, `oversample_factor * k` results are retrieved from an index using quantized vectors and the scores are approximated. In the second phase, the full-precision vectors of those `oversample_factor * k` results are loaded into memory from disk, and scores are recomputed against the full-precision query vector. The results are then reduced to the top k. - -The default rescoring behavior is determined by the `mode` and `compression_level` of the backing k-NN vector field: - -- For `in_memory` mode, no rescoring is applied by default. -- For `on_disk` mode, default rescoring is based on the configured `compression_level`. Each `compression_level` provides a default `oversample_factor`, specified in the following table. - -| Compression level | Default rescore `oversample_factor` | -|:------------------|:----------------------------------| -| `32x` (default) | 3.0 | -| `16x` | 2.0 | -| `8x` | 2.0 | -| `4x` | No default rescoring | -| `2x` | No default rescoring | - -To explicitly apply rescoring, provide the `rescore` parameter in a query on a quantized index and specify the `oversample_factor`: - -```json -GET my-knn-index-1/_search -{ - "size": 2, - "query": { - "knn": { - "target-field": { - "vector": [2, 3, 5, 6], - "k": 2, - "rescore" : { - "oversample_factor": 1.2 - } - } - } - } -} -``` -{% include copy-curl.html %} - -Alternatively, set the `rescore` parameter to `true` to use a default `oversample_factor` of `1.0`: - -```json -GET my-knn-index-1/_search -{ - "size": 2, - "query": { - "knn": { - "target-field": { - "vector": [2, 3, 5, 6], - "k": 2, - "rescore" : true - } - } - } -} -``` -{% include copy-curl.html %} - -The `oversample_factor` is a floating-point number between 1.0 and 100.0, inclusive. The number of results in the first pass is calculated as `oversample_factor * k` and is guaranteed to be between 100 and 10,000, inclusive. If the calculated number of results is smaller than 100, then the number of results is set to 100. If the calculated number of results is greater than 10,000, then the number of results is set to 10,000. - -Rescoring is only supported for the `faiss` engine. - -Rescoring is not needed if quantization is not used because the scores returned are already fully precise. -{: .note} - -### Using approximate k-NN with filters - -To learn about using filters with k-NN search, see [k-NN search with filters]({{site.url}}{{site.baseurl}}/search-plugins/knn/filter-search-knn/). - -### Using approximate k-NN with nested fields - -To learn about using k-NN search with nested fields, see [k-NN search with nested fields]({{site.url}}{{site.baseurl}}/search-plugins/knn/nested-search-knn/). 
- -### Using approximate radial search - -To learn more about the radial search feature, see [k-NN radial search]({{site.url}}{{site.baseurl}}/search-plugins/knn/radial-search-knn/). - -### Using approximate k-NN with binary vectors - -To learn more about using binary vectors with k-NN search, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). - -## Spaces - -A _space_ corresponds to the function used to measure the distance between two points in order to determine the k-nearest neighbors. From the k-NN perspective, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a higher score equates to a better result. OpenSearch supports the following spaces. - -Not every method supports each of these spaces. Be sure to check out [the method documentation]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/) to make sure the space you are interested in is supported. -{: note.} - -| Space type | Distance function ($$d$$ ) | OpenSearch score | -| :--- | :--- | :--- | -| `l1` | $$ d(\mathbf{x}, \mathbf{y}) = \sum_{i=1}^n \lvert x_i - y_i \rvert $$ | $$ score = {1 \over {1 + d} } $$ | -| `l2` | $$ d(\mathbf{x}, \mathbf{y}) = \sum_{i=1}^n (x_i - y_i)^2 $$ | $$ score = {1 \over 1 + d } $$ | -| `linf` | $$ d(\mathbf{x}, \mathbf{y}) = max(\lvert x_i - y_i \rvert) $$ | $$ score = {1 \over 1 + d } $$ | -| `cosinesimil` | $$ d(\mathbf{x}, \mathbf{y}) = 1 - cos { \theta } = 1 - {\mathbf{x} \cdot \mathbf{y} \over \lVert \mathbf{x}\rVert \cdot \lVert \mathbf{y}\rVert}$$$$ = 1 - {\sum_{i=1}^n x_i y_i \over \sqrt{\sum_{i=1}^n x_i^2} \cdot \sqrt{\sum_{i=1}^n y_i^2}}$$,
where $$\lVert \mathbf{x}\rVert$$ and $$\lVert \mathbf{y}\rVert$$ represent the norms of vectors $$\mathbf{x}$$ and $$\mathbf{y}$$, respectively. | $$ score = {2 - d \over 2} $$ | -| `innerproduct` (supported for Lucene in OpenSearch version 2.13 and later) | **NMSLIB** and **Faiss**:
$$ d(\mathbf{x}, \mathbf{y}) = - {\mathbf{x} \cdot \mathbf{y}} = - \sum_{i=1}^n x_i y_i $$

**Lucene**:
$$ d(\mathbf{x}, \mathbf{y}) = {\mathbf{x} \cdot \mathbf{y}} = \sum_{i=1}^n x_i y_i $$ | **NMSLIB** and **Faiss**:
$$ \text{If} d \ge 0, score = {1 \over 1 + d }$$
$$\text{If} d < 0, score = −d + 1$$

**Lucene:**
$$ \text{If} d > 0, score = d + 1 $$
$$\text{If} d \le 0, score = {1 \over 1 + (-1 \cdot d) }$$ | -| `hamming` (supported for binary vectors in OpenSearch version 2.16 and later) | $$ d(\mathbf{x}, \mathbf{y}) = \text{countSetBits}(\mathbf{x} \oplus \mathbf{y})$$ | $$ score = {1 \over 1 + d } $$ | - -The cosine similarity formula does not include the `1 -` prefix. However, because similarity search libraries equate lower scores with closer results, they return `1 - cosineSimilarity` for the cosine similarity space---this is why `1 -` is included in the distance function. -{: .note } - -With cosine similarity, it is not valid to pass a zero vector (`[0, 0, ...]`) as input. This is because the magnitude of such a vector is 0, which raises a `divide by 0` exception in the corresponding formula. Requests containing the zero vector will be rejected, and a corresponding exception will be thrown. -{: .note } - -The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). -{: .note} diff --git a/_vector-search/vector-search-techniques/index.md b/_vector-search/vector-search-techniques/index.md index b1e0c33e25..6767dc4909 100644 --- a/_vector-search/vector-search-techniques/index.md +++ b/_vector-search/vector-search-techniques/index.md @@ -27,7 +27,7 @@ Overall, for larger data sets, you should generally choose the approximate neare ## Approximate search -OpenSearch supports several algorithms for approximate vector search, each with its own advantages. For complete documentation, see [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). For more information about the search methods and engines, see [Method definitions]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/). For method recommendations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/vector-search/creating-vector-index/method/#choosing-the-right-method). +OpenSearch supports several algorithms for approximate vector search, each with its own advantages. For complete documentation, see [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). For more information about the search methods and engines, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). For method recommendations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#choosing-the-right-method). To use approximate vector search, specify one of the following search methods (algorithms) in the `method` parameter: diff --git a/_vector-search/vector-search-techniques/knn-score-script.md b/_vector-search/vector-search-techniques/knn-score-script.md index 2d0d33b037..4ef6c58a3d 100644 --- a/_vector-search/vector-search-techniques/knn-score-script.md +++ b/_vector-search/vector-search-techniques/knn-score-script.md @@ -1,6 +1,6 @@ --- layout: default -title: Exact vector search with scoring script +title: Exact k-NN search with scoring script nav_order: 20 parent: Vector search techniques has_children: true @@ -9,9 +9,9 @@ redirect_from: - /search-plugins/knn/knn-score-script/ --- -# Exact vector search with scoring script +# Exact k-NN search with scoring script -You can use exact vector search with scoring script to find the exact k-nearest neighbors to a given query point. 
Using the k-NN score script, you can apply a filter on an index before executing the nearest neighbor search. This is useful for dynamic search cases where the index body may vary based on other conditions. +You can use exact k-NN search with scoring script to find the exact k-nearest neighbors to a given query point. Using the k-NN score script, you can apply a filter on an index before executing the nearest neighbor search. This is useful for dynamic search cases where the index body may vary based on other conditions. Because the score script approach executes a brute force search, it doesn't scale as well as the [approximate approach]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). In some cases, it might be better to think about refactoring your workflow or index structure to use the approximate approach instead of the score script approach. @@ -106,7 +106,7 @@ All parameters are required. - `query_value` is the point you want to find the nearest neighbors for. For the Euclidean and cosine similarity spaces, the value must be an array of floats that matches the dimension set in the field's mapping. For Hamming bit distance, this value can be either of type signed long or a base64-encoded string (for the long and binary field types, respectively). - `space_type` corresponds to the distance function. See the [spaces section](#spaces). -The [post filter example in the approximate approach]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn#using-approximate-k-nn-with-filters) shows a search that returns fewer than `k` results. If you want to avoid this situation, the score script method lets you essentially invert the order of events. In other words, you can filter down the set of documents over which to execute the k-nearest neighbor search. +The [post filter example in the approximate approach]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/filter-search-knn/) shows a search that returns fewer than `k` results. If you want to avoid this situation, the score script method lets you essentially invert the order of events. In other words, you can filter down the set of documents over which to execute the k-nearest neighbor search. This example shows a pre-filter approach to k-NN search with the score script approach. First, create the index: @@ -305,5 +305,5 @@ Cosine similarity returns a number between -1 and 1, and because OpenSearch rele With cosine similarity, it is not valid to pass a zero vector (`[0, 0, ... ]`) as input. This is because the magnitude of such a vector is 0, which raises a `divide by 0` exception in the corresponding formula. Requests containing the zero vector will be rejected, and a corresponding exception will be thrown. {: .note } -The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). +The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized#binary-vectors). 
{: .note} diff --git a/_vector-search/vector-search-techniques/painless-functions.md b/_vector-search/vector-search-techniques/painless-functions.md index 2678766c62..ab8f619b6f 100644 --- a/_vector-search/vector-search-techniques/painless-functions.md +++ b/_vector-search/vector-search-techniques/painless-functions.md @@ -2,7 +2,7 @@ layout: default title: Painless extensions nav_order: 25 -parent: Exact vector search with scoring script +parent: Exact k-NN search with scoring script grand_parent: Vector search techniques has_children: false has_math: true @@ -58,7 +58,7 @@ l1Norm | `float l1Norm (float[] queryVector, doc['vector field'])` | This functi cosineSimilarity | `float cosineSimilarity (float[] queryVector, doc['vector field'])` | Cosine similarity is an inner product of the query vector and document vector normalized to both have a length of 1. If the magnitude of the query vector doesn't change throughout the query, you can pass the magnitude of the query vector to improve performance, instead of calculating the magnitude every time for every filtered document:
`float cosineSimilarity (float[] queryVector, doc['vector field'], float normQueryVector)`
In general, the range of cosine similarity is [-1, 1]. However, in the case of information retrieval, the cosine similarity of two documents ranges from 0 to 1 because the tf-idf statistic can't be negative. Therefore, OpenSearch adds 1.0 in order to always yield a positive cosine similarity score. hamming | `float hamming (float[] queryVector, doc['vector field'])` | This function calculates the Hamming distance between a given query vector and document vectors. The Hamming distance is the number of positions at which the corresponding elements are different. The shorter the distance, the more relevant the document is, so this example inverts the return value of the Hamming distance. -The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#binary-vectors). +The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized#binary-vectors). {: .note} ## Constraints From f73a161455033d9b03ac9ef241394f0189895eb9 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Fri, 14 Feb 2025 09:49:20 -0500 Subject: [PATCH 16/32] Remove table from exact search Signed-off-by: Fanit Kolchina --- .../knn-score-script.md | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/_vector-search/vector-search-techniques/knn-score-script.md b/_vector-search/vector-search-techniques/knn-score-script.md index 4ef6c58a3d..ddae39603f 100644 --- a/_vector-search/vector-search-techniques/knn-score-script.md +++ b/_vector-search/vector-search-techniques/knn-score-script.md @@ -287,23 +287,3 @@ GET my-long-index/_search ``` {% include copy-curl.html %} -## Spaces - -A _space_ corresponds to the function used to measure the distance between two points in order to determine the k-nearest neighbors. From the k-NN perspective, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a higher score equates to a better result. The following table illustrates how OpenSearch converts spaces to scores. - -| Space type | Distance function ($$d$$ ) | OpenSearch score | -| :--- | :--- | :--- | -| `l1` | $$ d(\mathbf{x}, \mathbf{y}) = \sum_{i=1}^n \lvert x_i - y_i \rvert $$ | $$ score = {1 \over {1 + d} } $$ | -| `l2` | $$ d(\mathbf{x}, \mathbf{y}) = \sum_{i=1}^n (x_i - y_i)^2 $$ | $$ score = {1 \over 1 + d } $$ | -| `linf` | $$ d(\mathbf{x}, \mathbf{y}) = max(\lvert x_i - y_i \rvert) $$ | $$ score = {1 \over 1 + d } $$ | -| `cosinesimil` | $$ d(\mathbf{x}, \mathbf{y}) = 1 - cos { \theta } = 1 - {\mathbf{x} \cdot \mathbf{y} \over \lVert \mathbf{x}\rVert \cdot \lVert \mathbf{y}\rVert}$$$$ = 1 - {\sum_{i=1}^n x_i y_i \over \sqrt{\sum_{i=1}^n x_i^2} \cdot \sqrt{\sum_{i=1}^n y_i^2}}$$,
where $$\lVert \mathbf{x}\rVert$$ and $$\lVert \mathbf{y}\rVert$$ represent the norms of vectors $$\mathbf{x}$$ and $$\mathbf{y}$$, respectively. | $$ score = {2 - d \over 2 } $$ | -| `innerproduct` (supported for Lucene in OpenSearch version 2.13 and later) | $$ d(\mathbf{x}, \mathbf{y}) = - {\mathbf{x} \cdot \mathbf{y}} = - \sum_{i=1}^n x_i y_i $$ | $$ \text{If} d \ge 0, score = {1 \over 1 + d }$$
$$\text{If} d < 0, score = −d + 1$$ | -| `hammingbit` (supported for binary and long vectors)

`hamming` (supported for binary vectors in OpenSearch version 2.16 and later) | $$ d(\mathbf{x}, \mathbf{y}) = \text{countSetBits}(\mathbf{x} \oplus \mathbf{y})$$ | $$ score = {1 \over 1 + d } $$ | - -Cosine similarity returns a number between -1 and 1, and because OpenSearch relevance scores can't be below 0, OpenSearch adds 1 to get the final score. - -With cosine similarity, it is not valid to pass a zero vector (`[0, 0, ... ]`) as input. This is because the magnitude of such a vector is 0, which raises a `divide by 0` exception in the corresponding formula. Requests containing the zero vector will be rejected, and a corresponding exception will be thrown. -{: .note } - -The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized#binary-vectors). -{: .note} From 8fde05deec38439f5abf8d10d9c93b2e776313f8 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Fri, 14 Feb 2025 11:54:04 -0500 Subject: [PATCH 17/32] Fix links Signed-off-by: Fanit Kolchina --- .../knn-memory-optimized.md | 65 +++++++++++++++++++ .../supported-field-types/knn-vector.md | 4 ++ _query-dsl/specialized/kNN.md | 65 +------------------ .../disk-based-vector-search.md | 2 +- _vector-search/performance-tuning-search.md | 2 +- .../knn-score-script.md | 4 +- 6 files changed, 76 insertions(+), 66 deletions(-) diff --git a/_field-types/supported-field-types/knn-memory-optimized.md b/_field-types/supported-field-types/knn-memory-optimized.md index 39bb7e2c63..82dd8ca5b2 100644 --- a/_field-types/supported-field-types/knn-memory-optimized.md +++ b/_field-types/supported-field-types/knn-memory-optimized.md @@ -96,6 +96,71 @@ PUT test-index ``` {% include copy-curl.html %} +## Rescoring quantized results to full precision + +To improve recall while maintaining the memory savings of quantization, you can use a two-phase search approach. In the first phase, `oversample_factor * k` results are retrieved from an index using quantized vectors and the scores are approximated. In the second phase, the full-precision vectors of those `oversample_factor * k` results are loaded into memory from disk, and scores are recomputed against the full-precision query vector. The results are then reduced to the top k. + +The default rescoring behavior is determined by the `mode` and `compression_level` of the backing k-NN vector field: + +- For `in_memory` mode, no rescoring is applied by default. +- For `on_disk` mode, default rescoring is based on the configured `compression_level`. Each `compression_level` provides a default `oversample_factor`, specified in the following table. 
+ +| Compression level | Default rescore `oversample_factor` | +|:------------------|:----------------------------------| +| `32x` (default) | 3.0 | +| `16x` | 2.0 | +| `8x` | 2.0 | +| `4x` | No default rescoring | +| `2x` | No default rescoring | + +To explicitly apply rescoring, provide the `rescore` parameter in a query on a quantized index and specify the `oversample_factor`: + +```json +GET /my-vector-index/_search +{ + "size": 2, + "query": { + "knn": { + "target-field": { + "vector": [2, 3, 5, 6], + "k": 2, + "rescore" : { + "oversample_factor": 1.2 + } + } + } + } +} +``` +{% include copy-curl.html %} + +Alternatively, set the `rescore` parameter to `true` to use the default `oversample_factor` of `1.0`: + +```json +GET /my-vector-index/_search +{ + "size": 2, + "query": { + "knn": { + "target-field": { + "vector": [2, 3, 5, 6], + "k": 2, + "rescore" : true + } + } + } +} +``` +{% include copy-curl.html %} + +The `oversample_factor` is a floating-point number between 1.0 and 100.0, inclusive. The number of results in the first pass is calculated as `oversample_factor * k` and is guaranteed to be between 100 and 10,000, inclusive. If the calculated number of results is smaller than 100, then the number of results is set to 100. If the calculated number of results is greater than 10,000, then the number of results is set to 10,000. + +Rescoring is only supported for the `faiss` engine. + +Rescoring is not needed if quantization is not used because the scores returned are already fully precise. +{: .note} + + ## Byte vectors By default, k-NN vectors are `float` vectors, in which each dimension is 4 bytes. If you want to save storage space, you can use `byte` vectors with the `faiss` or `lucene` engine. In a `byte` vector, each dimension is a signed 8-bit integer in the [-128, 127] range. diff --git a/_field-types/supported-field-types/knn-vector.md b/_field-types/supported-field-types/knn-vector.md index 6c22c8b0f0..b79839ad4c 100644 --- a/_field-types/supported-field-types/knn-vector.md +++ b/_field-types/supported-field-types/knn-vector.md @@ -78,3 +78,7 @@ However, if you intend to use Painless scripting or a k-NN score script, you onl } ``` +## Related articles + +- [Vector search]({{site.url}}{{site.baseurl}}/vector-search/) +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/kNN/) \ No newline at end of file diff --git a/_query-dsl/specialized/kNN.md b/_query-dsl/specialized/kNN.md index 28c546f210..c02023e36c 100644 --- a/_query-dsl/specialized/kNN.md +++ b/_query-dsl/specialized/kNN.md @@ -202,66 +202,7 @@ GET /my-vector-index/_search ``` {% include copy-curl.html %} -## Rescoring quantized results to full precision +## Related articles -[Disk-based search]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/disk-based-vector-search/) uses [vector quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/) in order to reduce memory usage by compressing vectors, but this compression can impact search accuracy. To improve recall while maintaining the memory savings of quantization, you can use a two-phase search approach. In the first phase, `oversample_factor * k` results are retrieved from an index using quantized vectors and the scores are approximated. In the second phase, the full-precision vectors of those `oversample_factor * k` results are loaded into memory from disk, and scores are recomputed against the full-precision query vector. The results are then reduced to the top k. 
- -The default rescoring behavior is determined by the `mode` and `compression_level` of the backing k-NN vector field: - -- For `in_memory` mode, no rescoring is applied by default. -- For `on_disk` mode, default rescoring is based on the configured `compression_level`. Each `compression_level` provides a default `oversample_factor`, specified in the following table. - -| Compression level | Default rescore `oversample_factor` | -|:------------------|:----------------------------------| -| `32x` (default) | 3.0 | -| `16x` | 2.0 | -| `8x` | 2.0 | -| `4x` | No default rescoring | -| `2x` | No default rescoring | - -To explicitly apply rescoring, provide the `rescore` parameter in a query on a quantized index and specify the `oversample_factor`: - -```json -GET /my-vector-index/_search -{ - "size": 2, - "query": { - "knn": { - "target-field": { - "vector": [2, 3, 5, 6], - "k": 2, - "rescore" : { - "oversample_factor": 1.2 - } - } - } - } -} -``` -{% include copy-curl.html %} - -Alternatively, set the `rescore` parameter to `true` to use the default `oversample_factor` of `1.0`: - -```json -GET /my-vector-index/_search -{ - "size": 2, - "query": { - "knn": { - "target-field": { - "vector": [2, 3, 5, 6], - "k": 2, - "rescore" : true - } - } - } -} -``` -{% include copy-curl.html %} - -The `oversample_factor` is a floating-point number between 1.0 and 100.0, inclusive. The number of results in the first pass is calculated as `oversample_factor * k` and is guaranteed to be between 100 and 10,000, inclusive. If the calculated number of results is smaller than 100, then the number of results is set to 100. If the calculated number of results is greater than 10,000, then the number of results is set to 10,000. - -Rescoring is only supported for the `faiss` engine. - -Rescoring is not needed if quantization is not used because the scores returned are already fully precise. -{: .note} +- [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) +- [Rescoring quantized results to full precision]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#rescoring-quantized-results-to-full-precision) \ No newline at end of file diff --git a/_vector-search/optimizing-storage/disk-based-vector-search.md b/_vector-search/optimizing-storage/disk-based-vector-search.md index bb7abf912a..13d3fda3ed 100644 --- a/_vector-search/optimizing-storage/disk-based-vector-search.md +++ b/_vector-search/optimizing-storage/disk-based-vector-search.md @@ -136,7 +136,7 @@ POST _bulk ## Search -Search is also performed in the same way as in other index configurations. The key difference is that, by default, the `oversample_factor` of the rescore parameter is set to `3.0` (unless you override the `compression_level`). For more information, see [Rescoring quantized results using full precision]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/#rescoring-quantized-results-to-full-precision). To perform vector search on a disk-optimized index, provide the search vector: +Search is also performed in the same way as in other index configurations. The key difference is that, by default, the `oversample_factor` of the rescore parameter is set to `3.0` (unless you override the `compression_level`). For more information, see [Rescoring quantized results using full precision]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#rescoring-quantized-results-to-full-precision). 
To perform vector search on a disk-optimized index, provide the search vector: ```json GET my-vector-index/_search diff --git a/_vector-search/performance-tuning-search.md b/_vector-search/performance-tuning-search.md index 4d6245a359..b2acb59ead 100644 --- a/_vector-search/performance-tuning-search.md +++ b/_vector-search/performance-tuning-search.md @@ -7,7 +7,7 @@ parent: Performance tuning # Search performance tuning -Take the following steps to improve search performance: +Take the following steps to improve search performance. ## Reduce segment count diff --git a/_vector-search/vector-search-techniques/knn-score-script.md b/_vector-search/vector-search-techniques/knn-score-script.md index ddae39603f..1dec51056b 100644 --- a/_vector-search/vector-search-techniques/knn-score-script.md +++ b/_vector-search/vector-search-techniques/knn-score-script.md @@ -19,7 +19,7 @@ Because the score script approach executes a brute force search, it doesn't scal Similar to approximate nearest neighbor search, in order to use the score script on a body of vectors, you must first create an index with one or more `knn_vector` fields. -If you intend to just use the score script approach (and not the approximate approach) you can set `index.knn` to `false` and not set `index.knn.space_type`. You can choose the space type during search. See [spaces](#spaces) for the spaces the k-NN score script supports. +If you intend to just use the score script approach (and not the approximate approach), you can set `index.knn` to `false` and not set `index.knn.space_type`. You can choose the space type during search. For the spaces the k-NN score script supports, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). This example creates an index with two `knn_vector` fields: @@ -104,7 +104,7 @@ All parameters are required. - `field` is the field that contains your vector data. - `query_value` is the point you want to find the nearest neighbors for. For the Euclidean and cosine similarity spaces, the value must be an array of floats that matches the dimension set in the field's mapping. For Hamming bit distance, this value can be either of type signed long or a base64-encoded string (for the long and binary field types, respectively). - `space_type` corresponds to the distance function. See the [spaces section](#spaces). +- `space_type` corresponds to the distance function. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). The [post filter example in the approximate approach]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/filter-search-knn/) shows a search that returns fewer than `k` results. If you want to avoid this situation, the score script method lets you essentially invert the order of events. In other words, you can filter down the set of documents over which to execute the k-nearest neighbor search.
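As a condensed sketch of that pre-filter pattern, the following query applies a filter first and then runs the k-NN score script over only the filtered documents. The `my-knn-index-1` index, `my_vector` field, `color` filter, and vector values are illustrative:

```json
GET my-knn-index-1/_search
{
  "size": 2,
  "query": {
    "script_score": {
      "query": {
        "bool": {
          "filter": {
            "term": {
              "color": "red"
            }
          }
        }
      },
      "script": {
        "source": "knn_score",
        "lang": "knn",
        "params": {
          "field": "my_vector",
          "query_value": [2.0, 3.0],
          "space_type": "l2"
        }
      }
    }
  }
}
```
{% include copy-curl.html %}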
From 73f4fd70291196de80508b17f41e2b917e917688 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Fri, 14 Feb 2025 17:30:47 -0500 Subject: [PATCH 18/32] Remove text from top page Signed-off-by: Fanit Kolchina --- _vector-search/index.md | 63 ++----------------- .../disk-based-vector-search.md | 2 +- 2 files changed, 5 insertions(+), 60 deletions(-) diff --git a/_vector-search/index.md b/_vector-search/index.md index 573eb7c289..80a940c82d 100644 --- a/_vector-search/index.md +++ b/_vector-search/index.md @@ -9,49 +9,15 @@ permalink: /vector-search/ redirect_from: - /vector-search/index/ - /search-plugins/vector-search/ -raw_steps: - - heading: "Pre-generated embeddings quickstart" - description: "Follow a quickstart tutorial for raw vector search." - link: "/vector-search/getting-started/pre-generated-embeddings/" - - heading: "Vector search techniques" - description: "Select a search technique and configure your vector search." - link: "/vector-search/vector-search-techniques/" - - heading: "Specialized vector search" - description: "Learn about specialized vector search use cases, such as filtering, nested field search, and radial search." - link: "/vector-search/specialized-operations/" - - heading: "Optimizing vector storage" - description: "Learn about optimizing vector search reduce memory usage and improve performance." - link: "/vector-search/optimizing-storage/" - - heading: "Performance tuning" - description: "Learn about optimizing vector search reduce memory usage and improve performance." - link: "/vector-search/performance-tuning/" -ml_steps: - - heading: "Auto-generated embeddings quickstart" - description: "Follow a quickstart tutorial for text-to-embedding search." - link: "/vector-search/getting-started/auto-generated-embeddings/" - - heading: "Getting started with semantic and hybrid search" - description: "Dive into semantic search and hybrid search." - link: "/vector-search/getting-started/tutorials/neural-search-tutorial/" - - heading: "ML-powered search" - description: "Learn about many ML-powered search options that OpenSearch provides." - link: "/vector-search/ml-powered-search/" - - heading: "Optimizing vector storage" - description: "Learn about optimizing vector search reduce memory usage and improve performance." - link: "/vector-search/optimizing-storage/" - - heading: "Performance tuning" - description: "Learn about optimizing vector search reduce memory usage and improve performance." - link: "/vector-search/performance-tuning/" --- # Vector search -Traditional lexical search, based on term frequency models like BM25, is effective for many search applications. However, these techniques often require substantial time and expertise to fine-tune for capturing the meaning or relevance of search terms. To add semantic understanding to your search application, you can use machine learning embedding models. These models encode the meaning and context of text, images, and audio into vectors, enabling similarity search. OpenSearch supports this functionality through its k-nearest neighbors (k-NN) search capabilities.  +Vector search enhances traditional lexical search by encoding text, images, and audio as vectors, enabling similarity search with semantic understanding. OpenSearch unifies traditional search, analytics, and vector search in a single solution. As a vector database, it streamlines AI application development by efficiently storing and retrieving high-dimensional data. 
-OpenSearch combines traditional search, analytics, and vector search into a single, unified solution. Its vector database capabilities simplify the development of artificial intelligence (AI) applications by reducing the effort required to manage and integrate AI-generated assets. You can bring your models, vectors, and metadata into OpenSearch to enable vector, lexical, and hybrid search and analytics, all with built-in performance and scalability. - -## Using OpenSearch as a vector database - -OpenSearch provides an integrated  vector database that can support AI systems by serving as a knowledge base. This benefits AI applications like generative AI and natural language search by providing a long-term memory of AI-generated outputs. These outputs can be used to enhance information retrieval and analytics, improve efficiency and stability, and give generative AI models a broader and deeper pool of data from which to draw more accurate responses to queries. + +[Get started]({{site.url}}{{site.baseurl}}/vector-search/getting-started/){: .btn-dark-blue} + ## Key features @@ -62,24 +28,3 @@ OpenSearch vector search supports the following key features: - **Multi-vector support**: Store and search multiple vectors per document using nested fields, useful for complex documents with multiple components requiring separate vector representations. - **Memory-efficient search**: Optimize memory usage through various quantization techniques and efficient indexing methods, making vector search practical even with large-scale deployments. - **Hybrid search capabilities**: Combine traditional keyword search with vector-based semantic search to use the strengths of both approaches, improving search relevance and accuracy. - - -[Get started]({{site.url}}{{site.baseurl}}/vector-search/getting-started/){: .btn-dark-blue} - - ---- -## Explore vector search documentation - -Choose one of the following learning paths to explore vector search documentation. - -### Learning path: Bring your own vectors - -If you’ve already generated your own vector embeddings, OpenSearch makes it easy to ingest and search them. Follow this documentation sequence to learn more: - -{% include list.html list_items=page.raw_steps%} - -### Learning path: Seamless text-to-embedding search - -Simplify your search process by letting OpenSearch handle embedding generation. Follow this documentation sequence to learn more: - -{% include list.html list_items=page.ml_steps%} \ No newline at end of file diff --git a/_vector-search/optimizing-storage/disk-based-vector-search.md b/_vector-search/optimizing-storage/disk-based-vector-search.md index 13d3fda3ed..5215d5ecba 100644 --- a/_vector-search/optimizing-storage/disk-based-vector-search.md +++ b/_vector-search/optimizing-storage/disk-based-vector-search.md @@ -136,7 +136,7 @@ POST _bulk ## Search -Search is also performed in the same way as in other index configurations. The key difference is that, by default, the `oversample_factor` of the rescore parameter is set to `3.0` (unless you override the `compression_level`). For more information, see [Rescoring quantized results using full precision]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#rescoring-quantized-results-to-full-precision). To perform vector search on a disk-optimized index, provide the search vector: +Search is also performed in the same way as in other index configurations. 
The key difference is that, by default, the `oversample_factor` of the rescore parameter is set to `3.0` (unless you override the `compression_level`). For more information, see [Rescoring quantized results to full precision]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#rescoring-quantized-results-to-full-precision). To perform vector search on a disk-optimized index, provide the search vector: ```json GET my-vector-index/_search From 0be522f58bf2e1d1d7d0a5b8f6706323dc0191c3 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Mon, 17 Feb 2025 10:59:22 -0500 Subject: [PATCH 19/32] More updates Signed-off-by: Fanit Kolchina --- .../knn-memory-optimized.md | 6 + .../knn-methods-engines.md | 10 +- .../supported-field-types/knn-spaces.md | 40 +-- .../supported-field-types/knn-vector.md | 98 +++++-- _query-dsl/specialized/kNN.md | 2 +- _vector-search/api.md | 100 +++++-- _vector-search/creating-vector-index.md | 34 +-- .../filter-search-knn.md | 10 +- .../auto-generated-embeddings.md | 14 +- _vector-search/getting-started/concepts.md | 127 +++++++++ _vector-search/getting-started/index.md | 250 +++++++++++------- .../pre-generated-embeddings.md | 148 ----------- .../getting-started/tutorials/index.md | 64 ++++- .../tutorials/neural-search-tutorial.md | 22 +- .../getting-started/vector-search-basics.md | 44 +++ .../getting-started/vector-search-options.md | 94 +++++++ _vector-search/index.md | 10 +- _vector-search/ingesting-data.md | 15 +- .../conversational-search.md | 6 +- .../ml-powered-search/hybrid-search.md | 2 +- _vector-search/ml-powered-search/index.md | 6 +- .../ml-powered-search/multimodal-search.md | 2 +- .../ml-powered-search/semantic-search.md | 2 +- _vector-search/performance-tuning-search.md | 2 +- _vector-search/searching-data.md | 20 +- .../specialized-operations/index.md | 5 +- .../nested-search-knn.md | 20 +- .../approximate-knn.md | 21 +- .../vector-search-techniques/index.md | 22 +- .../knn-score-script.md | 11 +- .../painless-functions.md | 19 +- 31 files changed, 785 insertions(+), 441 deletions(-) rename _vector-search/{specialized-operations => }/filter-search-knn.md (99%) create mode 100644 _vector-search/getting-started/concepts.md delete mode 100644 _vector-search/getting-started/pre-generated-embeddings.md create mode 100644 _vector-search/getting-started/vector-search-basics.md create mode 100644 _vector-search/getting-started/vector-search-options.md diff --git a/_field-types/supported-field-types/knn-memory-optimized.md b/_field-types/supported-field-types/knn-memory-optimized.md index 82dd8ca5b2..89dcd74f78 100644 --- a/_field-types/supported-field-types/knn-memory-optimized.md +++ b/_field-types/supported-field-types/knn-memory-optimized.md @@ -918,3 +918,9 @@ The memory required for IVF can be estimated using the following formula, where ```r 1.1 * (((dimension / 8) * num_vectors) + (nlist * dimension / 8)) ``` + +## Related articles + +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/kNN/) +- [Disk-based vector search]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/disk-based-vector-search/) +- [Vector quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/) \ No newline at end of file diff --git a/_field-types/supported-field-types/knn-methods-engines.md b/_field-types/supported-field-types/knn-methods-engines.md index 481fcd9672..403fdc7fb0 100644 --- a/_field-types/supported-field-types/knn-methods-engines.md +++ 
b/_field-types/supported-field-types/knn-methods-engines.md
@@ -341,7 +341,7 @@ In a typical OpenSearch cluster, a certain portion of RAM is reserved for the JV
 Having a replica doubles the total number of vectors.
 {: .note }
 
-For information about using memory estimation with vector quantization, see the [vector quantization documentation]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/).
+For information about using memory estimation with vector quantization, see [Vector quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/).
 {: .note }
 
 ### HNSW memory estimation
@@ -362,4 +362,10 @@ As an example, assume you have a million vectors with a `dimension` of `256` and
 
 ```r
 1.1 * (((4 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 1.126 GB
-```
\ No newline at end of file
+```
+
+## Related articles
+
+- [Performance tuning]({{site.url}}{{site.baseurl}}/vector-search/performance-tuning/)
+- [Optimizing vector storage]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/)
+- [Vector quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/)
\ No newline at end of file
diff --git a/_field-types/supported-field-types/knn-spaces.md b/_field-types/supported-field-types/knn-spaces.md
index 1a8f693c31..4ea3b764ae 100644
--- a/_field-types/supported-field-types/knn-spaces.md
+++ b/_field-types/supported-field-types/knn-spaces.md
@@ -41,29 +41,29 @@ The space type is specified when creating an index:
 
 ```json
 PUT test-index
 {
-    "settings": {
-        "index": {
-            "knn": true,
-            "knn.algo_param.ef_search": 100
-        }
-    },
-    "mappings": {
-        "properties": {
-            "my_vector1": {
-                "type": "knn_vector",
-                "dimension": 1024,
-                "method": {
-                    "name": "hnsw",
-                    "space_type": "l2",
-                    "engine": "nmslib",
-                    "parameters": {
-                        "ef_construction": 128,
-                        "m": 24
+  "settings": {
+    "index": {
+      "knn": true,
+      "knn.algo_param.ef_search": 100
+    }
+  },
+  "mappings": {
+    "properties": {
+      "my_vector1": {
+        "type": "knn_vector",
+        "dimension": 1024,
+        "method": {
+          "name": "hnsw",
+          "space_type": "l2",
+          "engine": "nmslib",
+          "parameters": {
+            "ef_construction": 128,
+            "m": 24
           }
         }
       }
+    }
+  }
 }
 ```
 {% include copy-curl.html %}
diff --git a/_field-types/supported-field-types/knn-vector.md b/_field-types/supported-field-types/knn-vector.md
index b79839ad4c..7b2d4b7f41 100644
--- a/_field-types/supported-field-types/knn-vector.md
+++ b/_field-types/supported-field-types/knn-vector.md
@@ -40,28 +40,73 @@ PUT /test-index
 
 ## Method definitions
 
-[Method definitions]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) are used when the underlying [approximate k-NN]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) algorithm does not require training. For example, the following `knn_vector` field specifies that Faiss implementation of HNSW should be used for approximate k-NN search. During indexing, Faiss builds the corresponding HNSW segment files:
+[Method definitions]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) are used when the underlying [approximate k-NN (ANN)]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) algorithm does not require training. For example, the following `knn_vector` field specifies that the Faiss implementation of HNSW should be used for ANN search. During indexing, Faiss builds the corresponding HNSW segment files:
 
 ```json
-"my_vector": {
-  "type": "knn_vector",
-  "dimension": 4,
-  "space_type": "l2",
-  "method": {
-    "name": "hnsw",
-    "engine": "faiss",
-    "parameters": {
-      "ef_construction": 100,
-      "m": 16
+PUT test-index
+{
+  "settings": {
+    "index": {
+      "knn": true,
+      "knn.algo_param.ef_search": 100
+    }
+  },
+  "mappings": {
+    "properties": {
+      "my_vector1": {
+        "type": "knn_vector",
+        "dimension": 1024,
+        "method": {
+          "name": "hnsw",
+          "space_type": "l2",
+          "engine": "faiss",
+          "parameters": {
+            "ef_construction": 100,
+            "m": 16
+          }
+        }
+      }
     }
   }
 }
 ```
+{% include copy-curl.html %}
+
+You can also specify the `space_type` at the top level:
+
+```json
+PUT test-index
+{
+  "settings": {
+    "index": {
+      "knn": true,
+      "knn.algo_param.ef_search": 100
+    }
+  },
+  "mappings": {
+    "properties": {
+      "my_vector1": {
+        "type": "knn_vector",
+        "dimension": 1024,
+        "space_type": "l2",
+        "method": {
+          "name": "hnsw",
+          "engine": "faiss",
+          "parameters": {
+            "ef_construction": 100,
+            "m": 16
+          }
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
 
 ## Model IDs
 
-Model IDs are used when the underlying approximate k-NN algorithm requires a training step. As a prerequisite, the model must be created with the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model). The
-model contains the information needed to initialize the native library segment files.
+Model IDs are used when the underlying ANN algorithm requires a training step. As a prerequisite, the model must be created using the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model). The model contains the information needed to initialize the native library segment files. To configure a model for a vector field, specify the `model_id`:
 
 ```json
 "my_vector": {
@@ -70,15 +115,32 @@ model contains the information needed to initialize the native library segment f
 }
 ```
 
-However, if you intend to use Painless scripting or a k-NN score script, you only need to pass the dimension.
- ```json
+However, if you intend to use Painless scripting or a k-NN score script, you only need to pass the `dimension`:
+
+```json
 "my_vector": {
   "type": "knn_vector",
   "dimension": 128
 }
- ```
+```
+
+For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/approximate-knn/#building-a-vector-index-from-a-model).
+
+### Parameters
+
+The following table lists the parameters accepted by k-NN vector field types.
+
+Parameter | Data type | Description
+:--- | :--- | :---
+`type` | String | The vector field type. Must be `knn_vector`. Required.
+`dimension` | Integer | The size of the vectors used. Valid values are in the [1, 16,000] range. Required.
+`space_type` | String | The vector space used to calculate the distance between vectors. Valid values are `l1`, `l2`, `linf`, `cosinesimil`, `innerproduct`, `hamming`, and `hammingbit`. Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the specific engine section. Note: This value can also be specified within the `method`. Optional. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/).
+`method` | Object | The algorithm used for organizing vector data at indexing time and searching it at search time. Used when the ANN algorithm does not require training. Optional. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/).
+`model_id` | String | The model ID for a trained model. Used when the ANN algorithm requires training. See [Model IDs](#model-ids). Optional.
 
-## Related articles
+## Next steps
 
+- [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/)
+- [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/)
 - [Vector search]({{site.url}}{{site.baseurl}}/vector-search/)
-- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/kNN/)
\ No newline at end of file
+- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/)
\ No newline at end of file
diff --git a/_query-dsl/specialized/kNN.md b/_query-dsl/specialized/kNN.md
index c02023e36c..94d6a72180 100644
--- a/_query-dsl/specialized/kNN.md
+++ b/_query-dsl/specialized/kNN.md
@@ -28,7 +28,7 @@ The top-level `vector_field` specifies the vector field against which to run a s
 
 Field | Data type | Required/Optional | Description
 :--- | :--- | :--- | :---
 `vector` | Array of floats | Required | The query vector to use for vector search.
-`k` | Integer | Optional | The number of nearest neighbors to return. Required if `max_distance` or `min_score` is not specified.
+`k` | Integer | Optional | The number of nearest neighbors to return. Valid values are in the [1, 10,000] range. Required if `max_distance` or `min_score` is not specified.
 `max_distance` | Float | Optional | The maximum distance threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/).
 `min_score` | Float | Optional | The minimum score threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/).
 `filter` | Object | Optional | A filter to apply to the k-NN search. For more information, see [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/filter-search-knn/). **Important**: Filter can only be used with the `faiss` or `lucene` engines.
diff --git a/_vector-search/api.md b/_vector-search/api.md
index 1980b9a1c8..3b2d864571 100644
--- a/_vector-search/api.md
+++ b/_vector-search/api.md
@@ -1,25 +1,30 @@
 ---
 layout: default
-title: Vector API
+title: API
 nav_order: 80
 has_children: false
 redirect_from:
   - /search-plugins/knn/api/
 ---
 
-# Vector API
+# k-NN API
 
-OpenSearch provides several vector APIs for managing, monitoring, and optimizing your vector workload.
+OpenSearch provides several k-NN APIs for managing, monitoring, and optimizing your vector workload.
 
 ## Stats
 
 The k-NN `stats` API provides information about the current status of the k-NN plugin, which implements vector search functionality. This includes both cluster-level and node-level statistics. Cluster-level statistics have a single value for the entire cluster. Node-level statistics have a single value for each node in the cluster. You can filter the query by `nodeId` and `statName`, as shown in the following example:
 
-```
+```json
 GET /_plugins/_knn/nodeId1,nodeId2/stats/statName1,statName2
 ```
+{% include copy-curl.html %}
+
+### Response body fields
+
+The following table lists the available response body fields.
-Statistic | Description
+Field | Description
 :--- | :---
 `circuit_breaker_triggered` | Indicates whether the circuit breaker is triggered. This statistic is only relevant to approximate k-NN search.
 `total_load_time` | The time in nanoseconds that k-NN has taken to load native library indexes into the cache. This statistic is only relevant to approximate k-NN search.
@@ -41,9 +46,9 @@ Statistic | Description
 `script_compilation_errors` | The number of errors during script compilation. This statistic is only relevant to k-NN score script search.
 `script_query_requests` | The total number of script queries. This statistic is only relevant to k-NN score script search.
 `script_query_errors` | The number of errors during script queries. This statistic is only relevant to k-NN score script search.
-`nmslib_initialized` | Boolean value indicating whether the *nmslib* JNI library has been loaded and initialized on the node.
-`faiss_initialized` | Boolean value indicating whether the *faiss* JNI library has been loaded and initialized on the node.
-`model_index_status` | Status of model system index. Valid values are "red", "yellow", "green". If the index does not exist, this will be null.
+`nmslib_initialized` | Boolean value indicating whether the `nmslib` JNI library has been loaded and initialized on the node.
+`faiss_initialized` | Boolean value indicating whether the `faiss` JNI library has been loaded and initialized on the node.
+`model_index_status` | The status of the model system index. Valid values are `red`, `yellow`, and `green`. If the index does not exist, this value is `null`.
 `indexing_from_model_degraded` | Boolean value indicating if indexing from a model is degraded. This happens if there is not enough JVM memory to cache the models.
 `training_requests` | The number of training requests made to the node.
 `training_errors` | The number of training errors that have occurred on the node.
 
@@ -53,9 +58,11 @@ Statistic | Description
 Some statistics contain *graph* in the name. In these cases, *graph* is synonymous with *native library index*. The term *graph* is reflective of when the plugin only supported the HNSW algorithm, which consists of hierarchical graphs.
 {: .note}
 
-#### Usage
+#### Example request
 
-The following code examples show how to retrieve statistics related to the k-NN plugin. The first example fetches comprehensive statistics for the k-NN plugin across all nodes in the cluster, while the second example retrieves specific metrics (circuit breaker status and graph memory usage) for a single node.
+The following examples show how to retrieve statistics related to the k-NN plugin.
+
+The following example fetches comprehensive statistics for the k-NN plugin across all nodes in the cluster:
 
 ```json
 GET /_plugins/_knn/stats?pretty
@@ -104,6 +113,9 @@ GET /_plugins/_knn/stats?pretty
 }
 }
 ```
+{% include copy-curl.html %}
+
+The following example retrieves specific metrics (circuit breaker status and graph memory usage) for a single node:
 
 ```json
 GET /_plugins/_knn/HYMrXXsBSamUkcAjhjeN0w/stats/circuit_breaker_triggered,graph_memory_usage?pretty
@@ -124,6 +134,7 @@ GET /_plugins/_knn/HYMrXXsBSamUkcAjhjeN0w/stats/circuit_breaker_triggered,graph_
 }
 }
 ```
+{% include copy-curl.html %}
 
 ## Warmup operation
 
@@ -135,9 +146,9 @@ As an alternative, you can avoid this latency issue by running the k-NN plugin w
 
 After the process is finished, you can search against the indexes without initial latency penalties.
The warmup API operation is idempotent, so if a segment's native library files are already loaded into memory, this operation has no effect. It only loads files not currently stored in memory. -#### Usage +#### Example request -This request performs a warmup on three indexes: +The following request performs a warmup on three indexes: ```json GET /_plugins/_knn/warmup/index1,index2,index3?pretty @@ -149,14 +160,16 @@ GET /_plugins/_knn/warmup/index1,index2,index3?pretty } } ``` +{% include copy-curl.html %} -`total` indicates how many shards the k-NN plugin attempted to warm up. The response also includes the number of shards the plugin succeeded and failed to warm up. +The `total` value indicates how many shards the k-NN plugin attempted to warm up. The response also includes the number of shards the plugin succeeded and failed to warm up. The call does not return results until the warmup operation finishes or the request times out. If the request times out, then the operation continues on the cluster. To monitor the warmup operation, use the OpenSearch `_tasks` API: ```json GET /_tasks ``` +{% include copy-curl.html %} After the operation has finished, use the [k-NN `_stats` API operation](#stats) to see what the k-NN plugin loaded into the graph. @@ -181,7 +194,7 @@ The k-NN clear cache API evicts all native library files for all shards (primari This API operation only works with indexes created using the `faiss` and `nmslib` (deprecated) engines. It has no effect on indexes created using the `lucene` engine. {: .note} -#### Usage +#### Example request The following request evicts the native library indexes of three indexes from the cache: @@ -195,6 +208,7 @@ POST /_plugins/_knn/clear_cache/index1,index2,index3?pretty } } ``` +{% include copy-curl.html %} The `total` parameter indicates the number of shards that the API attempted to clear from the cache. The response includes both the number of cleared shards and the number of shards that the plugin failed to clear. @@ -210,22 +224,31 @@ POST /_plugins/_knn/clear_cache/index*?pretty } } ``` +{% include copy-curl.html %} The API call does not return results until the operation finishes or the request times out. If the request times out, then the operation continues on the cluster. To monitor the request, use the `_tasks` API, as shown in the following example: ```json GET /_tasks ``` +{% include copy-curl.html %} When the operation finishes, use the [k-NN `_stats` API operation](#stats) to see which indexes have been evicted from the cache. ## Get a model -The GET model operation retrieves information about models present in the cluster. Some native library index configurations require a training step before indexing and querying can begin. The output of training is a model that can be used to initialize native library index files during indexing. The model is serialized in the k-NN model system index. See the following GET example: +The GET model operation retrieves information about models present in the cluster. Some native library index configurations require a training step before indexing and querying can begin. The output of training is a model that can be used to initialize native library index files during indexing. The model is serialized in the k-NN model system index. -``` +#### Example request + +```json GET /_plugins/_knn/models/{model_id} ``` +{% include copy-curl.html %} + +### Response body fields + +The following table lists the available response body fields. 
Response field | Description
 :--- | :---
@@ -235,13 +258,15 @@ Response field | Description
 `timestamp` | The date and time when the model was created.
 `description` | A user-provided description of the model.
 `error` | An error message explaining why the model is in a failed state.
-`space_type` | The space type for which this model is trained, for example, Euclidean or cosine. Note - this value can be set in the top-level of the request as well
+`space_type` | The space type for which this model is trained, for example, Euclidean or cosine. Note: This value can be set at the top level of the request.
 `dimension` | The dimensionality of the vector space for which this model is designed.
 `engine` | The native library used to create the model, either `faiss` or `nmslib` (deprecated).
 
-### Usage
+#### Example request
 
-The following examples show how to retrieve information about a specific model using the k-NN plugin API. The first example returns all the available information about the model, while the second example shows how to selectively retrieve fields.
+The following examples show how to retrieve information about a specific model using the k-NN plugin API.
+
+The following example returns all the available information about the model:
 
 ```json
 GET /_plugins/_knn/models/test-model?pretty
@@ -257,6 +282,9 @@ GET /_plugins/_knn/models/test-model?pretty
   "engine" : "faiss"
 }
 ```
+{% include copy-curl.html %}
+
+The following example shows how to selectively retrieve fields:
 
 ```json
 GET /_plugins/_knn/models/test-model?pretty&filter_path=model_id,state
 {
   "model_id" : "test-model",
   "state" : "created"
 }
 ```
+{% include copy-curl.html %}
 
 ## Search for a model
 
 You can use an OpenSearch query to search for a model in the index. See the following usage example.
 
-#### Usage
+#### Example request
 
 The following example shows how to search for k-NN models in an OpenSearch cluster and how to retrieve the metadata for those models, excluding the potentially large `model_blob` field:
 
 ```json
 GET/POST /_plugins/_knn/models/_search?pretty&_source_excludes=model_blob
 {
     "query": {
          ...
     }
 }
+```
+{% include copy-curl.html %}
 
+The response contains the model information:
+
+```json
 {
     "took" : 0,
     "timed_out" : false,
@@ -322,7 +356,7 @@ GET/POST /_plugins/_knn/models/_search?pretty&_source_excludes=model_blob
 
 You can delete a model in the cluster by using the DELETE operation. See the following usage example.
 
-#### Usage
+#### Example request
 
 The following example shows how to delete a k-NN model:
 
 ```json
 DELETE /_plugins/_knn/models/{model_id}
 {
   "acknowledged": true
 }
 ```
+{% include copy-curl.html %}
 
 ## Train a model
 
 You can create and train a model that can be used for initializing k-NN native library indexes during indexing. This API pulls training data from a `knn_vector` field in a training index, creates and trains a model, and then serializes it to the model system index. Training data must match the dimension passed in the request body. A response is returned when training begins. To monitor the model's state, use the [Get model API](#get-a-model).
 
+### Query parameters
+
+The following table lists the available query parameters.
+
 Query parameter | Description
 :--- | :---
 `model_id` | The unique identifier of the fetched model. If not specified, then a random ID is generated. Optional.
 `node_id` | Specifies the preferred node on which to execute the training process. If provided, the specified node is used for training if it has the necessary capabilities and resources available. Optional.
 
-Request parameter | Description
+### Request body fields
+
+The following table lists the available request body fields.
+
+Request field | Description
 :--- | :---
 `training_index` | The index from which the training data is retrieved.
 `training_field` | The `knn_vector` field in the `training_index` from which the training data is retrieved. The dimension of this field must match the `dimension` passed in this request.
@@ -354,7 +397,7 @@ Request parameter | Description
 `method` | The configuration of the approximate k-NN method used for search operations. For more information about the available methods, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). The method requires training to be valid.
 `space_type` | The space type for which this model is trained, for example, Euclidean or cosine. Note: This value can also be set in the `method` parameter.
 
-#### Usage
+#### Example request
 
 The following examples show how to initiate the training process for a k-NN model:
 
```json
POST /_plugins/_knn/models/{model_id}/_train?preference={node_id}
{
    "training_index": "train-index",
    "training_field": "train-field",
    "dimension": 16,
    "description": "My model",
    "space_type": "l2",
    "method": {
        "name": "ivf",
        "engine": "faiss",
        "parameters": {
            "nlist": 4,
            "nprobes": 2
        }
    }
}
-
-{
-  "model_id": "model_x"
-}
```
+{% include copy-curl.html %}
+
 
```json
POST /_plugins/_knn/models/_train?preference={node_id}
{
    "training_index": "train-index",
    "training_field": "train-field",
    "dimension": 16,
    "description": "My model",
    "space_type": "l2",
    "method": {
        "name": "ivf",
        "engine": "faiss",
        "parameters": {
            "nlist": 4,
            "nprobes": 2
        }
    }
}
+```
+{% include copy-curl.html %}
+
+#### Example response
 
+```json
{
  "model_id": "dcdwscddscsad"
}
diff --git a/_vector-search/creating-vector-index.md b/_vector-search/creating-vector-index.md
index acd1b5b834..b17b186f29 100644
--- a/_vector-search/creating-vector-index.md
+++ b/_vector-search/creating-vector-index.md
@@ -48,15 +48,15 @@ Regardless of the type of vector search, the following elements are part of crea
 Set the `dimension` property to match the size of the vectors used.
 
 4. **Choose a space type**:
-   Select a distance metric for similarity comparisons, such as `l2` (Euclidean distance) or `cosinesimil`. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/).
+   Optionally, select a distance metric for similarity comparisons, such as `l2` (Euclidean distance) or `cosinesimil`. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/).
 
 5. (Advanced) **Select a method**:
    Optionally, configure the indexing method, such as HNSW or IVF, to optimize vector search performance. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/).
 
 To create a vector index, choose one of the following options:
 
-- [Pre-generated embeddings or raw vectors](#pre-generated-embeddings-or-raw-vectors): Ingest pre-generated embeddings or raw vectors into your index to perform raw vector search.
-- [Auto-generated embeddings](#auto-generated-embeddings): Ingest text that will be converted into vector embeddings within OpenSearch in order to perform semantic search using ML models.
+- [Store raw vectors or embeddings generated outside of OpenSearch](#storing-raw-vectors-or-embeddings-generated-outside-of-opensearch): Ingest pre-generated embeddings or raw vectors into your index for raw vector search.
+- [Convert data to embeddings during ingestion](#converting-data-to-embeddings-during-ingestion): Ingest text that will be converted into vector embeddings within OpenSearch in order to perform semantic search using ML models.
 
 The following table summarizes key index configuration differences for the supported use cases.
 
@@ -64,10 +64,10 @@ The following table summarizes key index configuration differences for the suppo
 
 | Feature | Vector field type | Ingest pipeline | Transformation | Use case |
 |--------------------------|-----------------------|---------------------|-------------------------|-------------------------|
-| **Pre-generated embeddings or raw vectors** | [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) | Not required | Direct ingestion | Raw vector search |
-| **Auto-generated embeddings** | [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) | Required | Auto-generated vectors | ML-powered search |
+| **Store raw vectors or embeddings generated outside of OpenSearch** | [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) | Not required | Direct ingestion | Raw vector search |
+| **Convert data to embeddings during ingestion** | [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) | Required | Auto-generated vectors | ML-powered search |
 
-## Pre-generated embeddings or raw vectors
+## Storing raw vectors or embeddings generated outside of OpenSearch
 
 To ingest raw vectors into an index, configure a vector field (in this request, `my_vector`) and specify its `dimension`:
 
@@ -81,21 +81,17 @@ PUT /my-raw-vector-index
       "properties": {
         "my_vector": {
           "type": "knn_vector",
-          "dimension": 128,
-          "method": {
-            "name": "hnsw",
-            "engine": "faiss",
-            "space_type": "l2"
-          }
+          "dimension": 3
         }
       }
   }
 }
 ```
+{% include copy-curl.html %}
 
-## Auto-generated embeddings
+## Converting data to embeddings during ingestion
 
-Auto-generating embeddings require configuring an [ingest pipeline]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) with a model ID of the embedding model:
+To automatically generate embeddings during ingestion, configure an [ingest pipeline]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) with the model ID of an embedding model:
 
 ```json
 PUT /_ingest/pipeline/nlp-ingest-pipeline
@@ -133,12 +129,7 @@ PUT /my-semantic-search-index
       },
       "passage_embedding": {
         "type": "knn_vector",
-        "dimension": 768,
-        "method": {
-          "name": "hnsw",
-          "engine": "lucene",
-          "space_type": "l2"
-        }
+        "dimension": 768
       }
     }
   }
@@ -150,5 +141,4 @@ PUT /my-semantic-search-index
 
 - [Ingesting data into a vector index]({{site.url}}{{site.baseurl}}/vector-search/ingesting-data/)
 - [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/)
-- [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/)
-- [k-NN vector field type]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/)
\ No newline at end of file
+- [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/)
\ No newline at end of file
diff --git a/_vector-search/specialized-operations/filter-search-knn.md b/_vector-search/filter-search-knn.md
similarity index 99%
rename from _vector-search/specialized-operations/filter-search-knn.md
rename to _vector-search/filter-search-knn.md
index 391021ee65..fbf489e9e5 100644
---
a/_vector-search/specialized-operations/filter-search-knn.md
+++ b/_vector-search/filter-search-knn.md
@@ -1,15 +1,13 @@
 ---
 layout: default
-title: Vector search with filters
-nav_order: 20
-parent: Specialized vector search
-has_children: false
+title: Filtering data
+nav_order: 40
 has_math: true
 redirect_from:
   - /search-plugins/knn/filter-search-knn/
 ---
 
-# Vector search with filters
+# Filtering data
 
 To refine vector search results, you can filter a vector search using one of the following methods:
 
@@ -628,7 +626,9 @@ The response includes documents containing the matching hotels:
 }
 ```
+
 ### post-filter parameter
+
+#### x64 architecture
+
+
+For the x64 architecture, the following versions of the Faiss library are built and shipped with the artifact:
+
+- `libopensearchknn_faiss_avx512_spr.so`: The Faiss library containing advanced AVX-512 SIMD instructions for newer-generation processors, available on public clouds such as AWS for c/m/r 7i or newer instances.
+- `libopensearchknn_faiss_avx512.so`: The Faiss library containing AVX-512 SIMD instructions.
+- `libopensearchknn_faiss_avx2.so`: The Faiss library containing AVX2 SIMD instructions.
+- `libopensearchknn_faiss.so`: The non-optimized Faiss library without SIMD instructions.
+
+When using the Faiss library, the performance ranking is as follows: advanced AVX-512 > AVX-512 > AVX2 > no optimization.
+{: .note }
+
+If your hardware supports advanced AVX-512 (SPR), OpenSearch loads the `libopensearchknn_faiss_avx512_spr.so` library at runtime.
+
+If your hardware supports AVX-512, OpenSearch loads the `libopensearchknn_faiss_avx512.so` library at runtime.
+
+If your hardware supports AVX2 but doesn't support AVX-512, OpenSearch loads the `libopensearchknn_faiss_avx2.so` library at runtime.
+
+To disable the advanced AVX-512 (for Sapphire Rapids or newer-generation processors), AVX-512, and AVX2 SIMD instructions and load the non-optimized Faiss library (`libopensearchknn_faiss.so`), specify the `knn.faiss.avx512_spr.disabled`, `knn.faiss.avx512.disabled`, and `knn.faiss.avx2.disabled` static settings as `true` in `opensearch.yml` (by default, all of these are `false`).
+
+Note that to update a static setting, you must stop the cluster, change the setting, and restart the cluster. For more information, see [Static settings]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#static-settings).
+
+#### ARM64 architecture
+
+For the ARM64 architecture, only one performance-boosting Faiss library (`libopensearchknn_faiss.so`) is built and shipped. The library contains Neon SIMD instructions and cannot be disabled.
+
 ### Example configurations
 
 The following example uses the `ivf` method without specifying an encoder (by default, OpenSearch uses the `flat` encoder):
@@ -334,6 +369,22 @@ If memory is a concern, consider adding a PQ encoder to your HNSW or IVF index.
 
 You can reduce the memory footprint by a factor of 2, with a minimal loss in search quality, by using the [`fp_16` encoder]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/faiss-16-bit-quantization/). If your vector dimensions are within the [-128, 127] byte range, we recommend using the [byte quantizer]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#byte-vectors) to reduce the memory footprint by a factor of 4. To learn more about vector quantization options, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/).
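As a concrete illustration of the `fp_16` encoder mentioned above, the following is a minimal mapping sketch that enables Faiss scalar quantization on an HNSW field; the index name, field name, and dimension are hypothetical:

```json
PUT /test-index
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 8,
        "space_type": "l2",
        "method": {
          "name": "hnsw",
          "engine": "faiss",
          "parameters": {
            "encoder": {
              "name": "sq",
              "parameters": {
                "type": "fp16"
              }
            }
          }
        }
      }
    }
  }
}
```

With this configuration, vectors are stored as 16-bit floating-point values, roughly halving memory use compared to full-precision storage.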
+## Engine recommendations
+
+In general, select Faiss for large-scale use cases. Lucene is a good option for smaller deployments and offers benefits like smart filtering, where the optimal filtering strategy—pre-filtering, post-filtering, or exact k-NN—is automatically applied depending on the situation. The following table summarizes the differences between each option.
+
+| | Faiss/HNSW | Faiss/IVF | Lucene/HNSW |
+|:---|:---|:---|:---|
+| Max dimensions | 16,000 | 16,000 | 16,000 |
+| Filter | Post-filter | Post-filter | Filter during search |
+| Training required | No (Yes for product quantization) | Yes | No |
+| Similarity metrics | `l2`, `innerproduct` | `l2`, `innerproduct` | `l2`, `cosinesimil` |
+| Number of vectors | Tens of billions | Tens of billions | Less than 10 million |
+| Indexing latency | Low | Lowest | Low |
+| Query latency and quality | Low latency and high quality | Low latency and low quality | High latency and high quality |
+| Vector compression | Flat<br><br>Product quantization | Flat<br><br>Product quantization | Flat |
+| Memory consumption | High<br><br>Low with product quantization | Medium<br><br>Low with product quantization | High |
+
 ## Memory estimation
 
 In a typical OpenSearch cluster, a certain portion of RAM is reserved for the JVM heap. OpenSearch allocates native library indexes to a portion of the remaining RAM. This portion's size is determined by the `circuit_breaker_limit` cluster setting. By default, the limit is set to 50%.
diff --git a/_field-types/supported-field-types/knn-spaces.md b/_field-types/supported-field-types/knn-spaces.md
index 4ea3b764ae..a90f0379fe 100644
--- a/_field-types/supported-field-types/knn-spaces.md
+++ b/_field-types/supported-field-types/knn-spaces.md
@@ -11,6 +11,35 @@ has_math: true
 
 In vector search, a _space_ defines how the distance (or similarity) between two vectors is calculated. The choice of space affects how nearest neighbors are determined during search operations.
 
+## Distance calculation
+
+A space defines the function used to measure the distance between two points in order to determine the k-nearest neighbors. From the k-NN perspective, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a higher score equates to a better result. OpenSearch supports the following spaces.
+
+Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the specific engine section in the [method documentation]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/).
+{: .note}
+
+| Space type | Search type | Distance function ($$d$$) | OpenSearch score |
+| :--- | :--- | :--- | :--- |
+| `l1` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = \sum_{i=1}^n \lvert x_i - y_i \rvert $$ | $$ score = {1 \over {1 + d} } $$ |
+| `l2` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = \sum_{i=1}^n (x_i - y_i)^2 $$ | $$ score = {1 \over 1 + d } $$ |
+| `linf` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = max(\lvert x_i - y_i \rvert) $$ | $$ score = {1 \over 1 + d } $$ |
+| `cosinesimil` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = 1 - cos { \theta } = 1 - {\mathbf{x} \cdot \mathbf{y} \over \lVert \mathbf{x}\rVert \cdot \lVert \mathbf{y}\rVert}$$$$ = 1 - {\sum_{i=1}^n x_i y_i \over \sqrt{\sum_{i=1}^n x_i^2} \cdot \sqrt{\sum_{i=1}^n y_i^2}}$$,<br>where $$\lVert \mathbf{x}\rVert$$ and $$\lVert \mathbf{y}\rVert$$ represent the norms of vectors $$\mathbf{x}$$ and $$\mathbf{y}$$, respectively. | $$ score = {2 - d \over 2} $$ |
+| `innerproduct` (supported for Lucene in OpenSearch version 2.13 and later) | Approximate | **NMSLIB** and **Faiss**:<br>$$ d(\mathbf{x}, \mathbf{y}) = - {\mathbf{x} \cdot \mathbf{y}} = - \sum_{i=1}^n x_i y_i $$<br><br>**Lucene**:<br>$$ d(\mathbf{x}, \mathbf{y}) = {\mathbf{x} \cdot \mathbf{y}} = \sum_{i=1}^n x_i y_i $$ | **NMSLIB** and **Faiss**:<br>$$ \text{If } d \ge 0, score = {1 \over 1 + d }$$<br>$$\text{If } d < 0, score = -d + 1$$<br><br>**Lucene:**<br>$$ \text{If } d > 0, score = d + 1 $$<br>$$\text{If } d \le 0, score = {1 \over 1 + (-1 \cdot d) }$$ |
+| `innerproduct` (supported for Lucene in OpenSearch version 2.13 and later) | Exact | $$ d(\mathbf{x}, \mathbf{y}) = - {\mathbf{x} \cdot \mathbf{y}} = - \sum_{i=1}^n x_i y_i $$ | $$ \text{If } d \ge 0, score = {1 \over 1 + d }$$<br>$$\text{If } d < 0, score = -d + 1$$ |
+| `hamming` (supported for binary vectors in OpenSearch version 2.16 and later) | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = \text{countSetBits}(\mathbf{x} \oplus \mathbf{y})$$ | $$ score = {1 \over 1 + d } $$ |
+| `hammingbit` (supported for binary and long vectors) | Exact | $$ d(\mathbf{x}, \mathbf{y}) = \text{countSetBits}(\mathbf{x} \oplus \mathbf{y})$$ | $$ score = {1 \over 1 + d } $$ |
+
+The cosine similarity formula does not include the `1 -` prefix. However, because similarity search libraries equate lower scores with closer results, they return `1 - cosineSimilarity` for the cosine similarity space---this is why `1 -` is included in the distance function.
+{: .note }
+
+With cosine similarity, it is not valid to pass a zero vector (`[0, 0, ...]`) as input. This is because the magnitude of such a vector is 0, which raises a `divide by 0` exception in the corresponding formula. Requests containing the zero vector will be rejected, and a corresponding exception will be thrown.
+{: .note }
+
+The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized#binary-vectors).
+{: .note}
+
+## Specifying the space type
+
 The space type is specified when creating an index:
 
 - You can specify the space type at the top level of the field mapping:
@@ -67,30 +96,3 @@ The space type is specified when creating an index:
 }
 ```
 {% include copy-curl.html %}
-
-## Distance calculation
-
-A space defines the function used to measure the distance between two points in order to determine the k-nearest neighbors. From the k-NN perspective, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a higher score equates to a better result. OpenSearch supports the following spaces.
-
-Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the specific engine section in the [method documentation]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/).
-{: .note}
-
-| Space type | Search type | Distance function ($$d$$ ) | OpenSearch score |
-| :--- | :--- | :--- |
-| `l1` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = \sum_{i=1}^n \lvert x_i - y_i \rvert $$ | $$ score = {1 \over {1 + d} } $$ |
-| `l2` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = \sum_{i=1}^n (x_i - y_i)^2 $$ | $$ score = {1 \over 1 + d } $$ |
-| `linf` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = max(\lvert x_i - y_i \rvert) $$ | $$ score = {1 \over 1 + d } $$ |
-| `cosinesimil` | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = 1 - cos { \theta } = 1 - {\mathbf{x} \cdot \mathbf{y} \over \lVert \mathbf{x}\rVert \cdot \lVert \mathbf{y}\rVert}$$$$ = 1 - {\sum_{i=1}^n x_i y_i \over \sqrt{\sum_{i=1}^n x_i^2} \cdot \sqrt{\sum_{i=1}^n y_i^2}}$$,<br>where $$\lVert \mathbf{x}\rVert$$ and $$\lVert \mathbf{y}\rVert$$ represent the norms of vectors $$\mathbf{x}$$ and $$\mathbf{y}$$, respectively. | $$ score = {2 - d \over 2} $$ |
-| `innerproduct` (supported for Lucene in OpenSearch version 2.13 and later) | Approximate | **NMSLIB** and **Faiss**:<br>$$ d(\mathbf{x}, \mathbf{y}) = - {\mathbf{x} \cdot \mathbf{y}} = - \sum_{i=1}^n x_i y_i $$<br><br>**Lucene**:<br>$$ d(\mathbf{x}, \mathbf{y}) = {\mathbf{x} \cdot \mathbf{y}} = \sum_{i=1}^n x_i y_i $$ | **NMSLIB** and **Faiss**:<br>$$ \text{If} d \ge 0, score = {1 \over 1 + d }$$<br>$$\text{If} d < 0, score = −d + 1$$<br><br>**Lucene:**<br>$$ \text{If} d > 0, score = d + 1 $$<br>$$\text{If} d \le 0, score = {1 \over 1 + (-1 \cdot d) }$$ |
-| `innerproduct` (supported for Lucene in OpenSearch version 2.13 and later) | Exact | $$ d(\mathbf{x}, \mathbf{y}) = - {\mathbf{x} \cdot \mathbf{y}} = - \sum_{i=1}^n x_i y_i $$ | $$ \text{If} d \ge 0, score = {1 \over 1 + d }$$<br>$$\text{If} d < 0, score = −d + 1$$ |
-| `hamming` (supported for binary vectors in OpenSearch version 2.16 and later) | Approximate, exact | $$ d(\mathbf{x}, \mathbf{y}) = \text{countSetBits}(\mathbf{x} \oplus \mathbf{y})$$ | $$ score = {1 \over 1 + d } $$ |
-| `hammingbit` (supported for binary and long vectors) | Exact | $$ d(\mathbf{x}, \mathbf{y}) = \text{countSetBits}(\mathbf{x} \oplus \mathbf{y})$$ | $$ score = {1 \over 1 + d } $$ |
-
-The cosine similarity formula does not include the `1 -` prefix. However, because similarity search libraries equate lower scores with closer results, they return `1 - cosineSimilarity` for the cosine similarity space---this is why `1 -` is included in the distance function.
-{: .note }
-
-With cosine similarity, it is not valid to pass a zero vector (`[0, 0, ...]`) as input. This is because the magnitude of such a vector is 0, which raises a `divide by 0` exception in the corresponding formula. Requests containing the zero vector will be rejected, and a corresponding exception will be thrown.
-{: .note }
-
-The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized#binary-vectors).
-{: .note}
diff --git a/_field-types/supported-field-types/knn-vector.md b/_field-types/supported-field-types/knn-vector.md
index 7b2d4b7f41..4e82e7fed5 100644
--- a/_field-types/supported-field-types/knn-vector.md
+++ b/_field-types/supported-field-types/knn-vector.md
@@ -15,7 +15,7 @@ The `knn_vector` data type allows you to ingest vectors into an OpenSearch index
 
 ## Example
 
-For example, to map `my_vector` as a `knn_vector`, use the following request:
+To map `my_vector` as a `knn_vector`, use the following request:
 
 ```json
 PUT /test-index
@@ -38,6 +38,35 @@ PUT /test-index
 ```
 {% include copy-curl.html %}
 
+## Optimizing vector storage
+
+To optimize vector storage, you can specify a [vector workload mode]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#vector-workload-modes) as `in_memory` (which optimizes for lowest latency) or `on_disk` (which optimizes for lowest cost). The `on_disk` mode reduces memory usage. Optionally, you can specify a [`compression_level`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#compression-levels) to fine-tune the vector memory consumption:
+
+
+```json
+PUT test-index
+{
+  "settings": {
+    "index": {
+      "knn": true
+    }
+  },
+  "mappings": {
+    "properties": {
+      "my_vector": {
+        "type": "knn_vector",
+        "dimension": 3,
+        "space_type": "l2",
+        "mode": "on_disk",
+        "compression_level": "16x"
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+
 ## Method definitions
 
 [Method definitions]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) are used when the underlying [approximate k-NN (ANN)]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) algorithm does not require training. For example, the following `knn_vector` field specifies that the Faiss implementation of HNSW should be used for ANN search. During indexing, Faiss builds the corresponding HNSW segment files:
@@ -134,7 +163,10 @@ Parameter | Data type | Description
 :--- | :--- | :---
-`space_type` | String | The vector space used to calculate the distance between vectors. Valid values are `l1`, `l2`, `linf`, `cosinesimil`, `innerproduct`, `hamming`, and `hammingbit`. Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the specific engine section. Note: This value can also be specified within the `method`. Optional. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/).
+`data_type` | String | The data type for the vector elements. Valid values are `binary`, `byte`, and `float`. Optional. Default is `float`.
+`space_type` | String | The vector space used to calculate the distance between vectors. Valid values are `l1`, `l2`, `linf`, `cosinesimil`, `innerproduct`, `hamming`, and `hammingbit`. Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the specific engine section. Note: This value can also be specified within the `method`. Optional. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/).
+`mode` | String | Sets appropriate default values for k-NN parameters based on your priority: either low latency or low cost. Valid values are `in_memory` and `on_disk`. Optional. Default is `in_memory`. For more information, see [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/).
+`compression_level` | String | Selects a quantization encoder that reduces vector memory consumption by the given factor. Valid values are `1x`, `2x`, `4x`, `8x`, `16x`, and `32x`. Optional. For more information, see [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/).
 `method` | Object | The algorithm used for organizing vector data at indexing time and searching it at search time. Used when the ANN algorithm does not require training. Optional. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/).
 `model_id` | String | The model ID for a trained model. Used when the ANN algorithm requires training. See [Model IDs](#model-ids). Optional.
 
@@ -142,5 +174,6 @@ Parameter | Data type | Description
 
 - [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/)
 - [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/)
+- [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/)
 - [Vector search]({{site.url}}{{site.baseurl}}/vector-search/)
 - [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/)
\ No newline at end of file
diff --git a/_query-dsl/specialized/kNN.md b/_query-dsl/specialized/kNN.md
index b322b3acb2..f4195b1b4d 100644
--- a/_query-dsl/specialized/kNN.md
+++ b/_query-dsl/specialized/kNN.md
@@ -27,7 +27,7 @@ The top-level `vector_field` specifies the vector field against which to run a s
 
 Field | Data type | Required/Optional | Description
 :--- | :--- | :--- | :---
-`vector` | Array of floats | Required | The query vector to use for vector search.
+`vector` | Array of floats or bytes | Required | The query vector to use for vector search. The data type of the vector elements must match the data type of the vectors indexed in the [`knn_vector` field]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) being searched.
`k` | Integer | Optional | The number of nearest neighbors to return. Valid values are in the [1, 10,000] range. Required if `max_distance` or `min_score` is not specified. `max_distance` | Float | Optional | The maximum distance threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/). `min_score` | Float | Optional | The minimum score threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/). @@ -202,7 +202,7 @@ GET /my-vector-index/_search ``` {% include copy-curl.html %} -## Related articles +## Next steps - [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) - [Rescoring quantized results to full precision]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#rescoring-quantized-results-to-full-precision) \ No newline at end of file diff --git a/_vector-search/creating-vector-index.md b/_vector-search/creating-vector-index.md index b17b186f29..d01b49fbd7 100644 --- a/_vector-search/creating-vector-index.md +++ b/_vector-search/creating-vector-index.md @@ -41,17 +41,20 @@ Regardless of the type of vector search, the following elements are part of crea 1. **Enable k-NN search**: Set `index.knn` to `true` in the index settings to enable k-NN search functionality. -2. **Define a vector field**: +1. **Define a vector field**: Specify the field that will store the vector data. When defining a `knn_vector` field in OpenSearch, you can select from different data types to balance storage requirements and performance. By default, k-NN vectors are float vectors, but you can also opt for byte or binary vectors for more efficient storage. For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/). -3. **Specify dimension**: +1. **Specify dimension**: Set the `dimension` property to match the size of the vectors used. -4. **Choose a space type**: - Optionally, select a distance metric for similarity comparisons, such as `l2` (Euclidean distance) or `cosinesimil`. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). +1. (Optional) **Choose a space type**: + Select a distance metric for similarity comparisons, such as `l2` (Euclidean distance) or `cosinesimil`. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). -5. (Advanced) **Select a method**: - Optionally, configure the indexing method, such as HNSW or IVF, to optimize vector search performance. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). +1. (Optional) **Select a workload mode and/or compression level**: + Select a workload mode and/or compression level in order to optimize vector storage. For more information, see [Optimizing vector storage]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/). + +1. (Optional, advanced) **Select a method**: + Configure the indexing method, such as HNSW or IVF, to optimize vector search performance. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). 
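Taken together, the elements in the preceding list combine into a single index creation request. The following is a minimal sketch; the index and field names, dimension, and mode are hypothetical:

```json
PUT /my-vector-index
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 3,
        "space_type": "l2",
        "mode": "in_memory"
      }
    }
  }
}
```

Only the field type and `dimension` are required; the space type, mode, and method all fall back to defaults when omitted.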
To create a vector index, choose one of the following options: @@ -65,7 +68,7 @@ The following table summarizes key index configuration differences for the suppo | Feature | Vector field type | Ingest pipeline | Transformation | Use case | |--------------------------|-----------------------|---------------------|-------------------------|-------------------------| | **Store raw vectors or embeddings generated outside of OpenSearch** | [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) | Not required | Direct ingestion | Raw vector search | -| **Convert data to embeddings during ingestion** | [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) | Required | Auto-generated vectors | ML-powered search | +| **Convert data to embeddings during ingestion** | [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) | Required | Auto-generated vectors | ML-powered search<br><br>Automating embedding generation reduces data preprocessing and provides a more managed vector search experience. |
## Storing raw vectors or embeddings generated outside of OpenSearch diff --git a/_vector-search/getting-started/vector-search-options.md b/_vector-search/getting-started/vector-search-options.md index 683074bcef..28acb28bb4 100644 --- a/_vector-search/getting-started/vector-search-options.md +++ b/_vector-search/getting-started/vector-search-options.md @@ -1,6 +1,6 @@ --- layout: default -title: Bringing or generating embeddings +title: Bringing your own or generating embeddings parent: Getting started nav_order: 20 quickstart_cards: @@ -41,9 +41,9 @@ auto_items: link: "/vector-search/searching-data/#searching-auto-generated-embeddings" --- -# Bringing or generating embeddings +# Bringing your own or generating embeddings -In OpenSearch, you can either ingest your own vectors or generate embeddings from text, images, or other data directly within OpenSearch. +In OpenSearch, you can either bring your own vectors or let OpenSearch generate them automatically from your data. Automated embedding generation integrated into OpenSearch reduces data preprocessing effort at ingestion and search time. ### Option 1: Bring your own raw vectors or generated embeddings @@ -71,7 +71,7 @@ Working with embeddings generated outside of OpenSearch involves the following s ### Option 2: Generate embeddings within OpenSearch -OpenSearch automatically generates vector embeddings from text, images, or other data using a machine learning (ML) model. +OpenSearch automatically generates vector embeddings from your data using a machine learning (ML) model. - **Ingestion**: You ingest plain data, and OpenSearch uses an ML model to generate embeddings dynamically. ![Auto-generated embeddings ingestion]({{site.url}}{{site.baseurl}}/images/vector-search/auto-vector-ingest.png) diff --git a/_vector-search/performance-tuning-indexing.md b/_vector-search/performance-tuning-indexing.md index a670eb34ac..5146ee014d 100644 --- a/_vector-search/performance-tuning-indexing.md +++ b/_vector-search/performance-tuning-indexing.md @@ -49,11 +49,7 @@ The `_source` field contains the original JSON document body that was passed at "location": { "type": "knn_vector", "dimension": 2, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "faiss" - } + "space_type": "l2" } } } @@ -75,11 +71,7 @@ In OpenSearch 2.15 or later, you can further improve indexing speed and reduce d "location": { "type": "knn_vector", "dimension": 2, - "space_type": "l2", - "method": { - "name": "hnsw", - "engine": "faiss" - } + "space_type": "l2" } } } diff --git a/_vector-search/performance-tuning-search.md b/_vector-search/performance-tuning-search.md index 68cc732bc6..4cdfb3a620 100644 --- a/_vector-search/performance-tuning-search.md +++ b/_vector-search/performance-tuning-search.md @@ -44,13 +44,5 @@ This API operation only loads the segments of active indexes into the cache. If ## Avoid reading stored fields - If your use case is only to read the IDs and scores of the nearest neighbors, you can disable reading stored fields, which saves time retrieving the vectors from stored fields. +If your use case is only to read the IDs and scores of the nearest neighbors, you can disable reading stored fields, which saves time retrieving the vectors from stored fields. -## Use `mmap` file I/O - - For the Lucene-based approximate k-NN search, there is no dedicated cache layer that speeds up read/write operations.
Instead, the plugin relies on the existing caching mechanism in OpenSearch core. In versions 2.4 and earlier of the Lucene-based approximate k-NN search, read/write operations were based on Java NIO by default, which can be slow, depending on the Lucene version and number of segments per shard. Starting with version 2.5, k-NN enables [`mmap`](https://en.wikipedia.org/wiki/Mmap) file I/O by default when the store type is `hybridfs` (the default store type in OpenSearch). This leads to fast file I/O operations and improves the overall performance of both data ingestion and search. The two file extensions specific to vector values that use `mmap` are `.vec` and `.vem`. For more information about these file extensions, see [the Lucene documentation](https://lucene.apache.org/core/9_0_0/core/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsFormat.html). - - The `mmap` file I/O uses the system file cache rather than memory allocated for the Java heap, so no additional allocation is required. To change the default list of extensions set by the plugin, update the `index.store.hybrid.mmap.extensions` setting at the cluster level using the [Cluster Settings API]({{site.url}}{{site.baseurl}}/api-reference/cluster-api/cluster-settings/). - - This is an expert-level setting that requires closing the index before updating the setting and reopening it after the update. - {: .important} \ No newline at end of file diff --git a/_vector-search/performance-tuning.md b/_vector-search/performance-tuning.md index 1d20153711..433e17a73c 100644 --- a/_vector-search/performance-tuning.md +++ b/_vector-search/performance-tuning.md @@ -10,9 +10,8 @@ redirect_from: # Performance tuning This topic provides performance tuning recommendations to improve indexing and search performance for approximate k-NN (ANN). From a high level, k-NN works according to these principles: -* Native library indexes are created per knn_vector field / (Lucene) segment pair. +* Vector indexes are created per knn_vector field / (Lucene) segment pair. * Queries execute on segments sequentially inside the shard (same as any other OpenSearch query). -* Each native library index in the segment returns <=k neighbors. * The coordinator node picks up final size number of neighbors from the neighbors returned by each shard. This topic also provides recommendations for comparing approximate k-NN to exact k-NN with score script. @@ -21,7 +20,7 @@ This topic also provides recommendations for comparing approximate k-NN to exact Each of the three engines used for approximate k-NN search has its own attributes that make one more sensible to use than the others in a given situation. Use the following information to help determine which engine will best meet your requirements. -In general, NMSLIB (deprecated) outperforms both Faiss and Lucene when used for search operations. However, to optimize for indexing throughput, Faiss is a good option. For relatively smaller datasets (up to a few million vectors), the Lucene engine demonstrates better latencies and recall. At the same time, the size of the index is smallest compared to the other engines, which allows it to use smaller AWS instances for data nodes. For further considerations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#choosing-the-right-method) and [Memory estimation]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#memory-estimation). 
+To optimize for indexing throughput, Faiss is a good option. For relatively smaller datasets (up to a few million vectors), the Lucene engine demonstrates better latencies and recall. At the same time, Lucene produces the smallest index of the available engines, allowing you to use smaller AWS instances for data nodes. For further considerations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#choosing-the-right-method) and [Memory estimation]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#memory-estimation). When considering cluster node sizing, a general approach is to first establish an even distribution of the index across the cluster. However, there are other considerations. To help make these choices, you can refer to the OpenSearch managed service guidance in the section [Sizing domains](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/sizing-domains.html). @@ -35,39 +34,4 @@ The default parameters work on a broader set of use cases, but make sure to run The standard k-NN query and custom scoring option perform differently. Test with a representative set of documents to see if the search results and latencies match your expectations. -Custom scoring works best if the initial filter reduces the number of documents to no more than 20,000. Increasing shard count can improve latency, but be sure to keep shard size within the [recommended guidelines]({{site.url}}{{site.baseurl}}/intro/#primary-and-replica-shards). - -## SIMD optimization for the Faiss engine - -Starting with version 2.13, OpenSearch supports [Single Instruction Multiple Data (SIMD)](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) processing if the underlying hardware supports SIMD instructions (AVX2 on x64 architecture and Neon on ARM64 architecture). SIMD is supported by default on Linux machines only for the Faiss engine. SIMD architecture helps boost overall performance by improving indexing throughput and reducing search latency. Starting with version 2.18, OpenSearch supports AVX-512 SIMD instructions on x64 architecture. Starting with version 2.19, OpenSearch supports advanced AVX-512 SIMD instructions on x64 architecture for Intel Sapphire Rapids or a newer-generation processor, improving the performance of Hamming distance computation. -SIMD optimization is applicable only if the vector dimension is a multiple of 8. -{: .note} - - ### x64 architecture - - For x64 architecture, the following versions of the Faiss library are built and shipped with the artifact: - -- `libopensearchknn_faiss_avx512_spr.so`: The Faiss library containing advanced AVX-512 SIMD instructions for newer-generation processors, available on public clouds such as AWS for c/m/r 7i or newer instances. - -`libopensearchknn_faiss_avx512.so`: The Faiss library containing AVX-512 SIMD instructions. -- `libopensearchknn_faiss_avx2.so`: The Faiss library containing AVX2 SIMD instructions. -- `libopensearchknn_faiss.so`: The non-optimized Faiss library without SIMD instructions. -When using the Faiss library, the performance ranking is as follows: advanced AVX-512 > AVX-512 > AVX2 > no optimization. -{: .note } -If your hardware supports advanced AVX-512(spr), OpenSearch loads the `libopensearchknn_faiss_avx512_spr.so` library at runtime. -If your hardware supports AVX-512, OpenSearch loads the `libopensearchknn_faiss_avx512.so` library at runtime.
- -If your hardware supports AVX2 but doesn't support AVX-512, Open loads the `libopensearchknn_faiss_avx2.so` library at runtime. - -To disable the advanced AVX-512 (for Sapphire Rapids or newer-generation processors), AVX-512, and AVX2 SIMD instructions and load the non-optimized Faiss library (`libopensearchknn_faiss.so`), specify the `knn.faiss.avx512_spr.disabled`, `knn.faiss.avx512.disabled`, and `knn.faiss.avx2.disabled` static settings as `true` in `opensearch.yml` (by default, all of these are `false`). - -Note that to update a static setting, you must stop the cluster, change the setting, and restart the cluster. For more information, see [Static settings]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#static-settings). - -### ARM64 architecture - -For the ARM64 architecture, only one performance-boosting Faiss library (`libopensearchknn_faiss.so`) is built and shipped. The library contains Neon SIMD instructions and cannot be disabled. \ No newline at end of file +Custom scoring works best if the initial filter reduces the number of documents to no more than 20,000. Increasing shard count can improve latency, but be sure to keep shard size within the [recommended guidelines]({{site.url}}{{site.baseurl}}/intro/#primary-and-replica-shards). \ No newline at end of file diff --git a/_vector-search/vector-search-techniques/index.md b/_vector-search/vector-search-techniques/index.md index 3e443d82f9..a725f8e550 100644 --- a/_vector-search/vector-search-techniques/index.md +++ b/_vector-search/vector-search-techniques/index.md @@ -16,7 +16,7 @@ OpenSearch implements vector search as *k-nearest neighbors*, or *k-NN*, search. OpenSearch supports three different methods for obtaining the k-nearest neighbors from an index of vectors: -- [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/)) (approximate k-NN, or ANN): Returns approximate nearest neighbors to the query vector. Usually, approximate search algorithms sacrifice indexing speed and search accuracy in exchange for performance benefits such as lower latency, smaller memory footprints, and more scalable search. For most use cases, approximate search is the best option. +- [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) (approximate k-NN, or ANN): Returns approximate nearest neighbors to the query vector. Usually, approximate search algorithms sacrifice indexing speed and search accuracy in exchange for performance benefits such as lower latency, smaller memory footprints, and more scalable search. For most use cases, approximate search is the best option. - Exact search: A brute-force, exact k-NN search of vector fields. OpenSearch supports the following types of exact search: - [Exact search with scoring script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/): Using a scoring script, you can apply a filter to an index before executing the nearest neighbor search. 
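The scoring script bullet above can be made concrete with a short sketch. The following pre-filtered exact k-NN query is illustrative only; the index name, vector field, filter field, and values are hypothetical:

```json
GET /my-vector-index/_search
{
  "size": 2,
  "query": {
    "script_score": {
      "query": {
        "term": {
          "status": "active"
        }
      },
      "script": {
        "source": "knn_score",
        "lang": "knn",
        "params": {
          "field": "my_vector",
          "query_value": [2.0, 3.0],
          "space_type": "l2"
        }
      }
    }
  }
}
```
{% include copy-curl.html %}

Here the `term` query narrows the candidate set first, and the `knn_score` script then computes exact distances over only the remaining documents.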
@@ -27,41 +27,7 @@ Overall, for larger data sets, you should generally choose the approximate neare ## Approximate search -OpenSearch supports the following algorithms (_methods_) for approximate vector search: - -- Hierarchical Navigable Small World (HNSW) -- Inverted File System (IVF) - -Additionally, you can choose one of the following libraries (_engines_) that implement these algorithms: - -- [Facebook AI Similarity Search (Faiss)](https://github.com/facebookresearch/faiss) -- Lucene -- [Non-Metric Space Library (NMSLIB)](https://github.com/nmslib/nmslib) (deprecated) - -The following table lists the combinations of search methods and libraries supported by the k-NN engine for approximate vector search. - -Method | Engine -:--- | :--- -HNSW | Faiss, Lucene, NMSLIB (deprecated) -IVF | Faiss - -For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) and [Choosing the right method]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#choosing-the-right-method). - -## Engine recommendations - -In general, select Faiss for large-scale use cases. Lucene is a good option for smaller deployments and offers benefits like smart filtering, where the optimal filtering strategy—pre-filtering, post-filtering, or exact k-NN—is automatically applied depending on the situation. The following table summarizes the differences between each option. - -| | Faiss/HNSW | Faiss/IVF | Lucene/HNSW | -|:---|:---|:---|:---| -| Max dimensions | 16,000 | 16,000 | 16,000 | -| Filter | Post-filter | Post-filter | Filter during search | -| Training required | No (Yes for product quantization) | Yes | No | -| Similarity metrics | `l2`, `innerproduct` | `l2`, `innerproduct` | `l2`, `cosinesimil` | -| Number of vectors | Tens of billions | Tens of billions | Less than 10 million | -| Indexing latency | Low | Lowest | Low | -| Query latency and quality | Low latency and high quality | Low latency and low quality | High latency and high quality | -| Vector compression | Flat<br><br>Product quantization | Flat<br><br>Product quantization | Flat | -| Memory consumption | High<br><br>Low with product quantization | Medium<br><br>Low with product quantization | High | +OpenSearch supports multiple backend algorithms (_methods_) and libraries implementing these algorithms (_engines_). It automatically selects the optimal configuration based on the chosen mode and available memory. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). ## Using sparse vectors From 9d42335dc8238b93d20cfb66097d29c3125160f7 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Mon, 17 Feb 2025 14:25:54 -0500 Subject: [PATCH 22/32] Fix links Signed-off-by: Fanit Kolchina --- .../knn-memory-optimized.md | 4 +- .../knn-methods-engines.md | 2 +- _query-dsl/specialized/kNN.md | 2 +- _vector-search/creating-vector-index.md | 4 +- _vector-search/getting-started/concepts.md | 2 +- _vector-search/getting-started/index.md | 40 +++++++++++++++++-- .../getting-started/vector-search-options.md | 6 +-- .../optimizing-storage/binary-quantization.md | 2 +- .../disk-based-vector-search.md | 2 +- .../faiss-16-bit-quantization.md | 4 +- .../faiss-product-quantization.md | 2 +- .../lucene-scalar-quantization.md | 2 +- _vector-search/settings.md | 4 +- .../knn-score-script.md | 2 +- 14 files changed, 55 insertions(+), 23 deletions(-) diff --git a/_field-types/supported-field-types/knn-memory-optimized.md b/_field-types/supported-field-types/knn-memory-optimized.md index 89dcd74f78..8ca661d196 100644 --- a/_field-types/supported-field-types/knn-memory-optimized.md +++ b/_field-types/supported-field-types/knn-memory-optimized.md @@ -173,7 +173,7 @@ In [k-NN benchmarking tests](https://github.com/opensearch-project/opensearch-be When using `byte` vectors, expect some loss of precision in the recall compared to using `float` vectors. Byte vectors are useful in large-scale applications and use cases that prioritize a reduced memory footprint in exchange for a minimal loss of recall. {: .important} -When using `byte` vectors with the `faiss` engine, we recommend using [SIMD optimization]({{site.url}}{{site.baseurl}}/vector-search/performance-tuning/#simd-optimization-for-the-faiss-engine), which helps to significantly reduce search latencies and improve indexing throughput. +When using `byte` vectors with the `faiss` engine, we recommend using [SIMD optimization]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#simd-optimization), which helps to significantly reduce search latencies and improve indexing throughput. {: .important} Introduced in k-NN plugin version 2.9, the optional `data_type` parameter defines the data type of a vector. The default value of this parameter is `float`.
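The `data_type` parameter described above is set directly in the mapping. As a hypothetical sketch (the index and field names are invented), a `byte` vector field might be declared as follows; each indexed element must then be an integer in the [-128, 127] range:

```json
PUT /my-byte-vector-index
{
  "settings": {
    "index.knn": true
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 8,
        "data_type": "byte",
        "space_type": "l2"
      }
    }
  }
}
```
{% include copy-curl.html %}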
@@ -919,7 +919,7 @@ The memory required for IVF can be estimated using the following formula, where 1.1 * (((dimension / 8) * num_vectors) + (nlist * dimension / 8)) ``` -## Related articles +## Next steps - [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/kNN/) - [Disk-based vector search]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/disk-based-vector-search/) diff --git a/_field-types/supported-field-types/knn-methods-engines.md b/_field-types/supported-field-types/knn-methods-engines.md index e40dfe3c06..2108af5237 100644 --- a/_field-types/supported-field-types/knn-methods-engines.md +++ b/_field-types/supported-field-types/knn-methods-engines.md @@ -415,7 +415,7 @@ As an example, assume you have a million vectors with a `dimension` of `256` and 1.1 * (((4 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 1.126 GB ``` -## Related articles +## Next steps - [Performance tuning]({{site.url}}{{site.baseurl}}/vector-search/performance-tuning/) - [Optimizing vector storage]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/) diff --git a/_query-dsl/specialized/kNN.md b/_query-dsl/specialized/kNN.md index f4195b1b4d..b60ebc18c0 100644 --- a/_query-dsl/specialized/kNN.md +++ b/_query-dsl/specialized/kNN.md @@ -31,7 +31,7 @@ Field | Data type | Required/Optional | Description `k` | Integer | Optional | The number of nearest neighbors to return. Valid values are in the [1, 10,000] range. Required if `max_distance` or `min_score` is not specified. `max_distance` | Float | Optional | The maximum distance threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/). `min_score` | Float | Optional | The minimum score threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/). -`filter` | Object | Optional | A filter to apply to the k-NN search. For more information, see [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/filter-search-knn/). **Important**: Filter can only be used with the `faiss` or `lucene` engines. +`filter` | Object | Optional | A filter to apply to the k-NN search. For more information, see [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/). **Important**: Filter can only be used with the `faiss` or `lucene` engines. `method_parameters` | Object | Optional | Additional parameters for fine-tuning the search:<br>- `ef_search` (Integer): The number of vectors to examine (for `hnsw` method)<br>- `nprobes` (Integer): The number of buckets to examine (for `ivf` method). For more information, see [Specifying method parameters in the query](#specifying-method-parameters-in-the-query). `rescore` | Object or Boolean | Optional | Parameters for configuring rescoring functionality:<br>- `oversample_factor` (Float): Controls the oversampling of candidate vectors before ranking. Valid values are in the `[1.0, 100.0]` range. Default is `1.0` (no rescoring). To use the default `oversample_factor` of `1.0`, set `rescore` to `true`. For more information, see [Rescoring results](#rescoring-results). `expand_nested_docs` | Boolean | Optional | When `true`, retrieves scores for all nested field documents within each parent document. Used with nested queries. For more information, see [Vector search with nested fields]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/nested-search-knn/). diff --git a/_vector-search/creating-vector-index.md b/_vector-search/creating-vector-index.md index d01b49fbd7..1e6282eb33 100644 --- a/_vector-search/creating-vector-index.md +++ b/_vector-search/creating-vector-index.md @@ -58,7 +58,7 @@ Regardless of the type of vector search, the following elements are part of crea To create a vector index, choose one of the following options: -- [Store raw vectors or embeddings generated outside of OpenSearch](#storing-raw-vectors-or-generated-outside-of-opensearch): Ingest pre-generated embeddings or raw vectors into your index for raw vector search. +- [Store raw vectors or embeddings generated outside of OpenSearch](#storing-raw-vectors-or-embeddings-generated-outside-of-opensearch): Ingest pre-generated embeddings or raw vectors into your index for raw vector search. - [Convert data to embeddings during ingestion](#converting-data-to-embeddings-during-ingestion): Ingest text that will be converted into vector embeddings within OpenSearch in order to perform semantic search using ML models. @@ -140,7 +140,7 @@ PUT /my-semantic-search-index ``` {% include copy-curl.html %} -## Related articles +## Next steps - [Ingesting data into a vector index]({{site.url}}{{site.baseurl}}/vector-search/searching-data/) - [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) diff --git a/_vector-search/getting-started/concepts.md b/_vector-search/getting-started/concepts.md index fb7eff82df..c8c237135d 100644 --- a/_vector-search/getting-started/concepts.md +++ b/_vector-search/getting-started/concepts.md @@ -81,7 +81,7 @@ _k-nearest neighbors (k-NN) search_ finds the k most similar vectors to a given ### Multimodal search -[_Multimodal search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/ multimodal-search/) enables searching across multiple types of data, such as text, images, and audio. It allows queries in one format (for example, text) to retrieve results in another (for example, images). +[_Multimodal search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/multimodal-search/) enables searching across multiple types of data, such as text, images, and audio. It allows queries in one format (for example, text) to retrieve results in another (for example, images).
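Pulling together the `knn` query fields from the table updated above, the following is a hedged sketch of a single query; the index name, field name, vector values, and parameter values are hypothetical:

```json
GET /my-vector-index/_search
{
  "query": {
    "knn": {
      "my_vector": {
        "vector": [1.5, 2.5],
        "k": 10,
        "method_parameters": {
          "ef_search": 100
        },
        "rescore": {
          "oversample_factor": 2.0
        }
      }
    }
  }
}
```
{% include copy-curl.html %}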
### Radial search diff --git a/_vector-search/getting-started/index.md b/_vector-search/getting-started/index.md index e6c290474d..899c7a52f8 100644 --- a/_vector-search/getting-started/index.md +++ b/_vector-search/getting-started/index.md @@ -22,22 +22,54 @@ Before you start, ensure that [Docker](https://docs.docker.com/get-docker/) is i Download and run OpenSearch: ```bash -docker pull opensearchproject/opensearch:latest && docker run -it -p 9200:9200 -e "discovery.type=single-node" -e "DISABLE_SECURITY_PLUGIN=true" opensearchproject/opensearch:latest +docker pull opensearchproject/opensearch:latest && docker run -it -p 9200:9200 -p 9600:9600 -e "discovery.type=single-node" -e "DISABLE_SECURITY_PLUGIN=true" opensearchproject/opensearch:latest ``` {% include copy.html %} -OpenSearch is now running on port 9200: +OpenSearch is now running on port 9200. Note that this demo configuration is insecure and should not be run in production environments. + +
+<details markdown="block"> +  <summary> +    Test it +  </summary> +  {: .text-delta} +To verify that OpenSearch is running, send the following request: ```bash curl https://localhost:9200 ``` {% include copy.html %} +You should get a response that looks like this: + +```json +{ + "name" : "a937e018cee5", + "cluster_name" : "docker-cluster", + "cluster_uuid" : "GLAjAG6bTeWErFUy_d-CLw", + "version" : { + "distribution" : "opensearch", + "number" : <version>, + "build_type" : <build-type>, + "build_hash" : <build-hash>, + "build_date" : <build-date>, + "build_snapshot" : false, + "lucene_version" : <lucene-version>, + "minimum_wire_compatibility_version" : "7.10.0", + "minimum_index_compatibility_version" : "7.0.0" + }, + "tagline" : "The OpenSearch Project: https://opensearch.org/" +} +``` + +</details>
+ For more information about installing OpenSearch, see [Installation quickstart]({{site.url}}{{site.baseurl}}/getting-started/quickstart/) and [Install and upgrade OpenSearch]({{site.url}}{{site.baseurl}}/install-and-configure/). ## Step 1: Create a vector index -First, create an index that will store sample hotel data. To signal to OpenSearch that this is a vector index, set `index.knn` to `true`. You'll store the vectors in a vector field called `location`. The vectors you'll ingest will be two-dimensional and the distance between vectors will be calculated using [Euclidean `l2` similarity metric]({{site.url}}{{site.baseurl}}/getting-started/vector-search-basics/#calculating-similarity): +First, create an index that will store sample hotel data. To signal to OpenSearch that this is a vector index, set `index.knn` to `true`. You'll store the vectors in a vector field called `location`. The vectors you'll ingest will be two-dimensional and the distance between vectors will be calculated using [Euclidean `l2` similarity metric]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/#calculating-similarity): ```json PUT /hotels-index @@ -168,5 +200,5 @@ If your data isn't already in vector format, you can generate vector embeddings - [Vector search basics]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/) - [Bringing or generating embeddings ]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-options/) -- [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/filter-search-knn/) +- [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/) - [Generating vector embeddings within OpenSearch]({{site.url}}{{site.baseurl}}/vector-search/getting-started/auto-generated-embeddings/) \ No newline at end of file diff --git a/_vector-search/getting-started/vector-search-options.md b/_vector-search/getting-started/vector-search-options.md index 28acb28bb4..8039f671b0 100644 --- a/_vector-search/getting-started/vector-search-options.md +++ b/_vector-search/getting-started/vector-search-options.md @@ -6,7 +6,7 @@ nav_order: 20 quickstart_cards: - heading: "Getting started with vector search" description: "Use raw vectors or embeddings generated outside of OpenSearch" - link: "/vector-search/getting-started/pre-generated-embeddings/" + link: "/vector-search/getting-started/" tutorial_cards: - heading: "Generating embeddings within OpenSearch" description: "Automatically convert data to embeddings within OpenSearch" @@ -19,13 +19,13 @@ pre_items: description: "Generate embeddings outside of OpenSearch using your favorite embedding utility." - heading: "Create an OpenSearch index" description: "Create an OpenSearch index to upload your embeddings." - link: "/vector-search/creating-vector-index/#pre-generated-embeddings-or-raw-vectors" + link: "/vector-search/creating-vector-index/#storing-raw-vectors-or-embeddings-generated-outside-of-opensearch" - heading: "Ingest embeddings" description: "Ingest your embeddings into the index." link: "/vector-search/ingesting-data/#raw-vector-ingestion" - heading: "Search embeddings" description: "Search your embeddings using vector search." 
- link: "/vector-search/searching-data/#searching-pre-generated-embeddings-or-raw-vectors" + link: "/vector-search/searching-data/#searching-raw-vectors" auto_items: - heading: "Configure an embedding model" description: "Configure a machine learning model that will automatically generate embeddings from your text at ingest time and query time." diff --git a/_vector-search/optimizing-storage/binary-quantization.md b/_vector-search/optimizing-storage/binary-quantization.md index cbda3c29f5..dd89b216c2 100644 --- a/_vector-search/optimizing-storage/binary-quantization.md +++ b/_vector-search/optimizing-storage/binary-quantization.md @@ -198,7 +198,7 @@ Memory = 1.1 * ((256 * 4 / 8) + 8 * 16) * 1,000,000 ~= 0.282 GB ``` -## Related articles +## Next steps - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) - [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file diff --git a/_vector-search/optimizing-storage/disk-based-vector-search.md b/_vector-search/optimizing-storage/disk-based-vector-search.md index 5215d5ecba..1f31bb5d5d 100644 --- a/_vector-search/optimizing-storage/disk-based-vector-search.md +++ b/_vector-search/optimizing-storage/disk-based-vector-search.md @@ -209,7 +209,7 @@ You can override the `compression_level` for disk-optimized indexes in the same - For more information about binary quantization, see [Binary quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/binary-quantization/). - For more information about k-NN vector workload modes, see [Vector workload modes]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#vector-workload-modes). -## Related articles +## Next steps - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) - [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file diff --git a/_vector-search/optimizing-storage/faiss-16-bit-quantization.md b/_vector-search/optimizing-storage/faiss-16-bit-quantization.md index c8c6bca779..dd5e3ec349 100644 --- a/_vector-search/optimizing-storage/faiss-16-bit-quantization.md +++ b/_vector-search/optimizing-storage/faiss-16-bit-quantization.md @@ -12,7 +12,7 @@ has_math: true Starting with version 2.13, OpenSearch supports performing scalar quantization for the Faiss engine within OpenSearch. Within the Faiss engine, a scalar quantizer (SQfp16) performs the conversion between 32-bit and 16-bit vectors. At ingestion time, when you upload 32-bit floating-point vectors to OpenSearch, SQfp16 quantizes them into 16-bit floating-point vectors and stores the quantized vectors in a vector index. -At search time, SQfp16 decodes the vector values back into 32-bit floating-point values for distance computation. The SQfp16 quantization can decrease the memory footprint by a factor of 2. Additionally, it leads to a minimal loss in recall when differences between vector values are large compared to the error introduced by eliminating their two least significant bits. When used with [SIMD optimization]({{site.url}}{{site.baseurl}}/vector-search/performance-tuning/#simd-optimization-for-the-faiss-engine), SQfp16 quantization can also significantly reduce search latencies and improve indexing throughput. +At search time, SQfp16 decodes the vector values back into 32-bit floating-point values for distance computation. 
The SQfp16 quantization can decrease the memory footprint by a factor of 2. Additionally, it leads to a minimal loss in recall when differences between vector values are large compared to the error introduced by eliminating their two least significant bits. When used with [SIMD optimization]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#simd-optimization), SQfp16 quantization can also significantly reduce search latencies and improve indexing throughput. SIMD optimization is not supported on Windows. Using Faiss scalar quantization on Windows can lead to a significant drop in performance, including decreased indexing throughput and increased search latencies. {: .warning} @@ -153,7 +153,7 @@ As an example, assume that you have 1 million vectors with a dimension of 256 an 1.1 * (((2 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 0.525 GB ``` -## Related articles +## Next steps - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) - [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file diff --git a/_vector-search/optimizing-storage/faiss-product-quantization.md b/_vector-search/optimizing-storage/faiss-product-quantization.md index 8b064fd649..e163a519e7 100644 --- a/_vector-search/optimizing-storage/faiss-product-quantization.md +++ b/_vector-search/optimizing-storage/faiss-product-quantization.md @@ -51,7 +51,7 @@ For example, assume that you have 1 million vectors with a dimension of 256, `iv 1.1 * ((8 / 8 * 64 + 24) * 1000000 + 100 * (2^8 * 4 * 256 + 4 * 512 * 256)) ~= 0.171 GB ``` -## Related articles +## Next steps - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) - [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file diff --git a/_vector-search/optimizing-storage/lucene-scalar-quantization.md b/_vector-search/optimizing-storage/lucene-scalar-quantization.md index 944360b239..e2e11f93df 100644 --- a/_vector-search/optimizing-storage/lucene-scalar-quantization.md +++ b/_vector-search/optimizing-storage/lucene-scalar-quantization.md @@ -114,7 +114,7 @@ As an example, assume that you have 1 million vectors with a dimension of 256 an 1.1 * (256 + 8 * 16) * 1,000,000 ~= 0.4 GB ``` -## Related articles +## Next steps - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) - [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file diff --git a/_vector-search/settings.md b/_vector-search/settings.md index 0406af1d76..fbdd5c258a 100644 --- a/_vector-search/settings.md +++ b/_vector-search/settings.md @@ -27,8 +27,8 @@ Setting | Static/Dynamic | Default | Description `knn.model.index.number_of_shards`| Dynamic | `1` | The number of shards to use for the model system index, which is the OpenSearch index that stores the models used for approximate nearest neighbor (ANN) search. `knn.model.index.number_of_replicas`| Dynamic | `1` | The number of replica shards to use for the model system index. Generally, in a multi-node cluster, this value should be at least 1 in order to increase stability. `knn.model.cache.size.limit` | Dynamic | `10%` | The model cache limit cannot exceed 25% of the JVM heap. 
-`knn.faiss.avx2.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx2.so` library and load the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/vector-search/performance-tuning/#simd-optimization-for-the-faiss-engine). -`knn.faiss.avx512.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx512.so` library and load the `libopensearchknn_faiss_avx2.so` library or the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/vector-search/performance-tuning/#simd-optimization-for-the-faiss-engine). +`knn.faiss.avx2.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx2.so` library and load the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#simd-optimization). +`knn.faiss.avx512.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx512.so` library and load the `libopensearchknn_faiss_avx2.so` library or the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#simd-optimization). ## Index settings diff --git a/_vector-search/vector-search-techniques/knn-score-script.md b/_vector-search/vector-search-techniques/knn-score-script.md index d6c6a0c3d2..78a0fcf05a 100644 --- a/_vector-search/vector-search-techniques/knn-score-script.md +++ b/_vector-search/vector-search-techniques/knn-score-script.md @@ -107,7 +107,7 @@ All parameters are required. - `query_value` is the point you want to find the nearest neighbors for. For the Euclidean and cosine similarity spaces, the value must be an array of floats that matches the dimension set in the field's mapping. For Hamming bit distance, this value can be either of type signed long or a base64-encoded string (for the long and binary field types, respectively). - `space_type` corresponds to the distance function. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). -The [post filter example in the approximate approach]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/filter-search-knn/) shows a search that returns fewer than `k` results. If you want to avoid this situation, the score script method lets you essentially invert the order of events. In other words, you can filter down the set of documents over which to execute the k-nearest neighbor search. +The [post filter example in the approximate approach]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/) shows a search that returns fewer than `k` results. If you want to avoid this situation, the score script method lets you essentially invert the order of events. In other words, you can filter down the set of documents over which to execute the k-nearest neighbor search. 
This example shows a pre-filter approach to k-NN search with the score script approach. First, create the index: From a6a1cfb616f6b891860b6c82d28aa6cae224cc16 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Mon, 17 Feb 2025 14:46:15 -0500 Subject: [PATCH 23/32] Fix links Signed-off-by: Fanit Kolchina --- _vector-search/settings.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/_vector-search/settings.md b/_vector-search/settings.md index fbdd5c258a..94944bae88 100644 --- a/_vector-search/settings.md +++ b/_vector-search/settings.md @@ -42,9 +42,9 @@ Setting | Static/Dynamic | Default | Description `index.knn.algo_param.ef_search` | Dynamic | `100` | `ef` (or `efSearch`) represents the size of the dynamic list for the nearest neighbors used during a search. Higher `ef` values lead to a more accurate but slower search. `ef` cannot be set to a value lower than the number of queried nearest neighbors, `k`. `ef` can take any value between `k` and the size of the dataset. `index.knn.advanced.approximate_threshold` | Dynamic | `15000` | The number of vectors a segment must have before creating specialized data structures for approximate search. Set to `-1` to disable building vector data structures and `0` to always build them. `index.knn.advanced.filtered_exact_search_threshold`| Dynamic | None | The filtered ID threshold value used to switch to exact search during filtered ANN search. If the number of filtered IDs in a segment is lower than this setting's value, then exact search will be performed on the filtered IDs. -`index.knn.algo_param.ef_construction` | Static | `100` | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead. -`index.knn.algo_param.m` | Static | `16` | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead. -`index.knn.space_type` | Static | `l2` | Deprecated in 1.0.0. Use the [mapping parameters](https://opensearch.org/docs/latest/search-plugins/knn/knn-index/#method-definitions) to set this value instead. +`index.knn.algo_param.ef_construction` | Static | `100` | Deprecated in 1.0.0. Use the [mapping parameters]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) to set this value instead. +`index.knn.algo_param.m` | Static | `16` | Deprecated in 1.0.0. Use the [mapping parameters]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) to set this value instead. +`index.knn.space_type` | Static | `l2` | Deprecated in 1.0.0. Use the [mapping parameters]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) to set this value instead. An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` and `ef_search` values (`512`). 
{: .note} \ No newline at end of file From ad6fb3c4a5017172b58e8ed283304b6b490c95b9 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Mon, 17 Feb 2025 15:07:59 -0500 Subject: [PATCH 24/32] Rename query file Signed-off-by: Fanit Kolchina --- _field-types/supported-field-types/knn-memory-optimized.md | 2 +- _field-types/supported-field-types/knn-vector.md | 2 +- _query-dsl/specialized/index.md | 2 +- _query-dsl/specialized/{kNN.md => k-nn.md} | 2 +- _vector-search/getting-started/auto-generated-embeddings.md | 2 +- _vector-search/getting-started/concepts.md | 4 ++-- _vector-search/getting-started/index.md | 2 +- _vector-search/getting-started/vector-search-basics.md | 2 +- _vector-search/index.md | 2 +- _vector-search/ingesting-data.md | 2 +- _vector-search/optimizing-storage/binary-quantization.md | 2 +- _vector-search/optimizing-storage/disk-based-vector-search.md | 2 +- .../optimizing-storage/faiss-16-bit-quantization.md | 2 +- .../optimizing-storage/faiss-product-quantization.md | 2 +- .../optimizing-storage/lucene-scalar-quantization.md | 2 +- _vector-search/searching-data.md | 4 ++-- 16 files changed, 18 insertions(+), 18 deletions(-) rename _query-dsl/specialized/{kNN.md => k-nn.md} (98%) diff --git a/_field-types/supported-field-types/knn-memory-optimized.md b/_field-types/supported-field-types/knn-memory-optimized.md index 8ca661d196..5507c29bcd 100644 --- a/_field-types/supported-field-types/knn-memory-optimized.md +++ b/_field-types/supported-field-types/knn-memory-optimized.md @@ -921,6 +921,6 @@ The memory required for IVF can be estimated using the following formula, where ## Next steps -- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/kNN/) +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) - [Disk-based vector search]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/disk-based-vector-search/) - [Vector quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/) \ No newline at end of file diff --git a/_field-types/supported-field-types/knn-vector.md b/_field-types/supported-field-types/knn-vector.md index 4e82e7fed5..d9a702b64e 100644 --- a/_field-types/supported-field-types/knn-vector.md +++ b/_field-types/supported-field-types/knn-vector.md @@ -176,4 +176,4 @@ Parameter | Data type | Description - [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) - [Vector search]({{site.url}}{{site.baseurl}}/vector-search/) -- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) \ No newline at end of file diff --git a/_query-dsl/specialized/index.md b/_query-dsl/specialized/index.md index 1e0f8194f6..d28451cfa8 100644 --- a/_query-dsl/specialized/index.md +++ b/_query-dsl/specialized/index.md @@ -14,7 +14,7 @@ OpenSearch supports the following specialized queries: - `more_like_this`: Finds documents similar to the provided text, document, or collection of documents. -- [`knn`]({{site.url}}{{site.baseurl}}/query-dsl/specialized/kNN/): Used for searching raw vectors during [vector search]({{site.url}}{{site.baseurl}}/vector-search/). +- [`knn`]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/): Used for searching raw vectors during [vector search]({{site.url}}{{site.baseurl}}/vector-search/). 
- [`neural`]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/): Used for searching by text or image in [vector search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/). diff --git a/_query-dsl/specialized/kNN.md b/_query-dsl/specialized/k-nn.md similarity index 98% rename from _query-dsl/specialized/kNN.md rename to _query-dsl/specialized/k-nn.md index b60ebc18c0..1cae567444 100644 --- a/_query-dsl/specialized/kNN.md +++ b/_query-dsl/specialized/k-nn.md @@ -7,7 +7,7 @@ nav_order: 10 # k-NN query -Use the `knn` query for executing nearest neighbor searches over vector fields. +Use the `knn` query for running nearest neighbor searches on vector fields. ## Request body fields diff --git a/_vector-search/getting-started/auto-generated-embeddings.md b/_vector-search/getting-started/auto-generated-embeddings.md index cd61ccd582..99e6219f67 100644 --- a/_vector-search/getting-started/auto-generated-embeddings.md +++ b/_vector-search/getting-started/auto-generated-embeddings.md @@ -119,7 +119,7 @@ PUT /_ingest/pipeline/nlp-ingest-pipeline ### Step 3(b): Create a vector index -Now you'll create a vector index by setting `index.knn` to `true`. In the index, the field named `text` will contains an image description, and a [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) field named `passage_embedding` will contains the vector embedding of the text. Additionally, set the default ingest pipeline to the `nlp-ingest-pipeline` you created in the previous step: +Now you'll create a vector index by setting `index.knn` to `true`. In the index, the field named `text` contains an image description, and a [`knn_vector`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) field named `passage_embedding` contains the vector embedding of the text. Additionally, set the default ingest pipeline to the `nlp-ingest-pipeline` you created in the previous step: ```json diff --git a/_vector-search/getting-started/concepts.md b/_vector-search/getting-started/concepts.md index c8c237135d..1c21124cb0 100644 --- a/_vector-search/getting-started/concepts.md +++ b/_vector-search/getting-started/concepts.md @@ -59,7 +59,7 @@ _k-nearest neighbors (k-NN) search_ finds the k most similar vectors to a given ### k-NN query -[_k-NN query_]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) searches vector fields using a query vector. +[_k-NN query_]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) searches vector fields using a query vector. ### Neural query @@ -81,7 +81,7 @@ _k-nearest neighbors (k-NN) search_ finds the k most similar vectors to a given ### Multimodal search -[_Multimodal search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/multimodal-search/) enables searching across multiple types of data, such as text, images, and audio. It allows queries in one format (for example, text) to retrieve results in another (for example, images). +[_Multimodal search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/multimodal-search/) enables searching across multiple types of data, such as text and images. It allows queries in one format (for example, text) to retrieve results in another (for example, images). 
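The `auto-generated-embeddings.md` hunk above truncates the index body after `PUT`. For orientation only, a hypothetical sketch of such an index follows; the index name and dimension are assumptions, and the dimension must match the configured embedding model:

```json
PUT /my-nlp-index
{
  "settings": {
    "index.knn": true,
    "default_pipeline": "nlp-ingest-pipeline"
  },
  "mappings": {
    "properties": {
      "text": {
        "type": "text"
      },
      "passage_embedding": {
        "type": "knn_vector",
        "dimension": 768
      }
    }
  }
}
```
{% include copy-curl.html %}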
### Radial search diff --git a/_vector-search/getting-started/index.md b/_vector-search/getting-started/index.md index 899c7a52f8..289fd9233c 100644 --- a/_vector-search/getting-started/index.md +++ b/_vector-search/getting-started/index.md @@ -199,6 +199,6 @@ If your data isn't already in vector format, you can generate vector embeddings ## Next steps - [Vector search basics]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/) -- [Bringing or generating embeddings ]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-options/) +- [Bringing your own or generating embeddings ]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-options/) - [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/) - [Generating vector embeddings within OpenSearch]({{site.url}}{{site.baseurl}}/vector-search/getting-started/auto-generated-embeddings/) \ No newline at end of file diff --git a/_vector-search/getting-started/vector-search-basics.md b/_vector-search/getting-started/vector-search-basics.md index 9cd155c7e1..a628b6bee0 100644 --- a/_vector-search/getting-started/vector-search-basics.md +++ b/_vector-search/getting-started/vector-search-basics.md @@ -41,4 +41,4 @@ To learn more about the distance metrics, see [Spaces]({{site.url}}{{site.baseur ## Next steps -- [Bringing or generating embeddings]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-options/) \ No newline at end of file +- [Bringing your own or generating embeddings]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-options/) \ No newline at end of file diff --git a/_vector-search/index.md b/_vector-search/index.md index 2fe1c7e2e3..e8251a737b 100644 --- a/_vector-search/index.md +++ b/_vector-search/index.md @@ -13,7 +13,7 @@ redirect_from: # Vector search -Vector search enhances traditional lexical search by encoding text, images, and audio as vectors, enabling similarity search with semantic understanding. OpenSearch unifies traditional search, analytics, and vector search in a single solution. As a vector database, it streamlines AI application development by efficiently storing and retrieving high-dimensional data. +Vector search enhances traditional lexical search by encoding data such as text or images as vectors, enabling similarity search with semantic understanding. OpenSearch unifies traditional search, analytics, and vector search in a single solution. As a vector database, it streamlines AI application development by efficiently storing and retrieving high-dimensional data. [Get started]({{site.url}}{{site.baseurl}}/vector-search/getting-started/){: .btn-dark-blue} diff --git a/_vector-search/ingesting-data.md b/_vector-search/ingesting-data.md index 813d3b48b0..546afab096 100644 --- a/_vector-search/ingesting-data.md +++ b/_vector-search/ingesting-data.md @@ -15,7 +15,7 @@ The following table compares ingestion for each vector search method. 
| Feature | Data format | Ingest pipeline | Vector generation | Additional fields | |-------------------------------|----------------------------|---------------------|---------------------------------|-----------------------------------| | **Raw vector ingestion** | Pre-generated vectors | Not required | External | Optional metadata | -| **Converting data to embeddings during ingestion** | Text, image, or other data | Required | Internal (during ingestion) | Original data + embeddings | +| **Converting data to embeddings during ingestion** | Text or image data | Required | Internal (during ingestion) | Original data + embeddings | ## Raw vector ingestion diff --git a/_vector-search/optimizing-storage/binary-quantization.md b/_vector-search/optimizing-storage/binary-quantization.md index dd89b216c2..514003cd01 100644 --- a/_vector-search/optimizing-storage/binary-quantization.md +++ b/_vector-search/optimizing-storage/binary-quantization.md @@ -201,4 +201,4 @@ Memory = 1.1 * ((256 * 4 / 8) + 8 * 16) * 1,000,000 ## Next steps - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) -- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) \ No newline at end of file diff --git a/_vector-search/optimizing-storage/disk-based-vector-search.md b/_vector-search/optimizing-storage/disk-based-vector-search.md index 1f31bb5d5d..fe11aec841 100644 --- a/_vector-search/optimizing-storage/disk-based-vector-search.md +++ b/_vector-search/optimizing-storage/disk-based-vector-search.md @@ -212,4 +212,4 @@ You can override the `compression_level` for disk-optimized indexes in the same ## Next steps - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) -- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) \ No newline at end of file diff --git a/_vector-search/optimizing-storage/faiss-16-bit-quantization.md b/_vector-search/optimizing-storage/faiss-16-bit-quantization.md index dd5e3ec349..de386192d5 100644 --- a/_vector-search/optimizing-storage/faiss-16-bit-quantization.md +++ b/_vector-search/optimizing-storage/faiss-16-bit-quantization.md @@ -156,4 +156,4 @@ As an example, assume that you have 1 million vectors with a dimension of 256 an ## Next steps - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) -- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) \ No newline at end of file diff --git a/_vector-search/optimizing-storage/faiss-product-quantization.md b/_vector-search/optimizing-storage/faiss-product-quantization.md index e163a519e7..7c27a1bad4 100644 --- a/_vector-search/optimizing-storage/faiss-product-quantization.md +++ b/_vector-search/optimizing-storage/faiss-product-quantization.md @@ -54,4 +54,4 @@ For example, assume that you have 1 million vectors with a dimension of 256, `iv ## Next steps - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) -- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file +- [k-NN 
query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) \ No newline at end of file diff --git a/_vector-search/optimizing-storage/lucene-scalar-quantization.md b/_vector-search/optimizing-storage/lucene-scalar-quantization.md index e2e11f93df..021f1a8537 100644 --- a/_vector-search/optimizing-storage/lucene-scalar-quantization.md +++ b/_vector-search/optimizing-storage/lucene-scalar-quantization.md @@ -117,4 +117,4 @@ As an example, assume that you have 1 million vectors with a dimension of 256 an ## Next steps - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/) -- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) \ No newline at end of file +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) \ No newline at end of file diff --git a/_vector-search/searching-data.md b/_vector-search/searching-data.md index 40435c5d09..0924f523f2 100644 --- a/_vector-search/searching-data.md +++ b/_vector-search/searching-data.md @@ -15,7 +15,7 @@ The following table compares the search syntax and typical use cases for each ve | Feature | Query type | Input format | Model required | Use case | |----------------------------------|------------------|------------------|---------------------|----------------------------| | **Raw vectors** | `knn` | Vector array | No | Raw vector search | -| **Auto-generated embeddings** | `neural` | Text, image, or other data | Yes | ML-powered search | +| **Auto-generated embeddings** | `neural` | Text or image data | Yes | ML-powered search | ## Searching raw vectors @@ -61,5 +61,5 @@ GET /my-semantic-search-index/_search - [Getting started with semantic and hybrid search]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/neural-search-tutorial/) - [Filtering data]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/) - [ML-powered search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/) -- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/knn/) +- [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) - [Neural query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/) From 18e10dff5d74476cc2b68caf032afdac20099e00 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Mon, 17 Feb 2025 15:14:49 -0500 Subject: [PATCH 25/32] Fix links Signed-off-by: Fanit Kolchina --- _vector-search/creating-vector-index.md | 4 ++++ _vector-search/ingesting-data.md | 4 ++++ _vector-search/searching-data.md | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/_vector-search/creating-vector-index.md b/_vector-search/creating-vector-index.md index 1e6282eb33..f5e10785d1 100644 --- a/_vector-search/creating-vector-index.md +++ b/_vector-search/creating-vector-index.md @@ -140,6 +140,10 @@ PUT /my-semantic-search-index ``` {% include copy-curl.html %} +## Working with sparse vectors + +OpenSearch also supports sparse vectors. For more information, see [Neural sparse search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/neural-sparse-search/). 
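+
+For example, a sparse vector field can be mapped using the `rank_features` field type (the following is an illustrative sketch; the index and field names are placeholders):
+
+```json
+PUT /my-sparse-index
+{
+  "mappings": {
+    "properties": {
+      "passage_text": {
+        "type": "text"
+      },
+      "passage_embedding": {
+        "type": "rank_features"
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}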
+ ## Next steps - [Ingesting data into a vector index]({{site.url}}{{site.baseurl}}/vector-search/searching-data/) diff --git a/_vector-search/ingesting-data.md b/_vector-search/ingesting-data.md index 546afab096..70ab3b83eb 100644 --- a/_vector-search/ingesting-data.md +++ b/_vector-search/ingesting-data.md @@ -75,6 +75,10 @@ POST /my-semantic-search-index/_doc The pipeline automatically generates and stores the embeddings in the `passage_embedding` field. +## Working with sparse vectors + +OpenSearch also supports sparse vectors. For more information, see [Neural sparse search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/neural-sparse-search/). + ## Next steps - [Searching vector data]({{site.url}}{{site.baseurl}}/vector-search/searching-data/) diff --git a/_vector-search/searching-data.md b/_vector-search/searching-data.md index 0924f523f2..bcdb9a4106 100644 --- a/_vector-search/searching-data.md +++ b/_vector-search/searching-data.md @@ -56,6 +56,10 @@ GET /my-semantic-search-index/_search ``` {% include copy-curl.html %} +## Working with sparse vectors + +OpenSearch also supports sparse vectors. For more information, see [Neural sparse search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/neural-sparse-search/). + ## Next steps - [Getting started with semantic and hybrid search]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/neural-search-tutorial/) From c007e4649f0c97884111d9fc01970ee03cca270b Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Mon, 17 Feb 2025 15:41:40 -0500 Subject: [PATCH 26/32] Add sparse vector option Signed-off-by: Fanit Kolchina --- .../supported-field-types/knn-spaces.md | 100 +++++++++--------- .../disk-based-vector-search.md | 8 +- _vector-search/performance-tuning-indexing.md | 82 +++++++------- _vector-search/performance-tuning-search.md | 35 +++--- 4 files changed, 112 insertions(+), 113 deletions(-) diff --git a/_field-types/supported-field-types/knn-spaces.md b/_field-types/supported-field-types/knn-spaces.md index a90f0379fe..fe28fb2172 100644 --- a/_field-types/supported-field-types/knn-spaces.md +++ b/_field-types/supported-field-types/knn-spaces.md @@ -7,7 +7,7 @@ nav_order: 10 has_math: true --- -## Spaces +# Spaces In vector search, a _space_ defines how the distance (or similarity) between two vectors is calculated. The choice of space affects how nearest neighbors are determined during search operations. @@ -40,59 +40,59 @@ The `hamming` space type is supported for binary vectors in OpenSearch version 2 ## Specifying the space type -The space type is specified when creating an index: +The space type is specified when creating an index. 
-- You can specify the space type at the top level of the field mapping: +You can specify the space type at the top level of the field mapping: - ```json - PUT /test-index - { - "settings": { - "index": { - "knn": true - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 3, - "space_type": "l2" - } - } - } +```json +PUT /test-index +{ + "settings": { + "index": { + "knn": true + } + }, + "mappings": { + "properties": { + "my_vector1": { + "type": "knn_vector", + "dimension": 3, + "space_type": "l2" + } } - ``` - {% include copy-curl.html %} + } +} +``` +{% include copy-curl.html %} -- Alternatively, you can specify the space type within the `method` object if defining a method: +Alternatively, you can specify the space type within the `method` object if defining a method: - ```json - PUT test-index - { - "settings": { - "index": { - "knn": true, - "knn.algo_param.ef_search": 100 - } - }, - "mappings": { - "properties": { - "my_vector1": { - "type": "knn_vector", - "dimension": 1024, - "method": { - "name": "hnsw", - "space_type": "l2", - "engine": "nmslib", - "parameters": { - "ef_construction": 128, - "m": 24 - } - } - } - } +```json +PUT test-index +{ + "settings": { + "index": { + "knn": true, + "knn.algo_param.ef_search": 100 } + }, + "mappings": { + "properties": { + "my_vector1": { + "type": "knn_vector", + "dimension": 1024, + "method": { + "name": "hnsw", + "space_type": "l2", + "engine": "nmslib", + "parameters": { + "ef_construction": 128, + "m": 24 + } + } + } } - ``` - {% include copy-curl.html %} + } +} +``` +{% include copy-curl.html %} diff --git a/_vector-search/optimizing-storage/disk-based-vector-search.md b/_vector-search/optimizing-storage/disk-based-vector-search.md index fe11aec841..d97de64e8a 100644 --- a/_vector-search/optimizing-storage/disk-based-vector-search.md +++ b/_vector-search/optimizing-storage/disk-based-vector-search.md @@ -14,7 +14,7 @@ redirect_from: For low-memory environments, OpenSearch provides _disk-based vector search_, which significantly reduces the operational costs for vector workloads. Disk-based vector search uses [binary quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/binary-quantization/), compressing vectors and thereby reducing the memory requirements. This memory optimization provides large memory savings at the cost of slightly increased search latency while still maintaining strong recall. -To use disk-based vector search, set the [`mode`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#vector-workload-modes) parameter to `on_disk` for your vector field type. This parameter will configure your index to use secondary storage. +To use disk-based vector search, set the [`mode`]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#vector-workload-modes) parameter to `on_disk` for your vector field type. This parameter will configure your index to use secondary storage. For more information about disk-based search parameters, see [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/). ## Creating an index for disk-based vector search @@ -206,10 +206,6 @@ You can override the `compression_level` for disk-optimized indexes in the same ## Next steps -- For more information about binary quantization, see [Binary quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/binary-quantization/). 
-- For more information about k-NN vector workload modes, see [Vector workload modes]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/#vector-workload-modes).
-
-## Next steps
-
+- [Binary quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/binary-quantization/)
 - [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/)
 - [k-NN query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/)
\ No newline at end of file
diff --git a/_vector-search/performance-tuning-indexing.md b/_vector-search/performance-tuning-indexing.md
index 5146ee014d..65d9c6747c 100644
--- a/_vector-search/performance-tuning-indexing.md
+++ b/_vector-search/performance-tuning-indexing.md
@@ -13,14 +13,15 @@ Take any of the following steps to improve indexing performance, especially when
 
 Either disable the refresh interval (default = 1 sec) or set a long duration for the refresh interval to avoid creating multiple small segments:
 
-    ```json
-    PUT /<index_name>/_settings
-    {
-      "index" : {
-        "refresh_interval" : "-1"
-      }
-    }
-    ```
+```json
+PUT /<index_name>/_settings
+{
+  "index" : {
+    "refresh_interval" : "-1"
+  }
+}
+```
+{% include copy-curl.html %}
 
 Make sure to reenable `refresh_interval` after indexing is complete.
 
@@ -39,43 +40,44 @@ Monitor CPU utilization and choose the correct number of threads. Because native
 
 The `_source` field contains the original JSON document body that was passed at index time. This field is not indexed and is not searchable but is stored so that it can be returned when executing fetch requests such as `get` and `search`. When using vector fields within the source, you can remove the vector field to save disk space, as shown in the following example where the `location` vector is excluded:
 
-    ```json
-    PUT /<index_name>/_mappings
-    {
-      "_source": {
-        "excludes": ["location"]
-      },
-      "properties": {
-        "location": {
-          "type": "knn_vector",
-          "dimension": 2,
-          "space_type": "l2"
-        }
-      }
-    }
-    ```
-
+```json
+PUT /<index_name>/_mappings
+{
+  "_source": {
+    "excludes": ["location"]
+  },
+  "properties": {
+    "location": {
+      "type": "knn_vector",
+      "dimension": 2,
+      "space_type": "l2"
+    }
+  }
+}
+```
+{% include copy-curl.html %}
 
 Disabling the `_source` field can cause certain features to become unavailable, such as the `update`, `update_by_query`, and `reindex` APIs and the ability to debug queries or aggregations by using the original document at index time.
 
 In OpenSearch 2.15 or later, you can further improve indexing speed and reduce disk space by removing the vector field from the `_recovery_source`, as shown in the following example:
 
-    ```json
-    PUT /<index_name>/_mappings
-    {
-      "_source": {
-        "excludes": ["location"],
-        "recovery_source_excludes": ["location"]
-      },
-      "properties": {
-        "location": {
-          "type": "knn_vector",
-          "dimension": 2,
-          "space_type": "l2"
-        }
-      }
-    }
-    ```
+```json
+PUT /<index_name>/_mappings
+{
+  "_source": {
+    "excludes": ["location"],
+    "recovery_source_excludes": ["location"]
+  },
+  "properties": {
+    "location": {
+      "type": "knn_vector",
+      "dimension": 2,
+      "space_type": "l2"
+    }
+  }
+}
+```
+{% include copy-curl.html %}
 
 This is an expert-level setting. Disabling the `_recovery_source` may lead to failures during peer-to-peer recovery. Before disabling the `_recovery_source`, check with your OpenSearch cluster admin to determine whether your cluster performs regular flushes before starting the peer-to-peer recovery of shards prior to disabling the `_recovery_source`.
{: .warning} diff --git a/_vector-search/performance-tuning-search.md b/_vector-search/performance-tuning-search.md index 4cdfb3a620..48a22ab7b6 100644 --- a/_vector-search/performance-tuning-search.md +++ b/_vector-search/performance-tuning-search.md @@ -11,32 +11,33 @@ Take the following steps to improve search performance. ## Reduce segment count - To improve search performance, you must keep the number of segments under control. Lucene's IndexSearcher searches over all of the segments in a shard to find the 'size' best results. +To improve search performance, you must keep the number of segments under control. Lucene's IndexSearcher searches over all of the segments in a shard to find the 'size' best results. - Ideally, having one segment per shard provides the optimal performance with respect to search latency. You can configure an index to have multiple shards to avoid giant shards and achieve more parallelism. +Ideally, having one segment per shard provides the optimal performance with respect to search latency. You can configure an index to have multiple shards to avoid giant shards and achieve more parallelism. - You can control the number of segments by choosing a larger refresh interval, or during indexing by asking OpenSearch to slow down segment creation by disabling the refresh interval. +You can control the number of segments by choosing a larger refresh interval, or during indexing by asking OpenSearch to slow down segment creation by disabling the refresh interval. ## Warm up the index - Native library indexes are constructed during indexing, but they're loaded into memory during the first search. In Lucene, each segment is searched sequentially (so, for k-NN, each segment returns up to k nearest neighbors of the query point), and the top 'size' number of results based on the score are returned from all the results returned by segments at a shard level (higher score = better result). +Native library indexes are constructed during indexing, but they're loaded into memory during the first search. In Lucene, each segment is searched sequentially (so, for k-NN, each segment returns up to k nearest neighbors of the query point), and the top 'size' number of results based on the score are returned from all the results returned by segments at a shard level (higher score = better result). - Once a native library index is loaded (native library indexes are loaded outside OpenSearch JVM), OpenSearch caches them in memory. Initial queries are expensive and take a few seconds, while subsequent queries are faster and take milliseconds (assuming the k-NN circuit breaker isn't hit). +Once a native library index is loaded (native library indexes are loaded outside OpenSearch JVM), OpenSearch caches them in memory. Initial queries are expensive and take a few seconds, while subsequent queries are faster and take milliseconds (assuming the k-NN circuit breaker isn't hit). 
- To avoid this latency penalty during your first queries, you can use the warmup API operation on the indexes you want to search: +To avoid this latency penalty during your first queries, you can use the warmup API operation on the indexes you want to search: - ```json - GET /_plugins/_knn/warmup/index1,index2,index3?pretty - { - "_shards" : { - "total" : 6, - "successful" : 6, - "failed" : 0 - } - } - ``` +```json +GET /_plugins/_knn/warmup/index1,index2,index3?pretty +{ + "_shards" : { + "total" : 6, + "successful" : 6, + "failed" : 0 + } +} +``` +{% include copy-curl.html %} - The warmup API operation loads all native library indexes for all shards (primary and replica) for the specified indexes into the cache, so there's no penalty to load native library indexes during initial searches. +The warmup API operation loads all native library indexes for all shards (primary and replica) for the specified indexes into the cache, so there's no penalty to load native library indexes during initial searches. This API operation only loads the segments of active indexes into the cache. If a merge or refresh operation finishes after the API runs, or if you add new documents, you need to rerun the API to load those native library indexes into memory. {: .warning} From e2a74279f979ae50611f5e33f33dd16e5d53fc14 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Mon, 17 Feb 2025 15:46:28 -0500 Subject: [PATCH 27/32] Compress requests Signed-off-by: Fanit Kolchina --- _vector-search/getting-started/index.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/_vector-search/getting-started/index.md b/_vector-search/getting-started/index.md index 289fd9233c..c81385b93f 100644 --- a/_vector-search/getting-started/index.md +++ b/_vector-search/getting-started/index.md @@ -26,7 +26,7 @@ docker pull opensearchproject/opensearch:latest && docker run -it -p 9200:9200 - ``` {% include copy.html %} -OpenSearch is now running on port 9200. Note that this demo configuration is insecure and should not be run in production environments. +OpenSearch is now running on port 9200. Note that this demo configuration is insecure and should not be used in production environments.
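+
+To confirm that the node is reachable, you can query the root endpoint (a quick check; the exact response fields depend on your OpenSearch version):
+
+```json
+GET /
+```
+{% include copy-curl.html %}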
@@ -75,9 +75,7 @@ First, create an index that will store sample hotel data. To signal to OpenSearc PUT /hotels-index { "settings": { - "index": { - "knn": true - } + "index.knn": true }, "mappings": { "properties": { @@ -194,7 +192,7 @@ The response contains the hotels closest to the specified pin location: ## Generating vector embeddings in OpenSearch -If your data isn't already in vector format, you can generate vector embeddings directly within OpenSearch. This allows you to transform text, images, and other data types into numerical representations for similarity search. For more information, see [Generating vector embeddings within OpenSearch]({{site.url}}{{site.baseurl}}/vector-search/getting-started/auto-generated-embeddings/). +If your data isn't already in vector format, you can generate vector embeddings directly within OpenSearch. This allows you to transform text or images into their numerical representations for similarity search. For more information, see [Generating vector embeddings within OpenSearch]({{site.url}}{{site.baseurl}}/vector-search/getting-started/auto-generated-embeddings/). ## Next steps From 8d4ffe4aaca0da38a59597f2c6eb811c1c590b5f Mon Sep 17 00:00:00 2001 From: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Date: Wed, 19 Feb 2025 16:37:40 -0500 Subject: [PATCH 28/32] Apply suggestions from code review Co-authored-by: Nathan Bower Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> --- _about/index.md | 8 +-- .../knn-memory-optimized.md | 28 ++++----- .../knn-methods-engines.md | 62 +++++++++---------- .../supported-field-types/knn-spaces.md | 4 +- .../supported-field-types/knn-vector.md | 10 +-- 5 files changed, 56 insertions(+), 56 deletions(-) diff --git a/_about/index.md b/_about/index.md index 1bffe053fd..cda44f157f 100644 --- a/_about/index.md +++ b/_about/index.md @@ -15,7 +15,7 @@ why_use: description: "Use OpenSearch as a vector database to combine the power of traditional search, analytics, and vector search." link: "/vector-search/" - heading: "Fast, scalable full-text search" - description: "Help users find the right information within your application, website, or data lake catalog." + description: "Help users find the right information in your application, website, or data lake catalog." link: "/search-plugins/" - heading: "Application and infrastructure monitoring" description: "Use observability logs, metrics, and traces to monitor your applications in real time." @@ -44,16 +44,16 @@ features: link: "/dashboards/" getting_started: - heading: "Get started with OpenSearch" - description: "Learn about OpenSearch and get started ingesting and searching data." + description: "Learn about OpenSearch and start ingesting and searching data." link: "/getting-started/" - heading: "Get started with OpenSearch Dashboards" - description: "Learn about OpenSearch Dashboards applications and tools to visualize data." + description: "Learn about OpenSearch Dashboards applications and tools used to visualize data." link: "/dashboards/quickstart/" - heading: "Get started with vector search" description: "Learn about vector search options and build your first vector search application." link: "/search-plugins/" - heading: "Get started with OpenSearch security" - description: "Learn about using security in OpenSearch." + description: "Learn about security in OpenSearch." 
link: "/getting-started/security/" --- diff --git a/_field-types/supported-field-types/knn-memory-optimized.md b/_field-types/supported-field-types/knn-memory-optimized.md index 5507c29bcd..25fadceb1e 100644 --- a/_field-types/supported-field-types/knn-memory-optimized.md +++ b/_field-types/supported-field-types/knn-memory-optimized.md @@ -8,18 +8,18 @@ nav_order: 30 # Memory-optimized vectors -Vector search operations can be memory-intensive, particularly when dealing with large-scale deployments. OpenSearch provides several strategies to optimize memory usage while maintaining search performance. You can choose between different workload modes that prioritize either low latency or low cost, apply various compression levels to reduce memory footprint, and use alternative vector representations like byte or binary vectors. These optimization techniques allow you to balance memory consumption, search performance, and cost based on your specific use case requirements. +Vector search operations can be memory intensive, particularly when dealing with large-scale deployments. OpenSearch provides several strategies for optimizing memory usage while maintaining search performance. You can choose between different workload modes that prioritize either low latency or low cost, apply various compression levels to reduce memory footprint, or use alternative vector representations like byte or binary vectors. These optimization techniques allow you to balance memory consumption, search performance, and cost based on your specific use case requirements. ## Vector workload modes -Vector search requires balancing between search performance and operational costs. While in-memory search provides the lowest latency, [disk-based search]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/disk-based-vector-search/) offers a more cost-effective approach by reducing memory usage, though with slightly higher search latency. To choose between these approaches, use the `mode` mapping parameter in your `knn_vector` field configuration. This parameter sets appropriate default values for k-NN parameters based on your priority: either low latency or low cost. For additional optimization, you can override these default parameter values in your k-NN field mapping. +Vector search requires balancing search performance and operational costs. While in-memory search provides the lowest latency, [disk-based search]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/disk-based-vector-search/) offers a more cost-effective approach by reducing memory usage, though it results in slightly higher search latency. To choose between these approaches, use the `mode` mapping parameter in your `knn_vector` field configuration. This parameter sets appropriate default values for k-NN parameters based on your priority: either low latency or low cost. For additional optimization, you can override these default parameter values in your k-NN field mapping. OpenSearch supports the following vector workload modes. | Mode | Default engine | Description | |:---|:---|:---| | `in_memory` (Default) | `faiss` | Prioritizes low-latency search. This mode uses the `faiss` engine without any quantization applied. It is configured with the default parameter values for vector search in OpenSearch. | -| `on_disk` | `faiss` | Prioritizes low-cost vector search while maintaining strong recall. By default, the `on_disk` mode uses quantization and rescoring to execute a two-pass approach to retrieve the top neighbors. 
The `on_disk` mode supports only `float` vector types. | +| `on_disk` | `faiss` | Prioritizes low-cost vector search while maintaining strong recall. By default, the `on_disk` mode uses quantization and rescoring to execute a two-phase approach in order to retrieve the top neighbors. The `on_disk` mode supports only `float` vector types. | To create a vector index that uses the `on_disk` mode for low-cost search, send the following request: @@ -168,17 +168,17 @@ By default, k-NN vectors are `float` vectors, in which each dimension is 4 bytes Byte vectors are supported only for the `lucene` and `faiss` engines. They are not supported for the `nmslib` engine. {: .note} -In [k-NN benchmarking tests](https://github.com/opensearch-project/opensearch-benchmark-workloads/tree/main/vectorsearch), the use of `byte` rather than `float` vectors resulted in a significant reduction in storage and memory usage as well as improved indexing throughput and reduced query latency. Additionally, precision on recall was not greatly affected (note that recall can depend on various factors, such as the [quantization technique](#quantization-techniques) and data distribution). +In [k-NN benchmarking tests](https://github.com/opensearch-project/opensearch-benchmark-workloads/tree/main/vectorsearch), the use of `byte` rather than `float` vectors resulted in a significant reduction in storage and memory usage as well as improved indexing throughput and reduced query latency. Additionally, recall precision was not greatly affected (note that recall can depend on various factors, such as the [quantization technique](#quantization-techniques) used and the data distribution). -When using `byte` vectors, expect some loss of precision in the recall compared to using `float` vectors. Byte vectors are useful in large-scale applications and use cases that prioritize a reduced memory footprint in exchange for a minimal loss of recall. +When using `byte` vectors, expect some loss of recall precision compared to using `float` vectors. Byte vectors are useful in large-scale applications and use cases that prioritize a reduced memory footprint in exchange for a minimal loss of recall. {: .important} -When using `byte` vectors with the `faiss` engine, we recommend using [SIMD optimization]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#simd-optimization), which helps to significantly reduce search latencies and improve indexing throughput. +When using `byte` vectors with the `faiss` engine, we recommend using [Single Instruction Multiple Data (SIMD) optimization]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#simd-optimization), which helps to significantly reduce search latencies and improve indexing throughput. {: .important} Introduced in k-NN plugin version 2.9, the optional `data_type` parameter defines the data type of a vector. The default value of this parameter is `float`. -To use a `byte` vector, set the `data_type` parameter to `byte` when creating mappings for an index: +To use a `byte` vector, set the `data_type` parameter to `byte` when creating mappings for an index. ### Example: HNSW @@ -253,7 +253,7 @@ GET test-index/_search ### Example: IVF -The `ivf` method requires a training step that creates and trains the model used to initialize the native library index during segment creation. 
For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model). +The `ivf` method requires a training step that creates a model and trains it to initialize the native library index during segment creation. For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model). First, create an index that will contain byte vector training data. Specify the `faiss` engine and `ivf` algorithm and make sure that the `dimension` matches the dimension of the model you want to create: @@ -383,7 +383,7 @@ In the best-case scenario, byte vectors require 25% of the memory required by 32 #### HNSW memory estimation -The memory required for Hierarchical Navigable Small Worlds (HNSW) is estimated to be `1.1 * (dimension + 8 * m)` bytes/vector, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph. +The memory required for Hierarchical Navigable Small World (HNSW) is estimated to be `1.1 * (dimension + 8 * m)` bytes/vector, where `m` is the maximum number of bidirectional links created for each element during the construction of the graph. As an example, assume that you have 1 million vectors with a `dimension` of `256` and an `m` of `16`. The memory requirement can be estimated as follows: @@ -393,7 +393,7 @@ As an example, assume that you have 1 million vectors with a `dimension` of `256 #### IVF memory estimation -The memory required for IVF is estimated to be `1.1 * ((dimension * num_vectors) + (4 * nlist * dimension))` bytes/vector, where `nlist` is the number of buckets to partition vectors into. +The memory required for Inverted File Index (IVF) is estimated to be `1.1 * ((dimension * num_vectors) + (4 * nlist * dimension))` bytes/vector, where `nlist` is the number of buckets into which to partition vectors. As an example, assume that you have 1 million vectors with a `dimension` of `256` and an `nlist` of `128`. The memory requirement can be estimated as follows: @@ -404,7 +404,7 @@ As an example, assume that you have 1 million vectors with a `dimension` of `256 ### Quantization techniques -If your vectors are of the type `float`, you need to first convert them to the `byte` type before ingesting the documents. This conversion is accomplished by _quantizing the dataset_---reducing the precision of its vectors. There are many quantization techniques, such as scalar quantization or product quantization (PQ), which is used in the Faiss engine. The choice of quantization technique depends on the type of data you're using and can affect the accuracy of recall values. The following sections describe the scalar quantization algorithms that were used to quantize the [k-NN benchmarking test](https://github.com/opensearch-project/opensearch-benchmark-workloads/tree/main/vectorsearch) data for the [L2](#scalar-quantization-for-the-l2-space-type) and [cosine similarity](#scalar-quantization-for-the-cosine-similarity-space-type) space types. The provided pseudocode is for illustration purposes only. +If your vectors are of the type `float`, you need to first convert them to the `byte` type before ingesting documents. This conversion is accomplished by _quantizing the dataset_---reducing the precision of its vectors. The Faiss engine supports several quantization techniques, such as scalar quantization (SQ) and product quantization (PQ). 
The choice of quantization technique depends on the type of data you're using and can affect the accuracy of recall values. The following sections describe the scalar quantization algorithms that were used to quantize the [k-NN benchmarking test](https://github.com/opensearch-project/opensearch-benchmark-workloads/tree/main/vectorsearch) data for the [L2](#scalar-quantization-for-the-l2-space-type) and [cosine similarity](#scalar-quantization-for-the-cosine-similarity-space-type) space types. The provided pseudocode is for illustration purposes only. #### Scalar quantization for the L2 space type @@ -520,7 +520,7 @@ There are several requirements for using binary vectors in the OpenSearch k-NN p - The `data_type` of the binary vector index must be `binary`. - The `space_type` of the binary vector index must be `hamming`. - The `dimension` of the binary vector index must be a multiple of 8. -- You must convert your binary data into 8-bit signed integers (`int8`) in the [-128, 127] range. For example, the binary sequence of 8 bits `0, 1, 1, 0, 0, 0, 1, 1` must be converted into its equivalent byte value of `99` to be used as a binary vector input. +- You must convert your binary data into 8-bit signed integers (`int8`) in the [-128, 127] range. For example, the binary sequence of 8 bits `0, 1, 1, 0, 0, 0, 1, 1` must be converted into its equivalent byte value of `99` in order to be used as a binary vector input. ### Example: HNSW @@ -642,7 +642,7 @@ The response contains the two vectors closest to the query vector: ### Example: IVF -The IVF method requires a training step that creates and trains the model used to initialize the native library index during segment creation. For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model). +The IVF method requires a training step that creates a model and trains it to initialize the native library index during segment creation. For more information, see [Building a vector index from a model]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-vector-index-from-a-model). First, create an index that will contain binary vector training data. Specify the Faiss engine and IVF algorithm and make sure that the `dimension` matches the dimension of the model you want to create: @@ -913,7 +913,7 @@ The memory required for HNSW can be estimated using the following formula, where #### IVF memory estimation -The memory required for IVF can be estimated using the following formula, where `nlist` is the number of buckets to partition vectors into: +The memory required for IVF can be estimated using the following formula, where `nlist` is the number of buckets into which to partition vectors: ```r 1.1 * (((dimension / 8) * num_vectors) + (nlist * dimension / 8)) diff --git a/_field-types/supported-field-types/knn-methods-engines.md b/_field-types/supported-field-types/knn-methods-engines.md index 2108af5237..d91ba8fcb8 100644 --- a/_field-types/supported-field-types/knn-methods-engines.md +++ b/_field-types/supported-field-types/knn-methods-engines.md @@ -19,7 +19,7 @@ An _engine_ is the library that implements these methods. 
Different engines can OpenSearch supports the following engines: - [**Lucene**](#lucene-engine): The native search library, offering an HNSW implementation with efficient filtering capabilities -- [**Faiss**](#faiss-engine) (Facebook AI Similarity Search): A comprehensive library implementing both HNSW and IVF methods, with additional vector compression options +- [**Faiss**](#faiss-engine) (Facebook AI Similarity Search): A comprehensive library implementing both the HNSW and IVF methods, with additional vector compression options - [**NMSLIB**](#nmslib-engine-deprecated) (Non-Metric Space Library): A legacy implementation of HNSW (now deprecated) ## Method definition example @@ -27,11 +27,11 @@ OpenSearch supports the following engines: A method definition contains the following components: - The `name` of the method (for example, `hnsw` or `ivf`) -- The `space_type` the method is built for (for example, `l2` or `cosinesimil`) +- The `space_type` for which the method is built (for example, `l2` or `cosinesimil`) - The `engine` that will implement the method (for example, `faiss` or `lucene`) - A map of `parameters` specific to that implementation -The following example configures an `hnsw` method with an `l2` space type, a `faiss` engine, and the method-specific parameters: +The following example configures an `hnsw` method with the `l2` space type, the `faiss` engine, and the method-specific parameters: ```json PUT test-index @@ -63,7 +63,7 @@ PUT test-index ``` {% include copy-curl.html %} -Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the specific engine section. +Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the section for a specific engine. {: .note} ## Common parameters @@ -72,14 +72,14 @@ The following parameters are common to all method definitions. Mapping parameter | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- -`name` | Yes | N/A | No | The nearest neighbor method. Valid values are `hnsw` and `ivf`. Not every engine combination supports each of the methods. For a list of supported methods, see the specific engine section. -`space_type` | No | `l2` | No | The vector space used to calculate the distance between vectors. Valid values are `l1`, `l2`, `linf`, `cosinesimil`, `innerproduct`, `hamming`, and `hammingbit`. Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the specific engine section. Note: This value can also be specified at the top level of the mapping. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). +`name` | Yes | N/A | No | The nearest neighbor method. Valid values are `hnsw` and `ivf`. Not every engine combination supports each of the methods. For a list of supported methods, see the section for a specific engine. +`space_type` | No | `l2` | No | The vector space used to calculate the distance between vectors. Valid values are `l1`, `l2`, `linf`, `cosinesimil`, `innerproduct`, `hamming`, and `hammingbit`. Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the section for a specific engine. Note: This value can also be specified at the top level of the mapping. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). `engine` | No | `faiss` | No | The approximate k-NN library to use for indexing and search. 
Valid values are `faiss`, `lucene`, and `nmslib` (deprecated). -`parameters` | No | `null` | No | The parameters used for the nearest neighbor method. For more information, see the specific engine section. +`parameters` | No | `null` | No | The parameters used for the nearest neighbor method. For more information, see the section for a specific engine. ## Lucene engine -The Lucene engine provides a native implementation of vector search directly within Lucene. It offers efficient filtering capabilities and is well-suited for smaller deployments. +The Lucene engine provides a native implementation of vector search directly within Lucene. It offers efficient filtering capabilities and is well suited for smaller deployments. ### Supported methods @@ -95,10 +95,10 @@ The HNSW method supports the following parameters. Parameter name | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- -`ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed.
Note: Lucene uses the term `beam_width` internally, but OpenSearch documentation uses `ef_construction` for consistency. -`m` | No | 16 | No | The number of bidirectional links created for each new element. Impacts memory consumption significantly. Keep between 2 and 100.
Note: Lucene uses the term `max_connections` internally, but OpenSearch documentation uses `m` for consistency. +`ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed.
Note: Lucene uses the term `beam_width` internally, but the OpenSearch documentation uses `ef_construction` for consistency. +`m` | No | 16 | No | The number of bidirectional links created for each new element. Impacts memory consumption significantly. Keep between `2` and `100`.
Note: Lucene uses the term `max_connections` internally, but the OpenSearch documentation uses `m` for consistency. -Lucene HNSW implementation ignores `ef_search` and dynamically sets it to the value of "k" in the search request. Therefore, there is no need to make settings for `ef_search` when using the Lucene engine. +The Lucene HNSW implementation ignores `ef_search` and dynamically sets it to the value of "k" in the search request. There is therefore no need to configure settings for `ef_search` when using the Lucene engine. {: .note} An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). @@ -140,7 +140,7 @@ Parameter name | Required | Default | Updatable | Description `ef_search` | No | 100 | No | The size of the dynamic list used during k-NN searches. Higher values result in more accurate but slower searches. `ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed. `m` | No | 16 | No | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between `2` and `100`. -`encoder` | No | flat | No | Encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index, at the expense of search accuracy. +`encoder` | No | flat | No | An encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index at the expense of search accuracy. An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). {: .note} @@ -151,15 +151,15 @@ The IVF method supports the following parameters. Parameter name | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- -`nlist` | No | 4 | No | Number of buckets to partition vectors into. Higher values may increase accuracy but increase memory and training latency. -`nprobes` | No | 1 | No | Number of buckets to search during query. Higher values increase accuracy but slow searches. -`encoder` | No | flat | No | Encoder definition for encoding vectors. +`nlist` | No | 4 | No | The number of buckets into which to partition vectors. Higher values may increase accuracy but also increase memory and training latency. +`nprobes` | No | 1 | No | The number of buckets to search during a query. Higher values result in more accurate but slower searches. +`encoder` | No | flat | No | An encoder definition for encoding vectors. For more information about these parameters, see the [Faiss documentation](https://github.com/facebookresearch/faiss/wiki/Faiss-indexes). ### IVF training requirements -The IVF algorithm requires a training step. To create an index that uses IVF, you need to train a model with the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model), passing the IVF method definition. IVF requires that, at a minimum, there are `nlist` training data points, but we recommend [that you use more than this](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index#how-big-is-the-dataset). Training data can be composed of either the same data that is going to be ingested or a separate dataset. +The IVF algorithm requires a training step. 
To create an index that uses IVF, you need to train a model with the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model), passing the IVF method definition. IVF requires, at a minimum, that there be `nlist` training data points, but we recommend [that you use more than this](https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index#how-big-is-the-dataset). Training data can be the same as the data you plan to index or come from a separate dataset. ### Supported encoders @@ -170,7 +170,7 @@ OpenSearch currently supports the following encoders in the Faiss library. Encoder name | Requires training | Description :--- | :--- | :--- `flat` (Default) | No | Encode vectors as floating-point arrays. This encoding does not reduce memory footprint. -[`pq`](#pq-parameters) | Yes | An abbreviation for _product quantization_, it is a lossy compression technique that uses clustering to encode a vector into a fixed size of bytes, with the goal of minimizing the drop in k-NN search accuracy. At a high level, vectors are broken up into `m` subvectors, and then each subvector is represented by a `code_size` code obtained from a code book produced during training. For more information about product quantization, see [this blog post](https://medium.com/dotstar/understanding-faiss-part-2-79d90b1e5388). +[`pq`](#pq-parameters) | Yes | An abbreviation for _product quantization_, PQ is a lossy compression technique that uses clustering to encode a vector into a fixed byte size, with the goal of minimizing the drop in k-NN search accuracy. At a high level, vectors are separated into `m` subvectors, and then each subvector is represented by a `code_size` code obtained from a code book produced during training. For more information about product quantization, see [this blog post](https://medium.com/dotstar/understanding-faiss-part-2-79d90b1e5388). [`sq`](#sq-parameters) | No | An abbreviation for _scalar quantization_. Starting with OpenSearch version 2.13, you can use the `sq` encoder to quantize 32-bit floating-point vectors into 16-bit floats. In version 2.13, the built-in `sq` encoder is the SQFP16 Faiss encoder. The encoder reduces memory footprint with a minimal loss of precision and improves performance by using SIMD optimization (using AVX2 on x86 architecture or Neon on ARM64 architecture). For more information, see [Faiss scalar quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/faiss-16-bit-quantization/). #### PQ parameters @@ -179,10 +179,10 @@ The `pq` encoder supports the following parameters. Parameter name | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- -`m` | No | `1` | No | Determines the number of subvectors into which to break the vector. Subvectors are encoded independently of each other. This vector dimension must be divisible by `m`. Maximum value is 1,024. +`m` | No | `1` | No | Determines the number of subvectors into which to separate the vector. Subvectors are encoded independently of each other. This vector dimension must be divisible by `m`. Maximum value is 1,024. `code_size` | No | `8` | No | Determines the number of bits into which to encode a subvector. Maximum value is `8`. For `ivf`, this value must be less than or equal to `8`. For `hnsw`, this value must be `8`. -The `hnsw` method supports the `pq` encoder for OpenSearch versions 2.10 and later. The `code_size` parameter of a `pq` encoder with the `hnsw` method must be **8**. 
+The `hnsw` method supports the `pq` encoder for OpenSearch version 2.10 and later. The `code_size` parameter of a `pq` encoder with the `hnsw` method must be **8**. {: .important} #### SQ parameters @@ -192,7 +192,7 @@ The `sq` encoder supports the following parameters. Parameter name | Required | Default | Updatable | Description :--- | :--- | :-- | :--- | :--- `type` | No | `fp16` | No | The type of scalar quantization to be used to encode 32-bit float vectors into the corresponding type. As of OpenSearch 2.13, only the `fp16` encoder type is supported. For the `fp16` encoder, vector values must be in the [-65504.0, 65504.0] range. -`clip` | No | `false` | No | If `true`, then any vector values outside of the supported range for the specified vector type are rounded so that they are in the range. If `false`, then the request is rejected if any vector values are outside of the supported range. Setting `clip` to `true` may decrease recall. +`clip` | No | `false` | No | If `true`, then any vector values outside of the supported range for the specified vector type are rounded so that they are within the range. If `false`, then the request is rejected if any vector values are outside of the supported range. Setting `clip` to `true` may decrease recall. For more information and examples, see [Using Faiss scalar quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/faiss-16-bit-quantization/). @@ -221,9 +221,9 @@ If your hardware supports advanced AVX-512(spr), OpenSearch loads the `libopense If your hardware supports AVX-512, OpenSearch loads the `libopensearchknn_faiss_avx512.so` library at runtime. -If your hardware supports AVX2 but doesn't support AVX-512, Open loads the `libopensearchknn_faiss_avx2.so` library at runtime. +If your hardware supports AVX2 but doesn't support AVX-512, OpenSearch loads the `libopensearchknn_faiss_avx2.so` library at runtime. -To disable the advanced AVX-512 (for Sapphire Rapids or newer-generation processors), AVX-512, and AVX2 SIMD instructions and load the non-optimized Faiss library (`libopensearchknn_faiss.so`), specify the `knn.faiss.avx512_spr.disabled`, `knn.faiss.avx512.disabled`, and `knn.faiss.avx2.disabled` static settings as `true` in `opensearch.yml` (by default, all of these are `false`). +To disable the advanced AVX-512 (for Sapphire Rapids or newer-generation processors), AVX-512, and AVX2 SIMD instructions and load the non-optimized Faiss library (`libopensearchknn_faiss.so`), specify the `knn.faiss.avx512_spr.disabled`, `knn.faiss.avx512.disabled`, and `knn.faiss.avx2.disabled` static settings as `true` in `opensearch.yml` (by default, all of these are set to `false`). Note that to update a static setting, you must stop the cluster, change the setting, and restart the cluster. For more information, see [Static settings]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#static-settings). 
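+
+For example, to load the non-optimized Faiss library on an x86 machine, you can add the following lines to `opensearch.yml` (an illustrative sketch that uses the static settings named above):
+
+```yml
+knn.faiss.avx512_spr.disabled: true
+knn.faiss.avx512.disabled: true
+knn.faiss.avx2.disabled: true
+```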
@@ -233,7 +233,7 @@ For the ARM64 architecture, only one performance-boosting Faiss library (`libope ### Example configurations -The following example uses the `ivf` method without specifying an encoder (by default, OpenSearch uses the `flat` encoder): +The following example uses the `ivf` method without specifying an encoder (by default, OpenSearch uses the `flat` encoder): ```json "method": { @@ -318,11 +318,11 @@ The following example uses the `hnsw` method with an `sq` encoder of type `fp16` ## NMSLIB engine (deprecated) -The Non-Metric Space Library (NMSLIB) engine was one of the first vector search implementations in OpenSearch. While still supported, it is deprecated in favor of the Faiss and Lucene engines. +The Non-Metric Space Library (NMSLIB) engine was one of the first vector search implementations in OpenSearch. While still supported, it has been deprecated in favor of the Faiss and Lucene engines. ### Supported methods -The NMSLIB engine supports the following methods. +The NMSLIB engine supports the following method. Method name | Requires training | Supported spaces :--- | :--- | :--- @@ -335,7 +335,7 @@ The HNSW method supports the following parameters. Parameter name | Required | Default | Updatable | Description :--- | :--- | :--- | :--- | :--- `ef_construction` | No | 100 | No | The size of the dynamic list used during k-NN graph creation. Higher values result in a more accurate graph but slower indexing speed. -`m` | No | 16 | No | The number of bidirectional links created for each new element. Impacts memory consumption significantly. Keep between 2 and 100. +`m` | No | 16 | No | The number of bidirectional links created for each new element. Impacts memory consumption significantly. Keep between `2` and `100`. For NMSLIB (deprecated), *ef_search* is set in the [index settings]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings). {: .note} @@ -359,7 +359,7 @@ An index created in OpenSearch version 2.11 or earlier will still use the old `e ## Choosing the right method -There are several options to choose from when building your `knn_vector` field. To determine the correct methods and parameters, you should first understand the requirements of your workload and what trade-offs you are willing to make. Factors to consider are (1) query latency, (2) query quality, (3) memory limits, and (4) indexing latency. +There are several options to choose from when building your `knn_vector` field. To select the correct method and parameters, you should first understand the requirements of your workload and what trade-offs you are willing to make. Factors to consider are (1) query latency, (2) query quality, (3) memory limits, and (4) indexing latency. If memory is not a concern, HNSW offers a strong query latency/query quality trade-off. @@ -389,17 +389,17 @@ In general, select Faiss for large-scale use cases. Lucene is a good option for In a typical OpenSearch cluster, a certain portion of RAM is reserved for the JVM heap. OpenSearch allocates native library indexes to a portion of the remaining RAM. This portion's size is determined by the `circuit_breaker_limit` cluster setting. By default, the limit is set to 50%. -Having a replica doubles the total number of vectors. +Using a replica doubles the total number of vectors. {: .note } -For information about using memory estimation with vector quantization, see the [Vector quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/). 
+For information about using memory estimation with vector quantization, see [Vector quantization]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/). {: .note } ### HNSW memory estimation The memory required for HNSW is estimated to be `1.1 * (4 * dimension + 8 * m)` bytes/vector. -As an example, assume you have a million vectors with a `dimension` of 256 and an `m` of 16. The memory requirement can be estimated as follows: +As an example, assume you have 1 million vectors with a `dimension` of 256 and an `m` of 16. The memory requirement can be estimated as follows: ```r 1.1 * (4 * 256 + 8 * 16) * 1,000,000 ~= 1.267 GB @@ -409,7 +409,7 @@ As an example, assume you have a million vectors with a `dimension` of 256 and a The memory required for IVF is estimated to be `1.1 * (((4 * dimension) * num_vectors) + (4 * nlist * d))` bytes. -As an example, assume you have a million vectors with a `dimension` of `256` and an `nlist` of `128`. The memory requirement can be estimated as follows: +As an example, assume you have 1 million vectors with a `dimension` of `256` and an `nlist` of `128`. The memory requirement can be estimated as follows: ```r 1.1 * (((4 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 1.126 GB diff --git a/_field-types/supported-field-types/knn-spaces.md b/_field-types/supported-field-types/knn-spaces.md index fe28fb2172..7b0ce09aab 100644 --- a/_field-types/supported-field-types/knn-spaces.md +++ b/_field-types/supported-field-types/knn-spaces.md @@ -13,9 +13,9 @@ In vector search, a _space_ defines how the distance (or similarity) between two ## Distance calculation -A space defines the function used to measure the distance between two points in order to determine the k-nearest neighbors. From the k-NN perspective, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a higher score equates to a better result. OpenSearch supports the following spaces. +A space defines the function used to measure the distance between two points in order to determine the k-nearest neighbors. In k-NN search, a lower score equates to a closer and better result. This is the opposite of how OpenSearch scores results, where a higher score equates to a better result. OpenSearch supports the following spaces. -Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the specific engine section in the [method documentation]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). +Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the section for a specific engine in the [method documentation]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). {: .note} | Space type | Search type | Distance function ($$d$$ ) | OpenSearch score | diff --git a/_field-types/supported-field-types/knn-vector.md b/_field-types/supported-field-types/knn-vector.md index d9a702b64e..6e10cbb601 100644 --- a/_field-types/supported-field-types/knn-vector.md +++ b/_field-types/supported-field-types/knn-vector.md @@ -69,7 +69,7 @@ PUT test-index ## Method definitions -[Method definitions]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) are used when the underlying [approximate k-NN (ANN)]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) algorithm does not require training. 
For example, the following `knn_vector` field specifies that Faiss implementation of HNSW should be used for ANN search. During indexing, Faiss builds the corresponding HNSW segment files:
+[Method definitions]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) are used when the underlying [approximate k-NN (ANN)]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) algorithm does not require training. For example, the following `knn_vector` field specifies that a Faiss implementation of HNSW should be used for ANN search. During indexing, Faiss builds the corresponding HNSW segment files:

```json
PUT test-index
@@ -101,7 +101,7 @@ PUT test-index
```
{% include copy-curl.html %}

-You can also specify the `space_type` on the top level:
+You can also specify the `space_type` at the top level:

```json
PUT test-index
@@ -163,12 +163,12 @@ Parameter | Data type | Description
:--- | :--- | :---
`type` | String | The vector field type. Must be `knn_vector`. Required.
`dimension` | Integer | The size of the vectors used. Valid values are in the [1, 16,000] range. Required.
-`data_type` | String | The data type for the vector elements. Valid values are `binary`, `byte`, and `float`. Optional. Default is `float`.
-`space_type` | String | The vector space used to calculate the distance between vectors. Valid values are `l1`, `l2`, `linf`, `cosinesimil`, `innerproduct`, `hamming`, and `hammingbit`. Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the specific engine section. Note: This value can also be specified within the `method`. Optional. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/).
+`data_type` | String | The data type of the vector elements. Valid values are `binary`, `byte`, and `float`. Optional. Default is `float`.
+`space_type` | String | The vector space used to calculate the distance between vectors. Valid values are `l1`, `l2`, `linf`, `cosinesimil`, `innerproduct`, `hamming`, and `hammingbit`. Not every method/engine combination supports each of the spaces. For a list of supported spaces, see the section for a specific engine. Note: This value can also be specified within the `method`. Optional. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/).
`mode` | String | Sets appropriate default values for k-NN parameters based on your priority: either low latency or low cost. Valid values are `in_memory` and `on_disk`. Optional. Default is `in_memory`. For more information, see [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/).
`compression_level` | String | Selects a quantization encoder that reduces vector memory consumption by the given factor. Valid values are `1x`, `2x`, `4x`, `8x`, `16x`, and `32x`. Optional. For more information, see [Memory-optimized vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized/).
`method` | Object | The algorithm used for organizing vector data at indexing time and searching it at search time. Used when the ANN algorithm does not require training. Optional. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/).
-`model_id` | String | The model ID for a trained model. Used when the ANN algorithm requires training. See [Model IDs](#model-ids). Optional.
+`model_id` | String | The model ID of a trained model. Used when the ANN algorithm requires training. See [Model IDs](#model-ids). Optional. ## Next steps From e19af365c34e657b17cb91c4efa35d042395b7ab Mon Sep 17 00:00:00 2001 From: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Date: Wed, 19 Feb 2025 16:56:48 -0500 Subject: [PATCH 29/32] Apply suggestions from code review Co-authored-by: Nathan Bower Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> --- _includes/home_cards.html | 2 +- _query-dsl/specialized/k-nn.md | 7 +++-- _vector-search/api.md | 18 ++++++------ _vector-search/creating-vector-index.md | 14 +++++----- _vector-search/filter-search-knn.md | 6 ++-- .../auto-generated-embeddings.md | 6 ++-- _vector-search/getting-started/concepts.md | 28 +++++++++---------- 7 files changed, 41 insertions(+), 40 deletions(-) diff --git a/_includes/home_cards.html b/_includes/home_cards.html index e467b82032..fb3639005a 100644 --- a/_includes/home_cards.html +++ b/_includes/home_cards.html @@ -56,7 +56,7 @@
OpenSearch Benchmark
-Measure performance metrics for your OpenSearch cluster.
+Measure OpenSearch cluster performance metrics.
diff --git a/_query-dsl/specialized/k-nn.md b/_query-dsl/specialized/k-nn.md
index 1cae567444..4d30ad235b 100644
--- a/_query-dsl/specialized/k-nn.md
+++ b/_query-dsl/specialized/k-nn.md
@@ -28,11 +28,11 @@ The top-level `vector_field` specifies the vector field against which to run a s
Field | Data type | Required/Optional | Description
:--- | :--- | :--- | :---
`vector` | Array of floats or bytes | Required | The query vector to use for vector search. The data type of the vector elements must match the data type of vectors indexed in the [`knn_vector` field]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/) searched.
-`k` | Integer | Optional | The number of nearest neighbors to return. Valid values are in the [1, 10,000] range. Required if `max_distance` or `min_score` is not specified.
+`k` | Integer | Optional | The number of nearest neighbors to return. Valid values are in the [1, 10,000] range. Required if neither `max_distance` nor `min_score` is specified.
`max_distance` | Float | Optional | The maximum distance threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/).
`min_score` | Float | Optional | The minimum score threshold for search results. Only one of `k`, `max_distance`, or `min_score` can be specified. For more information, see [Radial search]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/).
-`filter` | Object | Optional | A filter to apply to the k-NN search. For more information, see [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/). **Important**: Filter can only be used with the `faiss` or `lucene` engines.
-`method_parameters` | Object | Optional | Additional parameters for fine-tuning the search:
- `ef_search` (Integer): The number of vectors to examine (for `hnsw` method)
- `nprobes` (Integer): The number of buckets to examine (for `ivf` method). For more information, see [Specifying method parameters in the query](#specifying-method-parameters-in-the-query). +`filter` | Object | Optional | A filter to apply to the k-NN search. For more information, see [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/). **Important**: A filter can only be used with the `faiss` or `lucene` engines. +`method_parameters` | Object | Optional | Additional parameters for fine-tuning the search:
- `ef_search` (Integer): The number of vectors to examine (for the `hnsw` method)
- `nprobes` (Integer): The number of buckets to examine (for the `ivf` method). For more information, see [Specifying method parameters in the query](#specifying-method-parameters-in-the-query). `rescore` | Object or Boolean | Optional | Parameters for configuring rescoring functionality:
- `oversample_factor` (Float): Controls the oversampling of candidate vectors before ranking. Valid values are in the `[1.0, 100.0]` range. Default is `1.0` (no rescoring). To use the default `oversample_factor` of `1.0`, set `rescore` to `true`. For more information, see [Rescoring results](#rescoring-results).
`expand_nested_docs` | Boolean | Optional | When `true`, retrieves scores for all nested field documents within each parent document. Used with nested queries. For more information, see [Vector search with nested fields]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/nested-search-knn/).
@@ -176,6 +176,7 @@ Engine | Notes

## Rescoring results

You can fine-tune search by providing the `ef_search` and `oversample_factor` parameters.
+
The `oversample_factor` parameter controls the factor by which the search oversamples the candidate vectors before ranking them. Using a higher oversample factor means that more candidates will be considered before ranking, improving accuracy but also increasing search time. When selecting the `oversample_factor` value, consider the trade-off between accuracy and efficiency. For example, setting the `oversample_factor` to `2.0` will double the number of candidates considered during the ranking phase, which may help achieve better results.

The following request specifies the `ef_search` and `oversample_factor` parameters:
diff --git a/_vector-search/api.md b/_vector-search/api.md
index 3b2d864571..efefd28b19 100644
--- a/_vector-search/api.md
+++ b/_vector-search/api.md
@@ -46,9 +46,9 @@ Field | Description
`script_compilation_errors` | The number of errors during script compilation. This statistic is only relevant to k-NN score script search.
`script_query_requests` | The total number of script queries. This statistic is only relevant to k-NN score script search.
`script_query_errors` | The number of errors during script queries. This statistic is only relevant to k-NN score script search.
-`nmslib_initialized` | Boolean value indicating whether the `nmslib` JNI library has been loaded and initialized on the node.
-`faiss_initialized` | Boolean value indicating whether the `faiss` JNI library has been loaded and initialized on the node.
-`model_index_status` | Status of model system index. Valid values are `red`, `yellow`, and `green`. If the index does not exist, this value is `null`.
+`nmslib_initialized` | A Boolean value indicating whether the `nmslib` JNI library has been loaded and initialized on the node.
+`faiss_initialized` | A Boolean value indicating whether the `faiss` JNI library has been loaded and initialized on the node.
+`model_index_status` | The status of the model system index. Valid values are `red`, `yellow`, and `green`. If the index does not exist, this value is `null`.
`indexing_from_model_degraded` | A Boolean value indicating whether indexing from a model is degraded. This happens if there is not enough JVM memory to cache the models.
`training_requests` | The number of training requests made to the node.
`training_errors` | The number of training errors that have occurred on the node.
@@ -60,7 +60,7 @@ Some statistics contain *graph* in the name. In these cases, *graph* is synonymo

#### Example request

-The following examples show how to retrieve statistics related to the k-NN plugin.
+The following examples demonstrate how to retrieve statistics related to the k-NN plugin.
The following example fetches comprehensive statistics for the k-NN plugin across all nodes in the cluster:
@@ -162,7 +162,7 @@ GET /_plugins/_knn/warmup/index1,index2,index3?pretty
```
{% include copy-curl.html %}

-The `total` value indicates how many shards the k-NN plugin attempted to warm up. The response also includes the number of shards the plugin succeeded and failed to warm up.
+The `total` value indicates the number of shards that the k-NN plugin attempted to warm up. The response also includes the number of shards that the plugin successfully warmed up and the number that it failed to warm up.

The call does not return results until the warmup operation finishes or the request times out. If the request times out, then the operation continues on the cluster.

To monitor the warmup operation, use the OpenSearch `_tasks` API:
@@ -258,13 +258,13 @@ Response field | Description
`timestamp` | The date and time when the model was created.
`description` | A user-provided description of the model.
`error` | An error message explaining why the model is in a failed state.
-`space_type` | The space type for which this model is trained, for example, Euclidean or cosine. Note: This value can be set in the top level of the request.
+`space_type` | The space type for which the model is trained, for example, Euclidean or cosine. Note: This value can be set at the top level of the request.
`dimension` | The dimensionality of the vector space for which this model is designed.
`engine` | The native library used to create the model, either `faiss` or `nmslib` (deprecated).

#### Example request

-The following examples show how to retrieve information about a specific model using the k-NN plugin API.
+The following examples demonstrate how to retrieve information about a specific model using the k-NN plugin API.

The following example returns all the available information about the model:

```json
GET /_plugins/_knn/models/test-model?pretty
```
{% include copy-curl.html %}

-The following example shows how to selectively retrieve fields:
+The following example demonstrates how to selectively retrieve fields:

```json
GET /_plugins/_knn/models/test-model?pretty&filter_path=model_id,state
```
{% include copy-curl.html %}
@@ -394,7 +394,7 @@ Request field | Description
`max_training_vector_count` | The maximum number of vectors from the training index to be used for training. Defaults to all the vectors in the index. Optional.
`search_size` | The training data is pulled from the training index using scroll queries. This parameter defines the number of results to return per scroll query. Default is `10000`. Optional.
`description` | A user-provided description of the model. Optional.
-`method` | The configuration of the approximate k-NN method used for search operations. For more information about the available methods, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). The method requires training to be valid.
+`method` | The configuration of the approximate k-NN method used for search operations. For more information about the available methods, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). The method requires training in order to be valid.
`space_type` | The space type for which this model is trained, for example, Euclidean or cosine. Note: This value can also be set in the `method` parameter.
#### Example request diff --git a/_vector-search/creating-vector-index.md b/_vector-search/creating-vector-index.md index f5e10785d1..01baaca1f4 100644 --- a/_vector-search/creating-vector-index.md +++ b/_vector-search/creating-vector-index.md @@ -36,15 +36,15 @@ PUT /test-index {% include copy-curl.html %} -Regardless of the type of vector search, the following elements are part of creating a vector index: +Creating a vector index involves the following key steps: 1. **Enable k-NN search**: Set `index.knn` to `true` in the index settings to enable k-NN search functionality. 1. **Define a vector field**: - Specify the field that will store the vector data. When defining a `knn_vector` field in OpenSearch, you can select from different data types to balance storage requirements and performance. By default, k-NN vectors are float vectors, but you can also opt for byte or binary vectors for more efficient storage. For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/). + Specify the field that will store the vector data. When defining a `knn_vector` field in OpenSearch, you can select from different data types to balance storage requirements and performance. By default, k-NN vectors are float vectors, but you can also choose byte or binary vectors for more efficient storage. For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/). -1. **Specify dimension**: +1. **Specify the dimension**: Set the `dimension` property to match the size of the vectors used. 1. (Optional) **Choose a space type**: @@ -54,12 +54,12 @@ Regardless of the type of vector search, the following elements are part of crea Select a workload mode and/or compression level in order to optimize vector storage. For more information, see [Optimizing vector storage]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/). 1. (Optional, advanced) **Select a method**: - Configure the indexing method, such as HNSW or IVF, to optimize vector search performance. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). + Configure the indexing method, such as HNSW or IVF, used to optimize vector search performance. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). To create a vector index, choose one of the following options: -- [Store raw vectors or embeddings generated outside of OpenSearch](#storing-raw-vectors-or-embeddings-generated-outside-of-opensearch): Ingest pre-generated embeddings or raw vectors into your index for raw vector search. -- [Convert data to embeddings during ingestion](#converting-data-to-embeddings-during-ingestion): Ingest text that will be converted into vector embeddings within OpenSearch in order to perform semantic search using ML models. +- [Store raw vectors or embeddings generated outside of OpenSearch](#storing-raw-vectors-or-embeddings-generated-outside-of-opensearch): Ingest pregenerated embeddings or raw vectors into your index for raw vector search. +- [Convert data to embeddings during ingestion](#converting-data-to-embeddings-during-ingestion): Ingest text that will be converted into vector embeddings in OpenSearch in order to perform semantic search using machine learning (ML) models. The following table summarizes key index configuration differences for the supported use cases. 
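To make the steps above concrete, the following is a minimal sketch of a raw vector index that combines them. The index name `my-vector-index`, the field name `my_vector`, and the parameter values are illustrative, not prescriptive:

```json
PUT /my-vector-index
{
  "settings": {
    "index.knn": true
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 384,
        "space_type": "l2",
        "mode": "on_disk"
      }
    }
  }
}
```
{% include copy-curl.html %}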
@@ -94,7 +94,7 @@ PUT /my-raw-vector-index ## Converting data to embeddings during ingestion -To automatically generate embeddings during ingestion, configure an [ingest pipeline]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) with a model ID of the embedding model: +To automatically generate embeddings during ingestion, configure an [ingest pipeline]({{site.url}}{{site.baseurl}}/api-reference/ingest-apis/index/) with the model ID of the embedding model: ```json PUT /_ingest/pipeline/nlp-ingest-pipeline diff --git a/_vector-search/filter-search-knn.md b/_vector-search/filter-search-knn.md index fbf489e9e5..a4583ff443 100644 --- a/_vector-search/filter-search-knn.md +++ b/_vector-search/filter-search-knn.md @@ -12,8 +12,8 @@ redirect_from: To refine vector search results, you can filter a vector search using one of the following methods: - [Efficient k-NN filtering](#efficient-k-nn-filtering): This approach applies filtering _during_ the k-NN search, as opposed to before or after the k-NN search, which ensures that `k` results are returned (if there are at least `k` results in total). This approach is supported by the following engines: - - Lucene engine with a Hierarchical Navigable Small World (HNSW) algorithm (OpenSearch versions 2.4 and later) - - Faiss engine with an HNSW algorithm (OpenSearch versions 2.9 and later) or IVF algorithm (OpenSearch versions 2.10 and later) + - Lucene engine with a Hierarchical Navigable Small World (HNSW) algorithm (OpenSearch version 2.4 and later) + - Faiss engine with an HNSW algorithm (OpenSearch version 2.9 and later) or IVF algorithm (OpenSearch version 2.10 and later) - [Post-filtering](#post-filtering): Because it is performed after the k-NN search, this approach may return significantly fewer than `k` results for a restrictive filter. You can use the following two filtering strategies for this approach: - [Boolean post-filter](#boolean-filter-with-ann-search): This approach runs an [approximate nearest neighbor (ANN)]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) search and then applies a filter to the results. The two query parts are executed independently, and then the results are combined based on the query operator (`should`, `must`, and so on) provided in the query. @@ -261,7 +261,7 @@ For more ways to construct a filter, see [Constructing a filter](#constructing-a ### Faiss k-NN filter implementation -For k-NN searches, you can use `faiss` filters with an HNSW algorithm (OpenSearch versions 2.9 and later) or IVF algorithm (OpenSearch versions 2.10 and later). +For k-NN searches, you can use `faiss` filters with an HNSW algorithm (OpenSearch version 2.9 and later) or IVF algorithm (OpenSearch version 2.10 and later). When you specify a Faiss filter for a k-NN search, the Faiss algorithm decides whether to perform an exact k-NN search with pre-filtering or an approximate search with modified post-filtering. The algorithm uses the following variables: diff --git a/_vector-search/getting-started/auto-generated-embeddings.md b/_vector-search/getting-started/auto-generated-embeddings.md index 99e6219f67..903651520f 100644 --- a/_vector-search/getting-started/auto-generated-embeddings.md +++ b/_vector-search/getting-started/auto-generated-embeddings.md @@ -56,7 +56,7 @@ POST /_plugins/_ml/models/_register?deploy=true ``` {% include copy-curl.html %} -Registering a model is an asynchronous task. OpenSearch sends back a task ID for this task: +Registering a model is an asynchronous task. 
OpenSearch returns a task ID for this task: ```json { @@ -250,5 +250,5 @@ The response contains the matching documents: ## Next steps -- Learn about configuring semantic and hybrid search in the [Getting started with semantic and hybrid search]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/neural-search-tutorial/) -- Learn about the supported types of ML-powered search [ML-powered search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/) \ No newline at end of file +- See [Getting started with semantic and hybrid search]({{site.url}}{{site.baseurl}}/vector-search/getting-started/tutorials/neural-search-tutorial/) to learn about configuring semantic and hybrid search. +- See [ML-powered search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/) to learn about the supported types of ML-powered search. \ No newline at end of file diff --git a/_vector-search/getting-started/concepts.md b/_vector-search/getting-started/concepts.md index 1c21124cb0..07a238c0e8 100644 --- a/_vector-search/getting-started/concepts.md +++ b/_vector-search/getting-started/concepts.md @@ -13,11 +13,11 @@ This page defines key terms and techniques related to vector search in OpenSearc ### Vector embeddings -[_Vector embeddings_]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/#vector-embeddings) are numerical representations of data—such as text, images, or audio—that encode meaning or features into a high-dimensional space. These embeddings enable similarity-based comparisons for search and machine learning tasks. +[_Vector embeddings_]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/#vector-embeddings) are numerical representations of data—such as text, images, or audio—that encode meaning or features into a high-dimensional space. These embeddings enable similarity-based comparisons for search and machine learning (ML) tasks. ### Dense vectors -_Dense vectors_ are high-dimensional numerical representations where most elements have nonzero values. They are typically produced by deep learning models and are used in semantic search and machine learning applications. +_Dense vectors_ are high-dimensional numerical representations where most elements have nonzero values. They are typically produced by deep learning models and are used in semantic search and ML applications. ### Sparse vectors @@ -27,7 +27,7 @@ _Sparse vectors_ contain mostly zero values and are often used in techniques lik ### Vector search -[_Vector search_]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/), also known as similarity search or nearest neighbor search, is a technique for finding items that are most similar to a given input vector. It is widely used in applications such as recommendation systems, image retrieval, and natural language processing. +[_Vector search_]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/), also known as _similarity search_ or _nearest neighbor search_, is a technique for finding items that are most similar to a given input vector. It is widely used in applications such as recommendation systems, image retrieval, and natural language processing. ### Space @@ -41,15 +41,15 @@ A [_method_]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn- An [_engine_]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) is the underlying library that implements vector search methods. 
It determines how vectors are indexed, stored, and retrieved during similarity search operations. -## k-NN Search +## k-NN search ### k-NN search -_k-nearest neighbors (k-NN) search_ finds the k most similar vectors to a given query vector within an index. The similarity is determined based on a specified distance metric. +_k-nearest neighbors (k-NN) search_ finds the k most similar vectors to a given query vector in an index. The similarity is determined based on a specified distance metric. ### Exact k-NN search -[_Exact k-NN search_]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/knn-score-script/) performs a brute-force comparison between a query vector and all vectors in the index, computing the exact nearest neighbors. This approach provides high accuracy but can be computationally expensive for large datasets. +[_Exact k-NN search_]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/knn-score-script/) performs a brute-force comparison between a query vector and all vectors in an index, computing the exact nearest neighbors. This approach provides high accuracy but can be computationally expensive for large datasets. ### Approximate k-NN search @@ -59,15 +59,15 @@ _k-nearest neighbors (k-NN) search_ finds the k most similar vectors to a given ### k-NN query -[_k-NN query_]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) searches vector fields using a query vector. +A [_k-NN query_]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) searches vector fields using a query vector. ### Neural query -[_Neural query_]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/) searches vector fields using text or image data. +A [_neural query_]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/) searches vector fields using text or image data. ### Neural sparse query -[_Neural sparse query_]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural-sparse/) searches vector fields using raw text or sparse vector tokens. +A [_neural sparse query_]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural-sparse/) searches vector fields using raw text or sparse vector tokens. ## Search techniques @@ -81,7 +81,7 @@ _k-nearest neighbors (k-NN) search_ finds the k most similar vectors to a given ### Multimodal search -[_Multimodal search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/multimodal-search/) enables searching across multiple types of data, such as text and images. It allows queries in one format (for example, text) to retrieve results in another (for example, images). +[_Multimodal search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/multimodal-search/) enables you to search across multiple types of data, such as text and images. It allows queries in one format (for example, text) to retrieve results in another (for example, images). ### Radial search @@ -93,11 +93,11 @@ _k-nearest neighbors (k-NN) search_ finds the k most similar vectors to a given ### Conversational search -[_Conversational search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/conversational-search/) allows you to interact with a search system using natural language queries and refine results through follow-up questions. This approach enhances user experience by making search more intuitive and interactive. 
+[_Conversational search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/conversational-search/) allows you to interact with a search system using natural language queries and refine results through follow-up questions. This approach enhances the user experience by making search more intuitive and interactive. ### Retrieval-augmented generation -[_Retrieval-augmented generation (RAG)_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/conversational-search/#rag) enhances large language models (LLMs) by retrieving relevant information from an index and incorporating it into the model’s response. This approach improves the accuracy and relevance of generated text. +[_Retrieval-augmented generation (RAG)_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/conversational-search/#rag) enhances large language models (LLMs) by retrieving relevant information from an index and incorporating it into the model's response. This approach improves the accuracy and relevance of generated text. ## Indexing and storage techniques @@ -115,11 +115,11 @@ _Scalar quantization_ reduces vector precision by mapping floating-point values ### Product quantization -_Product quantization_ divides high-dimensional vectors into smaller subspaces and quantizes each subspace separately, enabling efficient approximate nearest neighbor search with reduced memory usage. +_Product quantization (PQ)_ divides high-dimensional vectors into smaller subspaces and quantizes each subspace separately, enabling efficient approximate nearest neighbor search with reduced memory usage. ### Binary quantization -_Binary quantization_ compresses vector representations by converting numerical values into binary formats. This technique reduces storage requirements and speeds up similarity computations. +_Binary quantization_ compresses vector representations by converting numerical values to binary formats. This technique reduces storage requirements and accelerates similarity computations. 
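As a rough sense of scale (an illustrative calculation, not a benchmark): a 768-dimensional vector stored as 32-bit floats occupies $$768 \times 4 = 3072$$ bytes, while its binary-quantized form occupies $$768 / 8 = 96$$ bytes, a 32x reduction before any index overhead.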
### Disk-based vector search From 8c4945925f75369bc8e209e29baba535ecc5dffb Mon Sep 17 00:00:00 2001 From: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Date: Wed, 19 Feb 2025 18:06:39 -0500 Subject: [PATCH 30/32] Apply suggestions from code review Co-authored-by: Nathan Bower Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> --- _vector-search/getting-started/index.md | 6 ++--- .../getting-started/tutorials/index.md | 2 +- .../getting-started/vector-search-basics.md | 4 ++-- .../getting-started/vector-search-options.md | 14 +++++------ _vector-search/index.md | 6 ++--- _vector-search/ingesting-data.md | 2 +- _vector-search/ml-powered-search/index.md | 6 ++--- _vector-search/optimizing-storage/index.md | 4 ++-- _vector-search/performance-tuning-indexing.md | 2 +- _vector-search/performance-tuning-search.md | 12 +++++----- _vector-search/performance-tuning.md | 24 +++++++++---------- _vector-search/searching-data.md | 4 ++-- _vector-search/settings.md | 12 +++++----- .../specialized-operations/index.md | 2 +- .../approximate-knn.md | 14 +++++------ .../vector-search-techniques/index.md | 6 ++--- .../knn-score-script.md | 12 +++++----- .../painless-functions.md | 20 ++++++++-------- 18 files changed, 76 insertions(+), 76 deletions(-) diff --git a/_vector-search/getting-started/index.md b/_vector-search/getting-started/index.md index c81385b93f..254aea9b7b 100644 --- a/_vector-search/getting-started/index.md +++ b/_vector-search/getting-started/index.md @@ -10,7 +10,7 @@ redirect_from: # Getting started with vector search -This guide shows you how to bring your own vectors to OpenSearch. In this example, you'll create a vector index, ingest vector data into the index, and search the data. +This guide shows you how to bring your own vectors into OpenSearch. In this example, you'll create a vector index, ingest vector data into the index, and search the data. ## Prerequisite: Install OpenSearch @@ -69,7 +69,7 @@ For more information about installing OpenSearch, see [Installation quickstart]( ## Step 1: Create a vector index -First, create an index that will store sample hotel data. To signal to OpenSearch that this is a vector index, set `index.knn` to `true`. You'll store the vectors in a vector field called `location`. The vectors you'll ingest will be two-dimensional and the distance between vectors will be calculated using [Euclidean `l2` similarity metric]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/#calculating-similarity): +First, create an index that will store sample hotel data. To signal to OpenSearch that this is a vector index, set `index.knn` to `true`. You'll store the vectors in a vector field named `location`. 
The vectors you'll ingest will be two-dimensional, and the distance between vectors will be calculated using the [Euclidean `l2` similarity metric]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/#calculating-similarity): ```json PUT /hotels-index @@ -197,6 +197,6 @@ If your data isn't already in vector format, you can generate vector embeddings ## Next steps - [Vector search basics]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/) -- [Bringing your own or generating embeddings ]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-options/) +- [Bringing your own or generating embeddings]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-options/) - [Vector search with filters]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/) - [Generating vector embeddings within OpenSearch]({{site.url}}{{site.baseurl}}/vector-search/getting-started/auto-generated-embeddings/) \ No newline at end of file diff --git a/_vector-search/getting-started/tutorials/index.md b/_vector-search/getting-started/tutorials/index.md index 6840d10f10..27fdd99f61 100644 --- a/_vector-search/getting-started/tutorials/index.md +++ b/_vector-search/getting-started/tutorials/index.md @@ -43,7 +43,7 @@ agents: # Tutorials -Explore these tutorials to build ML-powered search applications, from semantic search to custom chatbots. +Explore these tutorials to learn how to build machine learning (ML)-powered search applications, from semantic search applications to custom chatbots.
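Returning to the hotel quickstart above, a minimal search against the `location` field might look like the following sketch (the query vector and `k` value are illustrative):

```json
POST /hotels-index/_search
{
  "size": 3,
  "query": {
    "knn": {
      "location": {
        "vector": [5.0, 4.0],
        "k": 3
      }
    }
  }
}
```
{% include copy-curl.html %}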
diff --git a/_vector-search/getting-started/vector-search-basics.md b/_vector-search/getting-started/vector-search-basics.md index a628b6bee0..f45e2f28de 100644 --- a/_vector-search/getting-started/vector-search-basics.md +++ b/_vector-search/getting-started/vector-search-basics.md @@ -7,7 +7,7 @@ nav_order: 10 # Vector search basics -Vector search, also known as similarity search or nearest neighbor search, is a powerful technique for finding items that are most similar to a given input. Use cases include semantic search to understand user intent, recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about vector search, see [Nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search). +_Vector search_, also known as _similarity search_ or _nearest neighbor search_, is a powerful technique for finding items that are most similar to a given input. Use cases include semantic search to understand user intent, recommendations (for example, an "other songs you might like" feature in a music application), image recognition, and fraud detection. For more background information about vector search, see [Nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search). ## Vector embeddings @@ -27,7 +27,7 @@ To learn more about the types of vector search that OpenSearch supports, see [Ve ## Calculating similarity -Vector similarity measures how close two vectors are in a multi-dimensional space, enabling tasks like nearest neighbor search and ranking results by relevance. OpenSearch supports multiple distance metrics (_spaces_) for calculating vector similarity: +Vector similarity measures how close two vectors are in a multi-dimensional space, facilitating tasks like nearest neighbor search and ranking results by relevance. OpenSearch supports multiple distance metrics (_spaces_) for calculating vector similarity: - **L1 (Manhattan distance):** Sums the absolute differences between vector components. - **L2 (Euclidean distance):** Calculates the square root of the sum of squared differences, making it sensitive to magnitude. diff --git a/_vector-search/getting-started/vector-search-options.md b/_vector-search/getting-started/vector-search-options.md index 8039f671b0..b7b9013b07 100644 --- a/_vector-search/getting-started/vector-search-options.md +++ b/_vector-search/getting-started/vector-search-options.md @@ -18,7 +18,7 @@ pre_items: - heading: "Generate embeddings" description: "Generate embeddings outside of OpenSearch using your favorite embedding utility." - heading: "Create an OpenSearch index" - description: "Create an OpenSearch index to upload your embeddings." + description: "Create an OpenSearch index to store your embeddings." link: "/vector-search/creating-vector-index/#storing-raw-vectors-or-embeddings-generated-outside-of-opensearch" - heading: "Ingest embeddings" description: "Ingest your embeddings into the index." @@ -28,10 +28,10 @@ pre_items: link: "/vector-search/searching-data/#searching-raw-vectors" auto_items: - heading: "Configure an embedding model" - description: "Configure a machine learning model that will automatically generate embeddings from your text at ingest time and query time." + description: "Configure a machine learning model that will automatically generate embeddings from your text at ingestion time and query time." 
link: "/ml-commons-plugin/integrating-ml-models/" - heading: "Create an OpenSearch index" - description: "Create an OpenSearch index to upload your text." + description: "Create an OpenSearch index to store your text." link: "/vector-search/creating-vector-index/#converting-data-to-embeddings-during-ingestion" - heading: "Ingest text" description: "Ingest your text into the index." @@ -43,12 +43,12 @@ auto_items: # Bringing your own or generating embeddings -In OpenSearch, you can either bring your own vectors or let OpenSearch generate them automatically from your data. Automated embedding generation integrated into OpenSearch reduces data preprocessing effort at ingestion and search time. +In OpenSearch, you can either bring your own vectors or let OpenSearch generate them automatically from your data. Letting OpenSearch automatically generate your embeddings reduces data preprocessing effort at ingestion and search time. ### Option 1: Bring your own raw vectors or generated embeddings You already have pre-computed embeddings or raw vectors from external tools or services. - - **Ingestion**: Ingest pre-generated embeddings directly into OpenSearch. + - **Ingestion**: Ingest pregenerated embeddings directly into OpenSearch. ![Pre-generated embeddings ingestion]({{site.url}}{{site.baseurl}}/images/vector-search/raw-vector-ingest.png) - **Search**: Perform vector search to find the vectors that are closest to a query vector. @@ -71,8 +71,8 @@ Working with embeddings generated outside of OpenSearch involves the following s ### Option 2: Generate embeddings within OpenSearch -OpenSearch automatically generates vector embeddings from your data using a machine learning (ML) model. - - **Ingestion**: You ingest plain data, and OpenSearch uses an ML model to generate embeddings dynamically. +Use this option to let OpenSearch automatically generate vector embeddings from your data using a machine learning (ML) model. + - **Ingestion**: You ingest plain data, and OpenSearch uses an ML model to generate embeddings dynamically. ![Auto-generated embeddings ingestion]({{site.url}}{{site.baseurl}}/images/vector-search/auto-vector-ingest.png) - **Search**: At query time, OpenSearch uses the same ML model to convert your input data to embeddings, and these embeddings are used for vector search. diff --git a/_vector-search/index.md b/_vector-search/index.md index e8251a737b..43308f183c 100644 --- a/_vector-search/index.md +++ b/_vector-search/index.md @@ -13,7 +13,7 @@ redirect_from: # Vector search -Vector search enhances traditional lexical search by encoding data such as text or images as vectors, enabling similarity search with semantic understanding. OpenSearch unifies traditional search, analytics, and vector search in a single solution. As a vector database, it streamlines AI application development by efficiently storing and retrieving high-dimensional data. +Vector search enhances traditional lexical search by encoding data such as text or images as vectors, enabling similarity search with semantic understanding. OpenSearch unifies traditional search, analytics, and vector search into a single solution. As a vector database, it streamlines AI application development by efficiently storing and retrieving high-dimensional data. 
[Get started]({{site.url}}{{site.baseurl}}/vector-search/getting-started/){: .btn-dark-blue} @@ -25,6 +25,6 @@ OpenSearch vector search supports the following key features: - [**Automatic embedding generation**]({{site.url}}{{site.baseurl}}/vector-search/getting-started/auto-generated-embeddings/): Generate vector embeddings dynamically within OpenSearch using built-in machine learning models, eliminating the need for external preprocessing of your data. - [**Advanced filtering capabilities**]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/): Combine vector search with traditional filters to refine results, enabling precise control over search outcomes while maintaining semantic relevance. -- [**Multi-vector support**]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/nested-search-knn/): Store and search multiple vectors per document using nested fields, useful for complex documents with multiple components requiring separate vector representations. -- [**Memory-efficient search**]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/): Optimize memory usage through various quantization techniques and efficient indexing methods, making vector search practical even with large-scale deployments. +- [**Multi-vector support**]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/nested-search-knn/): Store and search multiple vectors per document using nested fields, which can be useful for complex documents with multiple components requiring separate vector representations. +- [**Memory-efficient search**]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/): Optimize memory usage through various quantization techniques and efficient indexing methods, making vector search practical even in large-scale deployments. - [**Hybrid search capabilities**]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/hybrid-search/): Combine traditional keyword search with vector-based semantic search to use the strengths of both approaches, improving search relevance and accuracy. diff --git a/_vector-search/ingesting-data.md b/_vector-search/ingesting-data.md index 70ab3b83eb..4987757736 100644 --- a/_vector-search/ingesting-data.md +++ b/_vector-search/ingesting-data.md @@ -10,7 +10,7 @@ After creating a vector index, you need to either ingest raw vector data or conv ## Comparison of ingestion methods -The following table compares ingestion for each vector search method. +The following table compares the two ingestion methods. | Feature | Data format | Ingest pipeline | Vector generation | Additional fields | |-------------------------------|----------------------------|---------------------|---------------------------------|-----------------------------------| diff --git a/_vector-search/ml-powered-search/index.md b/_vector-search/ml-powered-search/index.md index d412a5df57..3f3f28c086 100644 --- a/_vector-search/ml-powered-search/index.md +++ b/_vector-search/ml-powered-search/index.md @@ -33,17 +33,17 @@ search_method_cards: description: "Uses sparse retrieval based on sparse embedding models to search text data." link: "/vector-search/ml-powered-search/neural-sparse-search/" - heading: "Conversational search with RAG" - description: "Uses retrieval-augmented generation and conversational memory to provide context-aware responses." + description: "Uses retrieval-augmented generation (RAG) and conversational memory to provide context-aware responses." 
link: "/vector-search/ml-powered-search/conversational-search/" chunking_cards: - heading: "Text chunking" - description: "Use text chunking to ensure adherence to token limit for embedding models." + description: "Use text chunking to ensure adherence to embedding model token limits." link: "/vector-search/ml-powered-search/text-chunking/" --- # ML-powered search -ML-powered search streamlines your workflow by generating embeddings automatically. OpenSearch converts text into vectors during indexing and querying. It creates and indexes vector embeddings for documents, then processes query text into embeddings to find and return the most relevant results. +Machine learning (ML)-powered search streamlines your workflow by generating embeddings automatically. OpenSearch converts text to vectors during indexing and querying. It creates and indexes vector embeddings for documents and then processes query text into embeddings to find and return the most relevant results. ## Prerequisite diff --git a/_vector-search/optimizing-storage/index.md b/_vector-search/optimizing-storage/index.md index 1892230801..7c0e2c49cb 100644 --- a/_vector-search/optimizing-storage/index.md +++ b/_vector-search/optimizing-storage/index.md @@ -11,12 +11,12 @@ storage_cards: description: "Reduce vector storage space by quantizing vectors." link: "/vector-search/optimizing-storage/knn-vector-quantization/" - heading: "Disk-based vector search" - description: "Uses binary quantization to reduce operational costs of vector workloads." + description: "Uses binary quantization to reduce the operational costs of vector workloads." link: "/vector-search/optimizing-storage/disk-based-vector-search/" --- # Optimizing vector storage -Vector search operations can be resource-intensive, especially when dealing with large-scale vector datasets. OpenSearch provides several optimization techniques to reduce memory usage. +Vector search operations can be resource intensive, especially when dealing with large-scale vector datasets. OpenSearch provides several optimization techniques for reducing memory usage. {% include cards.html cards=page.storage_cards %} \ No newline at end of file diff --git a/_vector-search/performance-tuning-indexing.md b/_vector-search/performance-tuning-indexing.md index 65d9c6747c..61435ea4da 100644 --- a/_vector-search/performance-tuning-indexing.md +++ b/_vector-search/performance-tuning-indexing.md @@ -33,7 +33,7 @@ Make sure to reenable `refresh_interval` after indexing is complete. If your hardware has multiple cores, you can allow multiple threads in native library index construction by speeding up the indexing process. Determine the number of threads to allot with the [knn.algo_param.index_thread_qty]({{site.url}}{{site.baseurl}}/search-plugins/knn/settings#cluster-settings) setting. -Monitor CPU utilization and choose the correct number of threads. Because native library index construction is costly, choosing more threads then you need can cause additional CPU load. +Monitor CPU utilization and choose the correct number of threads. Because native library index construction is costly, choosing more threads than you need can cause additional CPU load. 
## (Expert level) Disable vector field storage in the source field
diff --git a/_vector-search/performance-tuning-search.md b/_vector-search/performance-tuning-search.md
index 48a22ab7b6..1e83a31bd0 100644
--- a/_vector-search/performance-tuning-search.md
+++ b/_vector-search/performance-tuning-search.md
@@ -13,15 +13,15 @@ Take the following steps to improve search performance.

To improve search performance, you must keep the number of segments under control. Lucene's IndexSearcher searches over all of the segments in a shard to find the 'size' best results.

-Ideally, having one segment per shard provides the optimal performance with respect to search latency. You can configure an index to have multiple shards to avoid giant shards and achieve more parallelism.
+Having one segment per shard provides optimal performance with respect to search latency. You can configure an index to have multiple shards in order to avoid very large shards and achieve more parallelism.

-You can control the number of segments by choosing a larger refresh interval, or during indexing by asking OpenSearch to slow down segment creation by disabling the refresh interval.
+You can control the number of segments by choosing a larger refresh interval or by disabling the refresh interval during indexing, which slows segment creation.

## Warm up the index

-Native library indexes are constructed during indexing, but they're loaded into memory during the first search. In Lucene, each segment is searched sequentially (so, for k-NN, each segment returns up to k nearest neighbors of the query point), and the top 'size' number of results based on the score are returned from all the results returned by segments at a shard level (higher score = better result).
+Native library indexes are constructed during indexing, but they're loaded into memory during the first search. In Lucene, each segment is searched sequentially (so, for k-NN, each segment returns up to k nearest neighbors of the query point). The top `size` results, ranked by score, are returned from all segment-level results within a shard (a higher score indicates a better result).

-Once a native library index is loaded (native library indexes are loaded outside OpenSearch JVM), OpenSearch caches them in memory. Initial queries are expensive and take a few seconds, while subsequent queries are faster and take milliseconds (assuming the k-NN circuit breaker isn't hit).
+Once a native library index is loaded (native library indexes are loaded outside of the OpenSearch JVM), OpenSearch caches it in memory. Initial queries are expensive and complete in a few seconds, while subsequent queries are faster and complete in milliseconds (assuming that the k-NN circuit breaker isn't triggered).

To avoid this latency penalty during your first queries, you can use the warmup API operation on the indexes you want to search:
@@ -37,7 +37,7 @@ GET /_plugins/_knn/warmup/index1,index2,index3?pretty
```
{% include copy-curl.html %}

-The warmup API operation loads all native library indexes for all shards (primary and replica) for the specified indexes into the cache, so there's no penalty to load native library indexes during initial searches.
+The warmup API operation loads all native library indexes for all shards (primaries and replicas) for the specified indexes into the cache, so there's no penalty for loading native library indexes during initial searches.

This API operation only loads the segments of active indexes into the cache.
If a merge or refresh operation finishes after the API runs, or if you add new documents, you need to rerun the API to load those native library indexes into memory. {: .warning} @@ -45,5 +45,5 @@ This API operation only loads the segments of active indexes into the cache. If ## Avoid reading stored fields -If your use case is only to read the IDs and scores of the nearest neighbors, you can disable reading stored fields, which saves time retrieving the vectors from stored fields. +If your use case only involves reading the IDs and scores of the nearest neighbors, you can disable the reading of stored fields, which saves time that would otherwise be spent retrieving the vectors from stored fields. diff --git a/_vector-search/performance-tuning.md b/_vector-search/performance-tuning.md index 433e17a73c..3b12a6adf7 100644 --- a/_vector-search/performance-tuning.md +++ b/_vector-search/performance-tuning.md @@ -9,29 +9,29 @@ redirect_from: # Performance tuning -This topic provides performance tuning recommendations to improve indexing and search performance for approximate k-NN (ANN). From a high level, k-NN works according to these principles: -* Vector indexes are created per knn_vector field / (Lucene) segment pair. -* Queries execute on segments sequentially inside the shard (same as any other OpenSearch query). -* The coordinator node picks up final size number of neighbors from the neighbors returned by each shard. +This topic provides performance tuning recommendations for improving indexing and search performance for approximate k-NN (ANN) search. At a high level, k-NN works according to these principles: +* Vector indexes are created per `knn_vector` field/Lucene segment pair. +* Queries execute sequentially on segments in the shard (as with any other OpenSearch query). +* The coordinator node selects the final `size` neighbors from the neighbors returned by each shard. -This topic also provides recommendations for comparing approximate k-NN to exact k-NN with score script. +The following sections provide recommendations regarding comparing ANN to exact k-NN with a score script. ## Recommendations for engines and cluster node sizing -Each of the three engines used for approximate k-NN search has its own attributes that make one more sensible to use than the others in a given situation. Use the following information to help determine which engine will best meet your requirements. +Each of the three engines used for ANN search has attributes that make it more sensible to use than the others in a given situation. Use the following information to help determine which engine will best meet your requirements. To optimize for indexing throughput, Faiss is a good option. For relatively smaller datasets (up to a few million vectors), the Lucene engine demonstrates better latencies and recall. At the same time, the size of the index is smallest compared to the other engines, which allows it to use smaller AWS instances for data nodes. For further considerations, see [Choosing the right method]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#choosing-the-right-method) and [Memory estimation]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#memory-estimation). -When considering cluster node sizing, a general approach is to first establish an even distribution of the index across the cluster. However, there are other considerations. 
To help make these choices, you can refer to the OpenSearch managed service guidance in the section [Sizing domains](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/sizing-domains.html). +When considering cluster node sizing, a general approach is to first establish an even distribution of the index across the cluster. However, there are other considerations. To help make these choices, you can refer to the OpenSearch managed service guidance in the [Sizing domains](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/sizing-domains.html) section. ## Improving recall -Recall depends on multiple factors like number of vectors, number of dimensions, segments, and so on. Searching over a large number of small segments and aggregating the results leads to better recall than searching over a small number of large segments and aggregating results. The larger the native library index, the more chances of losing recall if you're using smaller algorithm parameters. Choosing larger values for algorithm parameters should help solve this issue but sacrifices search latency and indexing time. That being said, it's important to understand your system's requirements for latency and accuracy, and then choose the number of segments you want your index to have based on experimentation. +Recall depends on multiple factors, such as the number of vectors, dimensions, segments, and so on. Searching a large number of small segments and aggregating the results leads to better recall than searching a small number of large segments and aggregating the results. Larger native library indexes are more likely to lose recall if you're using smaller algorithm parameters. Choosing larger values for algorithm parameters should help solve this issue but sacrifices search latency and indexing time. It's important to understand your system's requirements for latency and accuracy and then choose the number of segments based on experimentation. -The default parameters work on a broader set of use cases, but make sure to run your own experiments on your data sets and choose the appropriate values. For index-level settings, see [Index settings]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings). +The default parameters work for a broader set of use cases, but make sure to run your own experiments on your datasets and choose the appropriate values. For index-level settings, see [Index settings]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings). -## Approximate nearest neighbor compared to score script +## ANN compared to score script -The standard k-NN query and custom scoring option perform differently. Test with a representative set of documents to see if the search results and latencies match your expectations. +The standard k-NN query and custom scoring options perform differently. Run tests with a representative set of documents to see if the search results and latencies match your expectations. -Custom scoring works best if the initial filter reduces the number of documents to no more than 20,000. Increasing shard count can improve latency, but be sure to keep shard size within the [recommended guidelines]({{site.url}}{{site.baseurl}}/intro/#primary-and-replica-shards). \ No newline at end of file +Custom scoring works best if the initial filter reduces the number of documents to no more than 20,000. 
Increasing the shard count can improve latency, but be sure to keep the shard size within the [recommended guidelines]({{site.url}}{{site.baseurl}}/intro/#primary-and-replica-shards). \ No newline at end of file diff --git a/_vector-search/searching-data.md b/_vector-search/searching-data.md index bcdb9a4106..44285da4cb 100644 --- a/_vector-search/searching-data.md +++ b/_vector-search/searching-data.md @@ -8,7 +8,7 @@ nav_order: 35 OpenSearch supports various methods for searching vector data, tailored to how the vectors were created and indexed. This guide explains the query syntax and options for raw vector search and auto-generated embedding search. -## Search types comparison +## Search type comparison The following table compares the search syntax and typical use cases for each vector search method. @@ -38,7 +38,7 @@ GET /my-raw-vector-index/_search ## Searching auto-generated embeddings -For ML-powered searches using auto-generated embeddings, use the `neural` query type and provide query text input: +For machine learning (ML)-powered searches using auto-generated embeddings, use the `neural` query type and provide query text input: ```json GET /my-semantic-search-index/_search diff --git a/_vector-search/settings.md b/_vector-search/settings.md index 94944bae88..bebfc878ce 100644 --- a/_vector-search/settings.md +++ b/_vector-search/settings.md @@ -18,29 +18,29 @@ Setting | Static/Dynamic | Default | Description :--- | :--- | :--- | :--- `knn.plugin.enabled`| Dynamic | `true` | Enables or disables the k-NN plugin. `knn.algo_param.index_thread_qty` | Dynamic | `1` | The number of threads used for native library and Lucene library (for OpenSearch version 2.19 and later) index creation. Keeping this value low reduces the CPU impact of the k-NN plugin but also reduces indexing performance. -`knn.cache.item.expiry.enabled` | Dynamic | `false` | Whether to remove native library indexes that have not been accessed for a certain duration from memory. +`knn.cache.item.expiry.enabled` | Dynamic | `false` | Whether to remove native library indexes from memory that have not been accessed in a specified period of time. `knn.cache.item.expiry.minutes` | Dynamic | `3h` | If enabled, the amount of idle time before a native library index is removed from memory. `knn.circuit_breaker.unset.percentage` | Dynamic | `75` | The native memory usage threshold for the circuit breaker. Memory usage must be lower than this percentage of `knn.memory.circuit_breaker.limit` in order for `knn.circuit_breaker.triggered` to remain `false`. -`knn.circuit_breaker.triggered` | Dynamic | `false` | True when memory usage exceeds the `knn.circuit_breaker.unset.percentage` value. +`knn.circuit_breaker.triggered` | Dynamic | `false` | `true` when memory usage exceeds the `knn.circuit_breaker.unset.percentage` value. `knn.memory.circuit_breaker.limit` | Dynamic | `50%` | The native memory limit for native library indexes. At the default value, if a machine has 100 GB of memory and the JVM uses 32 GB, then the k-NN plugin uses 50% of the remaining 68 GB (34 GB). If memory usage exceeds this value, then the plugin removes the native library indexes used least recently. `knn.memory.circuit_breaker.enabled` | Dynamic | `true` | Whether to enable the k-NN memory circuit breaker. `knn.model.index.number_of_shards`| Dynamic | `1` | The number of shards to use for the model system index, which is the OpenSearch index that stores the models used for approximate nearest neighbor (ANN) search. 
`knn.model.index.number_of_replicas`| Dynamic | `1` | The number of replica shards to use for the model system index. Generally, in a multi-node cluster, this value should be at least 1 in order to increase stability. `knn.model.cache.size.limit` | Dynamic | `10%` | The model cache limit cannot exceed 25% of the JVM heap. -`knn.faiss.avx2.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx2.so` library and load the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#simd-optimization). +`knn.faiss.avx2.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx2.so` library and load the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [Single Instruction Multiple Data (SIMD) optimization]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#simd-optimization). `knn.faiss.avx512.disabled` | Static | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx512.so` library and load the `libopensearchknn_faiss_avx2.so` library or the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/#simd-optimization). ## Index settings -The following table lists all available index-level k-NN settings. For information about updating these settings, see [Index-level index setting]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index-settings/#index-level-index-settings). +The following table lists all available index-level k-NN settings. For information about updating these settings, see [Index-level index settings]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index-settings/#index-level-index-settings). -At the moment, several parameters defined in the settings are in the deprecation process. Those parameters should be set in the mapping instead of the index settings. Parameters set in the mapping will override the parameters set in the index settings. Setting the parameters in the mapping allows an index to have multiple `knn_vector` fields with different parameters. +Several parameters defined in the settings are currently in the deprecation process. Those parameters should be set in the mapping instead of in the index settings. Parameters set in the mapping will override the parameters set in the index settings. Setting the parameters in the mapping allows an index to have multiple `knn_vector` fields with different parameters. Setting | Static/Dynamic | Default | Description :--- | :--- | :--- | :--- `index.knn` | Static | `false` | Whether the index should build native library indexes for the `knn_vector` fields. If set to `false`, the `knn_vector` fields will be stored in doc values, but approximate k-NN search functionality will be disabled. `index.knn.algo_param.ef_search` | Dynamic | `100` | `ef` (or `efSearch`) represents the size of the dynamic list for the nearest neighbors used during a search. Higher `ef` values lead to a more accurate but slower search. 
`ef` cannot be set to a value lower than the number of queried nearest neighbors, `k`. `ef` can take any value between `k` and the size of the dataset. -`index.knn.advanced.approximate_threshold` | Dynamic | `15000` | The number of vectors a segment must have before creating specialized data structures for approximate search. Set to `-1` to disable building vector data structures and `0` to always build them. +`index.knn.advanced.approximate_threshold` | Dynamic | `15000` | The number of vectors that a segment must have before creating specialized data structures for ANN search. Set to `-1` to disable building vector data structures and to `0` to always build them. `index.knn.advanced.filtered_exact_search_threshold`| Dynamic | None | The filtered ID threshold value used to switch to exact search during filtered ANN search. If the number of filtered IDs in a segment is lower than this setting's value, then exact search will be performed on the filtered IDs. `index.knn.algo_param.ef_construction` | Static | `100` | Deprecated in 1.0.0. Use the [mapping parameters]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) to set this value instead. `index.knn.algo_param.m` | Static | `16` | Deprecated in 1.0.0. Use the [mapping parameters]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) to set this value instead. diff --git a/_vector-search/specialized-operations/index.md b/_vector-search/specialized-operations/index.md index 52a09dc435..4a97327fd4 100644 --- a/_vector-search/specialized-operations/index.md +++ b/_vector-search/specialized-operations/index.md @@ -11,7 +11,7 @@ cards: description: "Use vector search to search nested fields" link: "/vector-search/specialized-operations/nested-search-knn/" - heading: "Radial search" - description: "Search all points within a vector space that reside within a specified maximum distance or minimum score threshold from a query point" + description: "Search all points in a vector space that reside within a specified maximum distance or minimum score threshold from a query point" link: "/vector-search/specialized-operations/radial-search-knn/" --- diff --git a/_vector-search/vector-search-techniques/approximate-knn.md b/_vector-search/vector-search-techniques/approximate-knn.md index c06961cf20..aba6825c47 100644 --- a/_vector-search/vector-search-techniques/approximate-knn.md +++ b/_vector-search/vector-search-techniques/approximate-knn.md @@ -18,15 +18,15 @@ The approximate k-NN search methods in OpenSearch use approximate nearest neighb For information about the algorithms OpenSearch supports, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). {: .note} -OpenSearch builds a native library index of the vectors for each `knn-vector` field/Lucene segment pair during indexing, which can be used to efficiently find the k-nearest neighbors to a query vector during search. To learn more about Lucene segments, see the [Apache Lucene documentation](https://lucene.apache.org/core/8_9_0/core/org/apache/lucene/codecs/lucene87/package-summary.html#package.description). These native library indexes are loaded into native memory during search and managed by a cache. To learn more about preloading native library indexes into memory, refer to the [Warmup API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#warmup-operation). 
Additionally, you can see which native library indexes are already loaded in memory using the [Stats API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#stats).
+OpenSearch builds a native library index of the vectors for each `knn_vector` field/Lucene segment pair during indexing, which can be used to efficiently find the k-nearest neighbors to a query vector during search. To learn more about Lucene segments, see the [Apache Lucene documentation](https://lucene.apache.org/core/8_9_0/core/org/apache/lucene/codecs/lucene87/package-summary.html#package.description). These native library indexes are loaded into native memory during search and managed by a cache. To learn more about preloading native library indexes into memory, see [Warmup API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#warmup-operation). Additionally, you can see which native library indexes are already loaded into memory using the [Stats API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#stats).
-Because the native library indexes are constructed during indexing, it is not possible to apply a filter on an index and then use this search method. All filters are applied to the results produced by the approximate nearest neighbor search.
+Because the native library indexes are constructed during indexing, it is not possible to apply a filter on an index and then use this search method. All filters are applied to the results produced by the ANN search.
## Get started with approximate k-NN
To use the approximate search functionality, you must first create a vector index with `index.knn` set to `true`. This setting tells OpenSearch to create native library indexes for the index.
-Next, you must add one or more fields of the `knn_vector` data type. This example creates an index with two `knn_vector` fields using the `faiss` engine:
+Next, you must add one or more fields of the `knn_vector` data type. The following example creates an index with two `knn_vector` fields using the `faiss` engine:
```json
PUT my-knn-index-1
@@ -103,7 +103,7 @@ POST _bulk
```
{% include copy-curl.html %}
-Then you can run an approximate nearest neighbor search on the data using the `knn` query type:
+Then you can run an ANN search on the data using the `knn` query type:
```json
GET my-knn-index-1/_search
@@ -143,9 +143,9 @@ Starting in OpenSearch 2.14, you can use `k`, `min_score`, or `max_distance` for
## Building a vector index from a model
-For some of the algorithms that OpenSearch supports, the native library index needs to be trained before it can be used. It would be expensive to train every newly created segment, so, instead, OpenSearch features the concept of a *model* that initializes the native library index during segment creation.
You can create a model by calling the [Train API]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-a-model) and passing in the source of the training data and the method definition of the model. Once training is complete, the model is serialized to a k-NN model system index. Then, during indexing, the model is pulled from that index to initialize the segments. -To train a model, you first need an OpenSearch index containing training data. Training data can come from any `knn_vector` field that has a dimension matching the dimension of the model you want to create. Training data can be the same data that you are going to index or data in a separate set. To create a training index, send the following request: +To train a model, you first need an OpenSearch index containing training data. Training data can come from any `knn_vector` field that has a dimension matching the dimension of the model you want to create. Training data can be the same as the data you plan to index or come from a separate dataset. To create a training index, send the following request: ```json PUT /train-index @@ -183,7 +183,7 @@ POST _bulk ``` {% include copy-curl.html %} -After indexing into the training index completes, you can call the Train API: +After completing indexing into the training index, you can call the Train API: ```json POST /_plugins/_knn/models/my-model/_train diff --git a/_vector-search/vector-search-techniques/index.md b/_vector-search/vector-search-techniques/index.md index a725f8e550..8a6be78330 100644 --- a/_vector-search/vector-search-techniques/index.md +++ b/_vector-search/vector-search-techniques/index.md @@ -23,11 +23,11 @@ OpenSearch supports three different methods for obtaining the k-nearest neighbor - [Painless extensions]({{site.url}}{{site.baseurl}}/search-plugins/knn/painless-functions/): Adds the distance functions as Painless extensions that you can use in more complex combinations. You can use this method to perform a brute-force, exact vector search of an index, which also supports pre-filtering. -Overall, for larger data sets, you should generally choose the approximate nearest neighbor method because it scales significantly better. For smaller data sets, where you may want to apply a filter, you should choose the custom scoring approach. If you have a more complex use case where you need to use a distance function as part of their scoring method, you should use the Painless scripting approach. +In general, you should choose the ANN method for larger datasets because it scales significantly better. For smaller datasets, where you may want to apply a filter, you should choose the custom scoring approach. If you have a more complex use case where you need to use a distance function as part of the scoring method, you should use the Painless scripting approach. ## Approximate search -OpenSearch supports multiple backend algorithms (_methods_) and libraries implementing these algorithms (_engines_). It automatically selects the optimal configuration based on the chosen mode and available memory. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). +OpenSearch supports multiple backend algorithms (_methods_) and libraries for implementing these algorithms (_engines_). It automatically selects the optimal configuration based on the chosen mode and available memory. For more information, see [Methods and engines]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/). 
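To make the method and engine terminology in the preceding hunk concrete, the following is a minimal sketch of a `knn_vector` mapping that explicitly selects a method (`hnsw`) and an engine (`faiss`). The index name, field name, dimension, and parameter values are illustrative assumptions, not values taken from this patch:

```json
PUT /my-methods-index
{
  "settings": {
    "index.knn": true
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 4,
        "method": {
          "name": "hnsw",
          "space_type": "l2",
          "engine": "faiss",
          "parameters": {
            "ef_construction": 100,
            "m": 16
          }
        }
      }
    }
  }
}
```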
## Using sparse vectors @@ -35,4 +35,4 @@ _Neural sparse search_ offers an efficient alternative to dense vector search by ## Combining multiple search techniques -_Hybrid search_ enhances search relevance by combining multiple search techniques within OpenSearch. It integrates traditional keyword search with vector-based semantic search. Through a configurable search pipeline, hybrid search normalizes and combines scores from different search methods to provide unified, relevant results. This approach is particularly effective for complex queries where both semantic understanding and exact matching are important. The search pipeline can be further customized with post-filtering operations and aggregations to meet specific search requirements. For more information, see [Hybrid search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/hybrid-search/). +_Hybrid search_ enhances search relevance by combining multiple search techniques in OpenSearch. It integrates traditional keyword search with vector-based semantic search. Through a configurable search pipeline, hybrid search normalizes and combines scores from different search methods to provide unified, relevant results. This approach is particularly effective for complex queries where both semantic understanding and exact matching are important. The search pipeline can be further customized with post-filtering operations and aggregations to meet specific search requirements. For more information, see [Hybrid search]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/hybrid-search/). diff --git a/_vector-search/vector-search-techniques/knn-score-script.md b/_vector-search/vector-search-techniques/knn-score-script.md index 78a0fcf05a..f858b278d9 100644 --- a/_vector-search/vector-search-techniques/knn-score-script.md +++ b/_vector-search/vector-search-techniques/knn-score-script.md @@ -11,15 +11,15 @@ redirect_from: # Exact k-NN search with scoring script -You can use exact k-NN search with scoring script to find the exact k-nearest neighbors to a given query point. Using the k-NN score script, you can apply a filter on an index before executing the nearest neighbor search. This is useful for dynamic search cases where the index body may vary based on other conditions. +You can use exact k-NN search with a scoring script to find the exact k-nearest neighbors to a given query point. Using the k-NN scoring script, you can apply a filter on an index before executing the nearest neighbor search. This is useful for dynamic search use cases, where the index body may vary based on other conditions. -Because the score script approach executes a brute force search, it doesn't scale as efficiently as the [approximate approach]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). In some cases, it might be better to think about refactoring your workflow or index structure to use the approximate approach instead of the score script approach. +Because the score script approach executes a brute force search, it doesn't scale as efficiently as the [approximate approach]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). In some cases, it might be better to consider refactoring your workflow or index structure to use the approximate approach instead of the score script approach. ## Getting started with the score script for vectors -Similarly to approximate nearest neighbor search, in order to use the score script on a body of vectors, you must first create an index with one or more `knn_vector` fields. 
+Similarly to approximate nearest neighbor (ANN) search, in order to use the score script on a body of vectors, you must first create an index with one or more `knn_vector` fields. -If you intend to only use the score script approach (and not the approximate approach) you can set `index.knn` to `false` and not set `index.knn.space_type`. You can choose the space type during search. For the spaces the k-NN score script supports, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). +If you intend to only use the score script approach (and not the approximate approach), you can set `index.knn` to `false` and not set `index.knn.space_type`. You can choose the space type during search. For the spaces that the k-NN score script supports, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). This example creates an index with two `knn_vector` fields: @@ -42,7 +42,7 @@ PUT my-knn-index-1 ``` {% include copy-curl.html %} -If you *only* want to use the score script, you can omit `"index.knn": true`. This approach leads to faster indexing speed and lower memory usage, but you lose the ability to run standard k-NN queries on the index. +If you want to *only* use the score script, you can omit `"index.knn": true`. This approach leads to faster indexing speed and lower memory usage, but you lose the ability to run standard k-NN queries on the index. {: .tip} After you create the index, you can add some data to it: @@ -107,7 +107,7 @@ All parameters are required. - `query_value` is the point you want to find the nearest neighbors for. For the Euclidean and cosine similarity spaces, the value must be an array of floats that matches the dimension set in the field's mapping. For Hamming bit distance, this value can be either of type signed long or a base64-encoded string (for the long and binary field types, respectively). - `space_type` corresponds to the distance function. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). -The [post filter example in the approximate approach]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/) shows a search that returns fewer than `k` results. If you want to avoid this situation, the score script method lets you essentially invert the order of events. In other words, you can filter down the set of documents over which to execute the k-nearest neighbor search. +The [post filter example in the approximate approach]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/) shows a search that returns fewer than `k` results. If you want to avoid this, the score script method lets you essentially invert the order of events. In other words, you can filter the set of documents on which to execute the k-NN search. This example shows a pre-filter approach to k-NN search with the score script approach. First, create the index: diff --git a/_vector-search/vector-search-techniques/painless-functions.md b/_vector-search/vector-search-techniques/painless-functions.md index a2ed46588c..ff16a19f4d 100644 --- a/_vector-search/vector-search-techniques/painless-functions.md +++ b/_vector-search/vector-search-techniques/painless-functions.md @@ -12,11 +12,11 @@ redirect_from: # Painless scripting extensions -With the Painless scripting extensions, you can use k-NN distance functions directly in your Painless scripts to perform operations on `knn_vector` fields. 
Painless has a strict list of allowed functions and classes per context to ensure its scripts are secure. OpenSearch adds Painless Scripting extensions to a few of the distance functions used in [k-NN score script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/), so you can use them to customize your k-NN workload. +With Painless scripting extensions, you can use k-NN distance functions directly in your Painless scripts to perform operations on `knn_vector` fields. Painless has a strict list of allowed functions and classes per context to ensure that its scripts are secure. OpenSearch adds Painless scripting extensions to a few of the distance functions used in [k-NN score script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/), so you can use them to customize your k-NN workload. -## Get started with k-NN's Painless Scripting functions +## Get started with k-NN Painless scripting functions -To use k-NN's Painless Scripting functions, first create an index with `knn_vector` fields like in [k-NN score script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script#getting-started-with-the-score-script-for-vectors). Once the index is created and you ingest some data, you can use the Painless extensions: +To use k-NN Painless scripting functions, first create an index with `knn_vector` fields, as described in [Getting started with the score script for vectors]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script#getting-started-with-the-score-script-for-vectors). Once you have created the index and ingested some data, you can use Painless extensions: ```json GET my-knn-index-2/_search @@ -54,10 +54,10 @@ The following table describes the Painless functions OpenSearch provides. Function name | Function signature | Description :--- | :--- -`l2Squared` | `float l2Squared (float[] queryVector, doc['vector field'])` | This function calculates the square of the L2 distance (Euclidean distance) between a given query vector and document vectors. The shorter the distance, the more relevant the document is, so this example inverts the return value of the `l2Squared` function. If the document vector matches the query vector, the result is `0`, so this example also adds `1` to the distance to avoid divide by zero errors. -`l1Norm` | `float l1Norm (float[] queryVector, doc['vector field'])` | This function calculates the L1 Norm distance (Manhattan distance) between a given query vector and document vectors. -`cosineSimilarity` | `float cosineSimilarity (float[] queryVector, doc['vector field'])` | Cosine similarity is an inner product of the query vector and document vector normalized to both have a length of `1`. If the magnitude of the query vector doesn't change throughout the query, you can pass the magnitude of the query vector to improve performance instead of calculating the magnitude every time for every filtered document:
<br>`float cosineSimilarity (float[] queryVector, doc['vector field'], float normQueryVector)`<br>
In general, the range of cosine similarity is [-1, 1]. However, in the case of information retrieval, the cosine similarity of two documents ranges from `0` to `1` because the `tf-idf` statistic can't be negative. Therefore, OpenSearch adds `1.0` in order to always yield a positive cosine similarity score. -`hamming` | `float hamming (float[] queryVector, doc['vector field'])` | This function calculates the Hamming distance between a given query vector and document vectors. The Hamming distance is the number of positions at which the corresponding elements are different. The shorter the distance, the more relevant the document is, so this example inverts the return value of the Hamming distance. +`l2Squared` | `float l2Squared (float[] queryVector, doc['vector field'])` | This function calculates the square of the L2 distance (Euclidean distance) between a given query vector and document vectors. A shorter distance indicates a more relevant document, so this example inverts the return value of the `l2Squared` function. If the document vector matches the query vector, the result is `0`, so this example also adds `1` to the distance to avoid divide-by-zero errors. +`l1Norm` | `float l1Norm (float[] queryVector, doc['vector field'])` | This function calculates the L1 norm distance (Manhattan distance) between a given query vector and document vectors. +`cosineSimilarity` | `float cosineSimilarity (float[] queryVector, doc['vector field'])` | Cosine similarity is an inner product of the query vector and document vector normalized to both have a length of `1`. If the magnitude of the query vector doesn't change throughout the query, you can pass the magnitude of the query vector to improve performance instead of repeatedly calculating the magnitude for every filtered document:
<br>`float cosineSimilarity (float[] queryVector, doc['vector field'], float normQueryVector)`<br>
In general, the range of cosine similarity is [-1, 1]. However, in the case of information retrieval, the cosine similarity of two documents ranges from `0` to `1` because the `tf-idf` statistic can't be negative. Therefore, OpenSearch adds `1.0` in order to always yield a positive cosine similarity score. +`hamming` | `float hamming (float[] queryVector, doc['vector field'])` | This function calculates the Hamming distance between a given query vector and document vectors. The Hamming distance is the number of positions at which the corresponding elements are different. A shorter distance indicates a more relevant document, so this example inverts the return value of the Hamming distance. The `hamming` space type is supported for binary vectors in OpenSearch version 2.16 and later. For more information, see [Binary k-NN vectors]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-memory-optimized#binary-vectors). {: .note} @@ -68,13 +68,13 @@ The `hamming` space type is supported for binary vectors in OpenSearch version 2 2. If a vector field doesn't have a value, the function throws an `IllegalStateException`. - You can avoid this situation by first checking if a document has a value in its field: + You can avoid this by first checking whether a document contains a value in its field: ``` "source": "doc[params.field].size() == 0 ? 0 : 1 / (1 + l2Squared(params.query_value, doc[params.field]))", ``` - Because scores can only be positive, this script ranks documents with vector fields higher than those without. + Because scores can only be positive, this script ranks documents with vector fields higher than those without vector fields. -With cosine similarity, it is not valid to pass a zero vector (`[0, 0, ...]`) as input. This is because the magnitude of such a vector is 0, which raises a `divide by 0` exception in the corresponding formula. Requests containing the zero vector will be rejected, and a corresponding exception will be thrown. +When using cosine similarity, it is not valid to pass a zero vector (`[0, 0, ...]`) as input. This is because the magnitude of such a vector is 0, which raises a `divide by 0` exception in the corresponding formula. Requests containing the zero vector will be rejected, and a corresponding exception will be thrown. {: .note } From 0ca38f0612d6362d78ddcbe528eff9b6dec488d7 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Wed, 19 Feb 2025 19:41:03 -0500 Subject: [PATCH 31/32] Editorial comments Signed-off-by: Fanit Kolchina --- _about/index.md | 28 +++++++++---------- .../knn-methods-engines.md | 12 ++++---- _vector-search/api.md | 10 +++---- _vector-search/creating-vector-index.md | 2 +- _vector-search/filter-search-knn.md | 2 +- _vector-search/optimizing-storage/index.md | 4 +-- .../knn-vector-quantization.md | 12 ++++---- _vector-search/performance-tuning-indexing.md | 2 +- _vector-search/performance-tuning.md | 4 +-- _vector-search/settings.md | 2 +- .../approximate-knn.md | 2 +- .../vector-search-techniques/index.md | 2 +- .../knn-score-script.md | 24 ++++++++-------- .../painless-functions.md | 6 ++-- 14 files changed, 56 insertions(+), 56 deletions(-) diff --git a/_about/index.md b/_about/index.md index cda44f157f..404a5a4d6f 100644 --- a/_about/index.md +++ b/_about/index.md @@ -12,48 +12,48 @@ redirect_from: - /opensearch/index/ why_use: - heading: "Vector database" - description: "Use OpenSearch as a vector database to combine the power of traditional search, analytics, and vector search." 
+ description: "Use OpenSearch as a vector database to combine the power of traditional search, analytics, and vector search" link: "/vector-search/" - heading: "Fast, scalable full-text search" - description: "Help users find the right information in your application, website, or data lake catalog." + description: "Help users find the right information in your application, website, or data lake catalog" link: "/search-plugins/" - heading: "Application and infrastructure monitoring" - description: "Use observability logs, metrics, and traces to monitor your applications in real time." + description: "Use observability logs, metrics, and traces to monitor your applications in real time" link: "/observing-your-data/" - heading: "Security and event information management" - description: "Centralize logs to enable real-time security monitoring and forensic analysis." + description: "Centralize logs to enable real-time security monitoring and forensic analysis" link: "/security/" features: - heading: "Vector search" - description: "Build AI/ML-powered vector search applications using semantic, hybrid, or conversational search and more." + description: "Build AI/ML-powered vector search applications" link: "/vector-search/" - heading: "Machine learning" - description: "Integrate machine learning models into your workloads." + description: "Integrate machine learning models into your workloads" link: "/ml-commons-plugin/" - heading: "Customizing your search" - description: "From optimizing performance to improving relevance, customize your search experience." + description: "From optimizing performance to improving relevance, customize your search experience" link: "/search-plugins/" - heading: "Workflow automation" - description: "Automate complex OpenSearch setup and preprocessing tasks." + description: "Automate complex OpenSearch setup and preprocessing tasks" link: "/automating-configurations/" - heading: "Anomaly detection" - description: "Identify atypical data and receive automatic notifications." + description: "Identify atypical data and receive automatic notifications" link: "/monitoring-plugins/ad/" - heading: "Building visualizations" - description: "Visualize your data in OpenSearch Dashboards." + description: "Visualize your data in OpenSearch Dashboards" link: "/dashboards/" getting_started: - heading: "Get started with OpenSearch" - description: "Learn about OpenSearch and start ingesting and searching data." + description: "Learn about OpenSearch and start ingesting and searching data" link: "/getting-started/" - heading: "Get started with OpenSearch Dashboards" - description: "Learn about OpenSearch Dashboards applications and tools used to visualize data." + description: "Learn about OpenSearch Dashboards applications and tools used to visualize data" link: "/dashboards/quickstart/" - heading: "Get started with vector search" - description: "Learn about vector search options and build your first vector search application." + description: "Learn about vector search options and build your first vector search application" link: "/search-plugins/" - heading: "Get started with OpenSearch security" - description: "Learn about security in OpenSearch." 
+ description: "Learn about security in OpenSearch" link: "/getting-started/security/" --- diff --git a/_field-types/supported-field-types/knn-methods-engines.md b/_field-types/supported-field-types/knn-methods-engines.md index d91ba8fcb8..4ac160533d 100644 --- a/_field-types/supported-field-types/knn-methods-engines.md +++ b/_field-types/supported-field-types/knn-methods-engines.md @@ -101,7 +101,7 @@ Parameter name | Required | Default | Updatable | Description The Lucene HNSW implementation ignores `ef_search` and dynamically sets it to the value of "k" in the search request. There is therefore no need to configure settings for `ef_search` when using the Lucene engine. {: .note} -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). +An index created in OpenSearch version 2.11 or earlier will still use the previous `ef_construction` value (`512`). {: .note} ### Example configuration @@ -142,7 +142,7 @@ Parameter name | Required | Default | Updatable | Description `m` | No | 16 | No | The number of bidirectional links that the plugin creates for each new element. Increasing and decreasing this value can have a large impact on memory consumption. Keep this value between `2` and `100`. `encoder` | No | flat | No | An encoder definition for encoding vectors. Encoders can reduce the memory footprint of your index at the expense of search accuracy. -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). +An index created in OpenSearch version 2.11 or earlier will still use the previous `ef_construction` value (`512`). {: .note} #### IVF parameters @@ -340,7 +340,7 @@ Parameter name | Required | Default | Updatable | Description For NMSLIB (deprecated), *ef_search* is set in the [index settings]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings). {: .note} -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). +An index created in OpenSearch version 2.11 or earlier will still use the previous `ef_construction` value (`512`). {: .note} ### Example configuration @@ -377,13 +377,13 @@ In general, select Faiss for large-scale use cases. Lucene is a good option for |:---|:---|:---|:---| | Max dimensions | 16,000 | 16,000 | 16,000 | | Filter | Post-filter | Post-filter | Filter during search | -| Training required | No (Yes for product quantization) | Yes | No | +| Training required | No (Yes for PQ) | Yes | No | | Similarity metrics | `l2`, `innerproduct` | `l2`, `innerproduct` | `l2`, `cosinesimil` | | Number of vectors | Tens of billions | Tens of billions | Less than 10 million | | Indexing latency | Low | Lowest | Low | | Query latency and quality | Low latency and high quality | Low latency and low quality | High latency and high quality | -| Vector compression | Flat

Product quantization | Flat<br><br>Product quantization | Flat |
-| Memory consumption | High<br><br>Low with product quantization | Medium<br><br>Low with product quantization | High |
+| Vector compression | Flat<br><br>PQ | Flat<br><br>PQ | Flat |
+| Memory consumption | High<br><br>Low with PQ | Medium<br><br>
Low with PQ | High | ## Memory estimation diff --git a/_vector-search/api.md b/_vector-search/api.md index efefd28b19..3b87f35bcc 100644 --- a/_vector-search/api.md +++ b/_vector-search/api.md @@ -9,7 +9,7 @@ redirect_from: # k-NN API -OpenSearch provides several k-NN APIs for managing, monitoring, and optimizing your vector workload. +OpenSearch provides several k-nearest neighbors (k-NN) APIs for managing, monitoring, and optimizing your vector workload. ## Stats @@ -42,10 +42,10 @@ Field | Description `load_success_count` | The number of times k-NN successfully loaded a native library index into the cache. This statistic is only relevant to approximate k-NN search. `load_exception_count` | The number of times an exception occurred when trying to load a native library index into the cache. This statistic is only relevant to approximate k-NN search. `indices_in_cache` | For each OpenSearch index with a `knn_vector` field and approximate k-NN turned on, this statistic provides the number of native library indexes that OpenSearch index has and the total `graph_memory_usage` that the OpenSearch index is using, in kilobytes. -`script_compilations` | The number of times the k-NN script has been compiled. This value should usually be 1 or 0, but if the cache containing the compiled scripts is filled, the k-NN script might be recompiled. This statistic is only relevant to k-NN score script search. -`script_compilation_errors` | The number of errors during script compilation. This statistic is only relevant to k-NN score script search. -`script_query_requests` | The total number of script queries. This statistic is only relevant to k-NN score script search. -`script_query_errors` | The number of errors during script queries. This statistic is only relevant to k-NN score script search. +`script_compilations` | The number of times the k-NN script has been compiled. This value should usually be 1 or 0, but if the cache containing the compiled scripts is filled, the k-NN script might be recompiled. This statistic is only relevant to k-NN scoring script search. +`script_compilation_errors` | The number of errors during script compilation. This statistic is only relevant to k-NN scoring script search. +`script_query_requests` | The total number of script queries. This statistic is only relevant to k-NN scoring script search. +`script_query_errors` | The number of errors during script queries. This statistic is only relevant to k-NN scoring script search. `nmslib_initialized` | A Boolean value indicating whether the `nmslib` JNI library has been loaded and initialized on the node. `faiss_initialized` | A Boolean value indicating whether the `faiss` JNI library has been loaded and initialized on the node. `model_index_status` | The status of the model system index. Valid values are `red`, `yellow`, and `green`. If the index does not exist, this value is `null`. diff --git a/_vector-search/creating-vector-index.md b/_vector-search/creating-vector-index.md index 01baaca1f4..ec4bc13ebb 100644 --- a/_vector-search/creating-vector-index.md +++ b/_vector-search/creating-vector-index.md @@ -38,7 +38,7 @@ PUT /test-index Creating a vector index involves the following key steps: -1. **Enable k-NN search**: +1. **Enable k-nearest neighbors (k-NN) search**: Set `index.knn` to `true` in the index settings to enable k-NN search functionality. 1. 
**Define a vector field**:
diff --git a/_vector-search/filter-search-knn.md b/_vector-search/filter-search-knn.md
index a4583ff443..01b02238fa 100644
--- a/_vector-search/filter-search-knn.md
+++ b/_vector-search/filter-search-knn.md
@@ -11,7 +11,7 @@ redirect_from:
To refine vector search results, you can filter a vector search using one of the following methods:
-- [Efficient k-NN filtering](#efficient-k-nn-filtering): This approach applies filtering _during_ the k-NN search, as opposed to before or after the k-NN search, which ensures that `k` results are returned (if there are at least `k` results in total). This approach is supported by the following engines:
+- [Efficient k-nearest neighbors (k-NN) filtering](#efficient-k-nn-filtering): This approach applies filtering _during_ the k-NN search, as opposed to before or after the k-NN search, which ensures that `k` results are returned (if there are at least `k` results in total). This approach is supported by the following engines:
- Lucene engine with a Hierarchical Navigable Small World (HNSW) algorithm (OpenSearch version 2.4 and later)
- Faiss engine with an HNSW algorithm (OpenSearch version 2.9 and later) or IVF algorithm (OpenSearch version 2.10 and later)
diff --git a/_vector-search/optimizing-storage/index.md b/_vector-search/optimizing-storage/index.md
index 7c0e2c49cb..4b04024e71 100644
--- a/_vector-search/optimizing-storage/index.md
+++ b/_vector-search/optimizing-storage/index.md
@@ -8,10 +8,10 @@ redirect_from:
- /vector-search/optimizing-storage/
storage_cards:
- heading: "Vector quantization"
- description: "Reduce vector storage space by quantizing vectors."
+ description: "Reduce vector storage space by quantizing vectors"
link: "/vector-search/optimizing-storage/knn-vector-quantization/"
- heading: "Disk-based vector search"
- description: "Uses binary quantization to reduce the operational costs of vector workloads."
+ description: "Uses binary quantization to reduce the operational costs of vector workloads"
link: "/vector-search/optimizing-storage/disk-based-vector-search/"
---
diff --git a/_vector-search/optimizing-storage/knn-vector-quantization.md b/_vector-search/optimizing-storage/knn-vector-quantization.md
index 3bebfdb6f0..598d9d7eed 100644
--- a/_vector-search/optimizing-storage/knn-vector-quantization.md
+++ b/_vector-search/optimizing-storage/knn-vector-quantization.md
@@ -9,23 +9,23 @@ redirect_from:
- /search-plugins/knn/knn-vector-quantization/
outside_cards:
- heading: "Byte vectors"
- description: "Quantize vectors into byte vectors."
+ description: "Quantize vectors into byte vectors"
link: "/field-types/supported-field-types/knn-memory-optimized/#byte-vectors"
- heading: "Binary vectors"
- description: "Quantize vectors into binary vectors."
+ description: "Quantize vectors into binary vectors"
link: "/field-types/supported-field-types/knn-memory-optimized/#binary-vectors"
inside_cards:
- heading: "Lucene scalar quantization"
- description: "Use built-in scalar quantization for the Lucene engine."
+ description: "Use built-in scalar quantization for the Lucene engine"
link: "/vector-search/optimizing-storage/lucene-scalar-quantization/"
- heading: "Faiss 16-bit scalar quantization"
- description: "Use built-in scalar quantization for the Faiss engine."
+ description: "Use built-in scalar quantization for the Faiss engine" link: "/vector-search/optimizing-storage/faiss-16-bit-quantization/" - heading: "Faiss product quantization" - description: "Use built-in product quantization for the Faiss engine." + description: "Use built-in product quantization for the Faiss engine" link: "/vector-search/optimizing-storage/faiss-product-quantization/" - heading: "Binary quantization" - description: "Use built-in binary quantization for the Faiss engine." + description: "Use built-in binary quantization for the Faiss engine" link: "/vector-search/optimizing-storage/binary-quantization/" --- diff --git a/_vector-search/performance-tuning-indexing.md b/_vector-search/performance-tuning-indexing.md index 61435ea4da..d518fa2536 100644 --- a/_vector-search/performance-tuning-indexing.md +++ b/_vector-search/performance-tuning-indexing.md @@ -86,7 +86,7 @@ This is an expert-level setting. Disabling the `_recovery_source` may lead to fa This approach is recommended only for workloads that involve a single initial bulk upload and will be used exclusively for search after force merging to a single segment. -During indexing, vector search builds a specialized data structure for a `knn_vector` field to enable efficient approximate k-NN search. However, these structures are rebuilt during [force merge]({{site.url}}{{site.baseurl}}/api-reference/index-apis/force-merge/) on vector indexes. To optimize indexing speed, follow these steps: +During indexing, vector search builds a specialized data structure for a `knn_vector` field to enable efficient approximate k-nearest neighbors (k-NN) search. However, these structures are rebuilt during [force merge]({{site.url}}{{site.baseurl}}/api-reference/index-apis/force-merge/) on vector indexes. To optimize indexing speed, follow these steps: 1. **Disable vector data structure creation**: Disable vector data structure creation for new segments by setting [`index.knn.advanced.approximate_threshold`]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings) to `-1`. diff --git a/_vector-search/performance-tuning.md b/_vector-search/performance-tuning.md index 3b12a6adf7..f4c04edb1c 100644 --- a/_vector-search/performance-tuning.md +++ b/_vector-search/performance-tuning.md @@ -14,7 +14,7 @@ This topic provides performance tuning recommendations for improving indexing an * Queries execute sequentially on segments in the shard (as with any other OpenSearch query). * The coordinator node selects the final `size` neighbors from the neighbors returned by each shard. -The following sections provide recommendations regarding comparing ANN to exact k-NN with a score script. +The following sections provide recommendations regarding comparing ANN to exact k-NN with a scoring script. ## Recommendations for engines and cluster node sizing @@ -30,7 +30,7 @@ Recall depends on multiple factors, such as the number of vectors, dimensions, s The default parameters work for a broader set of use cases, but make sure to run your own experiments on your datasets and choose the appropriate values. For index-level settings, see [Index settings]({{site.url}}{{site.baseurl}}/vector-search/settings/#index-settings). -## ANN compared to score script +## ANN compared to scoring script The standard k-NN query and custom scoring options perform differently. Run tests with a representative set of documents to see if the search results and latencies match your expectations. 
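To tie the force merge workflow described in the performance-tuning-indexing.md hunk above to concrete requests, the following sketch first disables vector data structure creation using the `index.knn.advanced.approximate_threshold` setting named in that hunk. The index name is a placeholder assumption:

```json
PUT /my-vector-index/_settings
{
  "index.knn.advanced.approximate_threshold": -1
}
```

After the initial bulk upload completes, you would re-enable data structure creation and then force merge so that the rebuilt segments include the vector data structures:

```json
PUT /my-vector-index/_settings
{
  "index.knn.advanced.approximate_threshold": 0
}
```

```json
POST /my-vector-index/_forcemerge?max_num_segments=1
```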
diff --git a/_vector-search/settings.md b/_vector-search/settings.md index bebfc878ce..ea468add2c 100644 --- a/_vector-search/settings.md +++ b/_vector-search/settings.md @@ -46,5 +46,5 @@ Setting | Static/Dynamic | Default | Description `index.knn.algo_param.m` | Static | `16` | Deprecated in 1.0.0. Use the [mapping parameters]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) to set this value instead. `index.knn.space_type` | Static | `l2` | Deprecated in 1.0.0. Use the [mapping parameters]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) to set this value instead. -An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` and `ef_search` values (`512`). +An index created in OpenSearch version 2.11 or earlier will still use the previous `ef_construction` and `ef_search` values (`512`). {: .note} \ No newline at end of file diff --git a/_vector-search/vector-search-techniques/approximate-knn.md b/_vector-search/vector-search-techniques/approximate-knn.md index aba6825c47..96c375f9a3 100644 --- a/_vector-search/vector-search-techniques/approximate-knn.md +++ b/_vector-search/vector-search-techniques/approximate-knn.md @@ -11,7 +11,7 @@ redirect_from: # Approximate k-NN search -Standard k-NN search methods compute similarity using a brute-force approach that measures the nearest distance between a query and a number of points, which produces exact results. This works well in many applications. However, in the case of extremely large datasets with high dimensionality, this creates a scaling problem that reduces the efficiency of the search. Approximate k-NN search methods can overcome this by employing tools that restructure indexes more efficiently and reduce the dimensionality of searchable vectors. Using this approach requires a sacrifice in accuracy but increases search processing speeds appreciably. +Standard k-nearest neighbors (k-NN) search methods compute similarity using a brute-force approach that measures the nearest distance between a query and a number of points, which produces exact results. This works well in many applications. However, in the case of extremely large datasets with high dimensionality, this creates a scaling problem that reduces the efficiency of the search. Approximate k-NN search methods can overcome this by employing tools that restructure indexes more efficiently and reduce the dimensionality of searchable vectors. Using this approach requires a sacrifice in accuracy but increases search processing speeds appreciably. The approximate k-NN search methods in OpenSearch use approximate nearest neighbor (ANN) algorithms from the [NMSLIB](https://github.com/nmslib/nmslib), [Faiss](https://github.com/facebookresearch/faiss), and [Lucene](https://lucene.apache.org/) libraries to power k-NN search. These search methods employ ANN to improve search latency for large datasets. Of the three search methods OpenSearch provides, this method offers the best search scalability for large datasets. This approach is the preferred method when a dataset reaches hundreds of thousands of vectors. 
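The `knn` query type is referenced in the approximate-knn.md hunks in this patch series but its body is elided from the diff context. For orientation, the following is a minimal sketch of such a query against the `my-knn-index-1` index created earlier in that file; the field name, query vector, and `k` value are illustrative assumptions:

```json
GET /my-knn-index-1/_search
{
  "size": 2,
  "query": {
    "knn": {
      "my_vector": {
        "vector": [2.0, 3.0, 5.0, 6.0],
        "k": 2
      }
    }
  }
}
```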
diff --git a/_vector-search/vector-search-techniques/index.md b/_vector-search/vector-search-techniques/index.md index 8a6be78330..9c27b09f5c 100644 --- a/_vector-search/vector-search-techniques/index.md +++ b/_vector-search/vector-search-techniques/index.md @@ -19,7 +19,7 @@ OpenSearch supports three different methods for obtaining the k-nearest neighbor - [Approximate search]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/) (approximate k-NN, or ANN): Returns approximate nearest neighbors to the query vector. Usually, approximate search algorithms sacrifice indexing speed and search accuracy in exchange for performance benefits such as lower latency, smaller memory footprints, and more scalable search. For most use cases, approximate search is the best option. - Exact search: A brute-force, exact k-NN search of vector fields. OpenSearch supports the following types of exact search: - - [Exact search with scoring script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/): Using a scoring script, you can apply a filter to an index before executing the nearest neighbor search. + - [Exact search with a scoring script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/): Using a scoring script, you can apply a filter to an index before executing the nearest neighbor search. - [Painless extensions]({{site.url}}{{site.baseurl}}/search-plugins/knn/painless-functions/): Adds the distance functions as Painless extensions that you can use in more complex combinations. You can use this method to perform a brute-force, exact vector search of an index, which also supports pre-filtering. diff --git a/_vector-search/vector-search-techniques/knn-score-script.md b/_vector-search/vector-search-techniques/knn-score-script.md index f858b278d9..da5b159baa 100644 --- a/_vector-search/vector-search-techniques/knn-score-script.md +++ b/_vector-search/vector-search-techniques/knn-score-script.md @@ -1,6 +1,6 @@ --- layout: default -title: Exact k-NN search with scoring script +title: Exact k-NN search with a scoring script nav_order: 20 parent: Vector search techniques has_children: true @@ -9,17 +9,17 @@ redirect_from: - /search-plugins/knn/knn-score-script/ --- -# Exact k-NN search with scoring script +# Exact k-NN search with a scoring script -You can use exact k-NN search with a scoring script to find the exact k-nearest neighbors to a given query point. Using the k-NN scoring script, you can apply a filter on an index before executing the nearest neighbor search. This is useful for dynamic search use cases, where the index body may vary based on other conditions. +You can use exact k-nearest neighbors (k-NN) search with a scoring script to find the exact k-nearest neighbors to a given query point. Using the k-NN scoring script, you can apply a filter on an index before executing the nearest neighbor search. This is useful for dynamic search use cases, where the index body may vary based on other conditions. -Because the score script approach executes a brute force search, it doesn't scale as efficiently as the [approximate approach]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). In some cases, it might be better to consider refactoring your workflow or index structure to use the approximate approach instead of the score script approach. +Because the scoring script approach executes a brute force search, it doesn't scale as efficiently as the [approximate approach]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/). 
In some cases, it might be better to consider refactoring your workflow or index structure to use the approximate approach instead of the scoring script approach. -## Getting started with the score script for vectors +## Getting started with the scoring script for vectors -Similarly to approximate nearest neighbor (ANN) search, in order to use the score script on a body of vectors, you must first create an index with one or more `knn_vector` fields. +Similarly to approximate nearest neighbor (ANN) search, in order to use the scoring script on a body of vectors, you must first create an index with one or more `knn_vector` fields. -If you intend to only use the score script approach (and not the approximate approach), you can set `index.knn` to `false` and not set `index.knn.space_type`. You can choose the space type during search. For the spaces that the k-NN score script supports, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). +If you intend to only use the scoring script approach (and not the approximate approach), you can set `index.knn` to `false` and not set `index.knn.space_type`. You can choose the space type during search. For the spaces that the k-NN scoring script supports, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). This example creates an index with two `knn_vector` fields: @@ -42,7 +42,7 @@ PUT my-knn-index-1 ``` {% include copy-curl.html %} -If you want to *only* use the score script, you can omit `"index.knn": true`. This approach leads to faster indexing speed and lower memory usage, but you lose the ability to run standard k-NN queries on the index. +If you want to *only* use the scoring script, you can omit `"index.knn": true`. This approach leads to faster indexing speed and lower memory usage, but you lose the ability to run standard k-NN queries on the index. {: .tip} After you create the index, you can add some data to it: @@ -107,9 +107,9 @@ All parameters are required. - `query_value` is the point you want to find the nearest neighbors for. For the Euclidean and cosine similarity spaces, the value must be an array of floats that matches the dimension set in the field's mapping. For Hamming bit distance, this value can be either of type signed long or a base64-encoded string (for the long and binary field types, respectively). - `space_type` corresponds to the distance function. For more information, see [Spaces]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-spaces/). -The [post filter example in the approximate approach]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/) shows a search that returns fewer than `k` results. If you want to avoid this, the score script method lets you essentially invert the order of events. In other words, you can filter the set of documents on which to execute the k-NN search. +The [post filter example in the approximate approach]({{site.url}}{{site.baseurl}}/vector-search/filter-search-knn/) shows a search that returns fewer than `k` results. If you want to avoid this, the scoring script method lets you essentially invert the order of events. In other words, you can filter the set of documents on which to execute the k-NN search. -This example shows a pre-filter approach to k-NN search with the score script approach. First, create the index: +This example shows a pre-filter approach to k-NN search with the scoring script approach. 
First, create the index: ```json PUT my-knn-index-2 @@ -180,9 +180,9 @@ GET my-knn-index-2/_search ``` {% include copy-curl.html %} -## Getting started with the score script for binary data +## Getting started with the scoring script for binary data -The k-NN score script also allows you to run k-NN search on your binary data with the Hamming distance space. +The k-NN scoring script also allows you to run k-NN search on your binary data with the Hamming distance space. In order to use Hamming distance, the field of interest must have either a `binary` or `long` field type. If you're using `binary` type, the data must be a base64-encoded string. This example shows how to use the Hamming distance space with a `binary` field type: diff --git a/_vector-search/vector-search-techniques/painless-functions.md b/_vector-search/vector-search-techniques/painless-functions.md index ff16a19f4d..4f106e378a 100644 --- a/_vector-search/vector-search-techniques/painless-functions.md +++ b/_vector-search/vector-search-techniques/painless-functions.md @@ -2,7 +2,7 @@ layout: default title: Painless extensions nav_order: 25 -parent: Exact k-NN search with scoring script +parent: Exact k-NN search with a scoring script grand_parent: Vector search techniques has_children: false has_math: true @@ -12,11 +12,11 @@ redirect_from: # Painless scripting extensions -With Painless scripting extensions, you can use k-NN distance functions directly in your Painless scripts to perform operations on `knn_vector` fields. Painless has a strict list of allowed functions and classes per context to ensure that its scripts are secure. OpenSearch adds Painless scripting extensions to a few of the distance functions used in [k-NN score script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/), so you can use them to customize your k-NN workload. +With Painless scripting extensions, you can use k-nearest neighbors (k-NN) distance functions directly in your Painless scripts to perform operations on `knn_vector` fields. Painless has a strict list of allowed functions and classes per context to ensure that its scripts are secure. OpenSearch adds Painless scripting extensions to a few of the distance functions used in the [k-NN scoring script]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script/), so you can use them to customize your k-NN workload. ## Get started with k-NN Painless scripting functions -To use k-NN Painless scripting functions, first create an index with `knn_vector` fields, as described in [Getting started with the score script for vectors]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script#getting-started-with-the-score-script-for-vectors). +To use k-NN Painless scripting functions, first create an index with `knn_vector` fields, as described in [Getting started with the scoring script for vectors]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-score-script#getting-started-with-the-scoring-script-for-vectors).
Once you have created the index and ingested some data, you can use Painless extensions: ```json GET my-knn-index-2/_search From e8595c3115d8b27af0cae74cf086b7edac7f3b04 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Thu, 20 Feb 2025 11:15:31 -0500 Subject: [PATCH 32/32] Reformat concepts page Signed-off-by: Fanit Kolchina --- _vector-search/getting-started/concepts.md | 104 ++++++--------------- 1 file changed, 26 insertions(+), 78 deletions(-) diff --git a/_vector-search/getting-started/concepts.md b/_vector-search/getting-started/concepts.md index 07a238c0e8..a292f535b8 100644 --- a/_vector-search/getting-started/concepts.md +++ b/_vector-search/getting-started/concepts.md @@ -11,117 +11,65 @@ This page defines key terms and techniques related to vector search in OpenSearc ## Vector representations -### Vector embeddings +- [**_Vector embeddings_**]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/#vector-embeddings) are numerical representations of data—such as text, images, or audio—that encode meaning or features into a high-dimensional space. These embeddings enable similarity-based comparisons for search and machine learning (ML) tasks. -[_Vector embeddings_]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/#vector-embeddings) are numerical representations of data—such as text, images, or audio—that encode meaning or features into a high-dimensional space. These embeddings enable similarity-based comparisons for search and machine learning (ML) tasks. +- **_Dense vectors_** are high-dimensional numerical representations where most elements have nonzero values. They are typically produced by deep learning models and are used in semantic search and ML applications. -### Dense vectors - -_Dense vectors_ are high-dimensional numerical representations where most elements have nonzero values. They are typically produced by deep learning models and are used in semantic search and ML applications. - -### Sparse vectors - -_Sparse vectors_ contain mostly zero values and are often used in techniques like neural sparse search to efficiently represent and retrieve information. +- **_Sparse vectors_** contain mostly zero values and are often used in techniques like neural sparse search to efficiently represent and retrieve information. ## Vector search fundamentals -### Vector search - -[_Vector search_]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/), also known as _similarity search_ or _nearest neighbor search_, is a technique for finding items that are most similar to a given input vector. It is widely used in applications such as recommendation systems, image retrieval, and natural language processing. - -### Space - -A [_space_]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/#calculating-similarity) defines how similarity or distance between two vectors is measured. Different spaces use different distance metrics, such as Euclidean distance or cosine similarity, to determine how closely vectors resemble each other. - -### Method +- [**_Vector search_**]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/), also known as _similarity search_ or _nearest neighbor search_, is a technique for finding items that are most similar to a given input vector. It is widely used in applications such as recommendation systems, image retrieval, and natural language processing. 
-A [_method_]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) refers to the algorithm used to organize vector data during indexing and retrieve relevant results during search in approximate k-NN search. Different methods balance trade-offs between accuracy, speed, and memory usage. +- A [**_space_**]({{site.url}}{{site.baseurl}}/vector-search/getting-started/vector-search-basics/#calculating-similarity) defines how similarity or distance between two vectors is measured. Different spaces use different distance metrics, such as Euclidean distance or cosine similarity, to determine how closely vectors resemble each other. -### Engine +- A [**_method_**]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) refers to the algorithm that approximate k-NN search uses to organize vector data during indexing and retrieve relevant results during search. Different methods balance trade-offs between accuracy, speed, and memory usage. -An [_engine_]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) is the underlying library that implements vector search methods. It determines how vectors are indexed, stored, and retrieved during similarity search operations. +- An [**_engine_**]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-methods-engines/) is the underlying library that implements vector search methods. It determines how vectors are indexed, stored, and retrieved during similarity search operations. ## k-NN search - **_k-nearest neighbors (k-NN) search_** finds the k most similar vectors to a given query vector in an index. The similarity is determined by a specified distance metric. -### k-NN search -_k-nearest neighbors (k-NN) search_ finds the k most similar vectors to a given query vector in an index. The similarity is determined based on a specified distance metric. + [**_Exact k-NN search_**]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/knn-score-script/) performs a brute-force comparison between a query vector and all vectors in an index, computing the exact nearest neighbors. This approach provides high accuracy but can be computationally expensive for large datasets. -### Exact k-NN search - -[_Exact k-NN search_]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/knn-score-script/) performs a brute-force comparison between a query vector and all vectors in an index, computing the exact nearest neighbors. This approach provides high accuracy but can be computationally expensive for large datasets. - -### Approximate k-NN search - -[_Approximate k-NN search_]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/approximate-knn/) reduces computational complexity by using indexing techniques that speed up search operations while maintaining high accuracy. These methods restructure the index or reduce the dimensionality of vectors to improve performance. +- [**_Approximate k-NN search_**]({{site.url}}{{site.baseurl}}/vector-search/vector-search-techniques/approximate-knn/) reduces computational complexity by using indexing techniques that speed up search operations while maintaining high accuracy. These methods restructure the index or reduce the dimensionality of vectors to improve performance. ## Query types -### k-NN query -A [_k-NN query_]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) searches vector fields using a query vector.
- -### Neural query - -A [_neural query_]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/) searches vector fields using text or image data. +- A [**_k-NN query_**]({{site.url}}{{site.baseurl}}/query-dsl/specialized/k-nn/) searches vector fields using a query vector. -### Neural sparse query +- A [**_neural query_**]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/) searches vector fields using text or image data. -A [_neural sparse query_]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural-sparse/) searches vector fields using raw text or sparse vector tokens. +- A [**_neural sparse query_**]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural-sparse/) searches vector fields using raw text or sparse vector tokens. ## Search techniques -### Semantic search +- [**_Semantic search_**]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/semantic-search/) interprets the intent and contextual meaning of a query rather than relying solely on exact keyword matches. This approach improves the relevance of search results, especially for natural language queries. -[_Semantic search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/semantic-search/) interprets the intent and contextual meaning of a query rather than relying solely on exact keyword matches. This approach improves the relevance of search results, especially for natural language queries. +- [**_Hybrid search_**]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/hybrid-search/) combines lexical (keyword-based) search with semantic (vector-based) search to improve search relevance. This approach ensures that results include both exact keyword matches and conceptually similar content. -### Hybrid search +- [**_Multimodal search_**]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/multimodal-search/) enables you to search across multiple types of data, such as text and images. It allows queries in one format (for example, text) to retrieve results in another (for example, images). -[_Hybrid search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/hybrid-search/) combines lexical (keyword-based) search with semantic (vector-based) search to improve search relevance. This approach ensures that results include both exact keyword matches and conceptually similar content. +- [**_Radial search_**]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/) retrieves all vectors within a specified distance or similarity threshold from a query vector. It is useful for tasks that require finding all relevant matches within a given range rather than retrieving a fixed number of nearest neighbors. -### Multimodal search +- [**_Neural sparse search_**]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/neural-sparse-search/) uses an inverted index, similar to BM25, to efficiently retrieve relevant documents based on sparse vector representations. This approach maintains the efficiency of traditional lexical search while incorporating semantic understanding. -[_Multimodal search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/multimodal-search/) enables you to search across multiple types of data, such as text and images. It allows queries in one format (for example, text) to retrieve results in another (for example, images). 
+- [**_Conversational search_**]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/conversational-search/) allows you to interact with a search system using natural language queries and refine results through follow-up questions. This approach enhances the user experience by making search more intuitive and interactive. -### Radial search - -[_Radial search_]({{site.url}}{{site.baseurl}}/vector-search/specialized-operations/radial-search-knn/) retrieves all vectors within a specified distance or similarity threshold from a query vector. It is useful for tasks that require finding all relevant matches within a given range rather than retrieving a fixed number of nearest neighbors. - -### Neural sparse search - -[_Neural sparse search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/neural-sparse-search/) uses an inverted index, similar to BM25, to efficiently retrieve relevant documents based on sparse vector representations. This approach maintains the efficiency of traditional lexical search while incorporating semantic understanding. - -### Conversational search - -[_Conversational search_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/conversational-search/) allows you to interact with a search system using natural language queries and refine results through follow-up questions. This approach enhances the user experience by making search more intuitive and interactive. - -### Retrieval-augmented generation - -[_Retrieval-augmented generation (RAG)_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/conversational-search/#rag) enhances large language models (LLMs) by retrieving relevant information from an index and incorporating it into the model's response. This approach improves the accuracy and relevance of generated text. +- [**_Retrieval-augmented generation (RAG)_**]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/conversational-search/#rag) enhances large language models (LLMs) by retrieving relevant information from an index and incorporating it into the model's response. This approach improves the accuracy and relevance of generated text. ## Indexing and storage techniques -### Text chunking - -[_Text chunking_]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/text-chunking/) involves splitting long documents or text passages into smaller segments to improve search retrieval and relevance. Chunking helps vector search models process large amounts of text more effectively. - -### Vector quantization - -[_Vector quantization_]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/) is a technique for reducing the storage size of vector embeddings by approximating them using a smaller set of representative vectors. This process enables efficient storage and retrieval in large-scale vector search applications. - -### Scalar quantization - -_Scalar quantization_ reduces vector precision by mapping floating-point values to a limited set of discrete values, decreasing memory requirements while preserving search accuracy. - -### Product quantization +- [**_Text chunking_**]({{site.url}}{{site.baseurl}}/vector-search/ml-powered-search/text-chunking/) involves splitting long documents or text passages into smaller segments to improve search retrieval and relevance. Chunking helps vector search models process large amounts of text more effectively. 
-_Product quantization (PQ)_ divides high-dimensional vectors into smaller subspaces and quantizes each subspace separately, enabling efficient approximate nearest neighbor search with reduced memory usage. +- [**_Vector quantization_**]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/knn-vector-quantization/) is a technique for reducing the storage size of vector embeddings by approximating them using a smaller set of representative vectors. This process enables efficient storage and retrieval in large-scale vector search applications. -### Binary quantization +- **_Scalar quantization (SQ)_** reduces vector precision by mapping floating-point values to a limited set of discrete values, decreasing memory requirements while preserving search accuracy. -_Binary quantization_ compresses vector representations by converting numerical values to binary formats. This technique reduces storage requirements and accelerates similarity computations. +- **_Product quantization (PQ)_** divides high-dimensional vectors into smaller subspaces and quantizes each subspace separately, enabling efficient approximate nearest neighbor search with reduced memory usage. -### Disk-based vector search +- **_Binary quantization_** compresses vector representations by converting numerical values to binary formats. This technique reduces storage requirements and accelerates similarity computations. -[_Disk-based vector search_]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/disk-based-vector-search/) stores vector embeddings on disk rather than in memory, using binary quantization to reduce memory consumption while maintaining search efficiency. +- [**_Disk-based vector search_**]({{site.url}}{{site.baseurl}}/vector-search/optimizing-storage/disk-based-vector-search/) stores vector embeddings on disk rather than in memory, using binary quantization to reduce memory consumption while maintaining search efficiency.
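As a concrete illustration of the final definition, the following is a minimal sketch of a disk-based vector field and a k-NN query against it. The index name, field name, dimension, and example vectors are illustrative assumptions rather than part of the page above, and the `on_disk` mode assumes an OpenSearch version that supports disk-based vector search:

```json
PUT my-vector-index
{
  "settings": {
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "my_vector": {
        "type": "knn_vector",
        "dimension": 8,
        "space_type": "innerproduct",
        "mode": "on_disk"
      }
    }
  }
}
```
{% include copy-curl.html %}

A k-NN query against this field looks the same as a query against an in-memory vector field; the quantization and any rescoring are handled internally:

```json
GET my-vector-index/_search
{
  "query": {
    "knn": {
      "my_vector": {
        "vector": [1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5],
        "k": 5
      }
    }
  }
}
```
{% include copy-curl.html %}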