Skip to content

Commit

Permalink
Refactor, add remote build request interface, complete FAISS request …
Browse files Browse the repository at this point in the history
…parameters, secure setting testing

Signed-off-by: owenhalpert <ohalpert@gmail.com>
  • Loading branch information
owenhalpert committed Feb 28, 2025
1 parent e439c84 commit dbddb29
Show file tree
Hide file tree
Showing 18 changed files with 619 additions and 351 deletions.
21 changes: 21 additions & 0 deletions src/main/java/org/opensearch/knn/common/KNNConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,25 @@ public class KNNConstants {
public static final String DERIVED_VECTOR_FIELD_ATTRIBUTE_KEY = "knn-derived-source-enabled";
public static final String DERIVED_VECTOR_FIELD_ATTRIBUTE_TRUE_VALUE = "true";
public static final String DERIVED_VECTOR_FIELD_ATTRIBUTE_FALSE_VALUE = "false";

// Remote build constants
public static final String BUILD_ENDPOINT = "/_build";
public static final String STATUS_ENDPOINT = "/_status";
public static final String S3 = "s3";
public static final String BUCKET = "bucket";
// Build request keys
public static final String ALGORITHM = "algorithm";
public static final String ALGORITHM_PARAMETERS = "algorithm_parameters";
public static final String INDEX_PARAMETERS = "index_parameters";
public static final String DOC_COUNT = "doc_count";
public static final String TENANT_ID = "tenant_id";
public static final String DOC_ID_PATH = "doc_id_path";
public static final String VECTOR_PATH = "vector_path";
public static final String CONTAINER_NAME = "container_name";
public static final String REPOSITORY_TYPE = "repository_type";
// Server responses
public static final String JOB_ID = "job_id";
public static final String TASK_STATUS = "task_status";
public static final String INDEX_PATH = "index_path";
public static final String ERROR_MESSAGE = "error_message";
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@
package org.opensearch.knn.index.codec.nativeindex.remote;

import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang.NotImplementedException;
import org.opensearch.common.StopWatch;
import org.opensearch.common.UUIDs;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.IndexSettings;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.codec.nativeindex.NativeIndexBuildStrategy;
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
import org.opensearch.knn.index.remote.RemoteBuildRequest;
import org.opensearch.knn.index.remote.RemoteBuildResponse;
import org.opensearch.knn.index.remote.RemoteIndexClient;
import org.opensearch.knn.index.remote.RemoteIndexHTTPClient;
import org.opensearch.repositories.RepositoriesService;
import org.opensearch.repositories.Repository;
Expand All @@ -25,6 +27,7 @@

import static org.opensearch.knn.index.KNNSettings.KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING;
import static org.opensearch.knn.index.KNNSettings.KNN_INDEX_REMOTE_VECTOR_BUILD_THRESHOLD_SETTING;
import static org.opensearch.knn.index.KNNSettings.KNN_REMOTE_BUILD_SERVICE_ENDPOINT_SETTING;
import static org.opensearch.knn.index.KNNSettings.KNN_REMOTE_VECTOR_REPO_SETTING;

/**
Expand Down Expand Up @@ -127,18 +130,24 @@ public void buildAndWriteIndex(BuildIndexParams indexInfo) throws IOException {
log.debug("Repository write took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());

stopWatch = new StopWatch().start();
String jobId = RemoteIndexHTTPClient.getInstance()
.submitVectorBuild(indexSettings, indexInfo, getRepository().getMetadata(), blobName);
RemoteIndexClient client = getRemoteIndexClient();
RemoteBuildRequest remoteBuildRequest = client.constructBuildRequest(
indexSettings,
indexInfo,
getRepository().getMetadata(),
blobName
);
RemoteBuildResponse remoteBuildResponse = client.submitVectorBuild(remoteBuildRequest);
time_in_millis = stopWatch.stop().totalTime().millis();
log.debug("Submit vector build took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());

stopWatch = new StopWatch().start();
awaitVectorBuild();
RemoteStatusResponse remoteStatusResponse = client.awaitVectorBuild(remoteBuildResponse);
time_in_millis = stopWatch.stop().totalTime().millis();
log.debug("Await vector build took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());

stopWatch = new StopWatch().start();
vectorRepositoryAccessor.readFromRepository();
vectorRepositoryAccessor.readFromRepository(remoteStatusResponse.getIndexPath(), indexInfo.getIndexOutputWithBuffer());
time_in_millis = stopWatch.stop().totalTime().millis();
log.debug("Repository read took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());
} catch (Exception e) {
Expand Down Expand Up @@ -167,9 +176,14 @@ private BlobStoreRepository getRepository() throws RepositoryMissingException {
}

/**
* Wait on remote vector build to complete
* Determine which implementation of RemoteIndexClient to be used by the build strategy
* @return Concrete RemoteIndexClient implementation
*/
private void awaitVectorBuild() {
throw new NotImplementedException();
private RemoteIndexClient getRemoteIndexClient() {
String endpoint = KNNSettings.state().getSettingValue(KNN_REMOTE_BUILD_SERVICE_ENDPOINT_SETTING.getKey());
if (endpoint == null || endpoint.isEmpty()) {
throw new IllegalArgumentException("No endpoint set for RemoteIndexClient");
}
return RemoteIndexHTTPClient.getInstance();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.nativeindex.remote;

import lombok.AllArgsConstructor;
import lombok.Getter;

@Getter
@AllArgsConstructor
public class RemoteStatusResponse {
private String indexPath;
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import org.apache.commons.lang.NotImplementedException;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.store.IndexOutputWithBuffer;
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;

import java.io.IOException;
Expand Down Expand Up @@ -36,7 +37,7 @@ void writeToRepository(
/**
* Read constructed vector file from remote repository and write to IndexOutput
*/
default void readFromRepository() {
default void readFromRepository(String path, IndexOutputWithBuffer indexOutputWithBuffer) {
throw new NotImplementedException();
}
}
6 changes: 6 additions & 0 deletions src/main/java/org/opensearch/knn/index/engine/KNNEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import com.google.common.collect.ImmutableSet;
import org.opensearch.common.ValidationException;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
import org.opensearch.knn.index.engine.faiss.Faiss;
import org.opensearch.knn.index.engine.lucene.Lucene;
import org.opensearch.knn.index.engine.nmslib.Nmslib;
Expand Down Expand Up @@ -177,6 +178,11 @@ public KNNLibraryIndexingContext getKNNLibraryIndexingContext(
return knnLibrary.getKNNLibraryIndexingContext(knnMethodContext, knnMethodConfigContext);
}

@Override
public Map<String, Object> getRemoteIndexingParameters(BuildIndexParams params) {
return knnLibrary.getRemoteIndexingParameters(params);
}

@Override
public KNNLibrarySearchContext getKNNLibrarySearchContext(String methodName) {
return knnLibrary.getKNNLibrarySearchContext(methodName);
Expand Down
10 changes: 10 additions & 0 deletions src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

import org.opensearch.common.ValidationException;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;

import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
* KNNLibrary is an interface that helps the plugin communicate with k-NN libraries
Expand Down Expand Up @@ -147,4 +149,12 @@ default List<String> mmapFileExtensions() {
default boolean supportsRemoteIndexBuild() {
return false;
}

/**
* Get the remote build supported index parameter mapping to be sent to the remote build service.
* @param params to parse
*/
default Map<String, Object> getRemoteIndexingParameters(BuildIndexParams params) {
throw new UnsupportedOperationException("This method must be implemented by the implementing class");
}
}
78 changes: 78 additions & 0 deletions src/main/java/org/opensearch/knn/index/engine/faiss/Faiss.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,37 @@
import com.google.common.collect.ImmutableMap;
import org.opensearch.knn.common.KNNConstants;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
import org.opensearch.knn.index.engine.KNNMethod;
import org.opensearch.knn.index.engine.KNNMethodConfigContext;
import org.opensearch.knn.index.engine.KNNMethodContext;
import org.opensearch.knn.index.engine.MethodResolver;
import org.opensearch.knn.index.engine.NativeLibrary;
import org.opensearch.knn.index.engine.ResolvedMethodContext;

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

import static org.opensearch.knn.common.KNNConstants.ALGORITHM;
import static org.opensearch.knn.common.KNNConstants.ALGORITHM_PARAMETERS;
import static org.opensearch.knn.common.KNNConstants.INDEX_DESCRIPTION_PARAMETER;
import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;
import static org.opensearch.knn.common.KNNConstants.METHOD_IVF;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_SEARCH;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_M;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_NLIST;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_NLIST_DEFAULT;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_NPROBES;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_NPROBES_DEFAULT;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_SPACE_TYPE;
import static org.opensearch.knn.common.KNNConstants.NAME;
import static org.opensearch.knn.common.KNNConstants.PARAMETERS;
import static org.opensearch.knn.common.KNNConstants.SPACE_TYPE;
import static org.opensearch.knn.index.KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION;
import static org.opensearch.knn.index.KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH;
import static org.opensearch.knn.index.KNNSettings.INDEX_KNN_DEFAULT_SPACE_TYPE;

/**
* Implements NativeLibrary for the faiss native library
Expand Down Expand Up @@ -109,6 +128,65 @@ public Float scoreToRadialThreshold(Float score, SpaceType spaceType) {
return spaceType.scoreToDistanceTranslation(score);
}

/**
* Get the parameters that need to be passed to the remote build service for training
* @param indexInfo to parse
* @return Map of parameters to be used as "index_parameters"
*/
@Override
public Map<String, Object> getRemoteIndexingParameters(BuildIndexParams indexInfo) {
Map<String, Object> indexParameters = new HashMap<>();
String methodName = (String) indexInfo.getParameters().get(NAME);
indexParameters.put(ALGORITHM, methodName);
indexParameters.put(METHOD_PARAMETER_SPACE_TYPE, indexInfo.getParameters().getOrDefault(SPACE_TYPE, INDEX_KNN_DEFAULT_SPACE_TYPE));

assert (indexInfo.getParameters().containsKey(PARAMETERS));
Object innerParams = indexInfo.getParameters().get(PARAMETERS);
assert (innerParams instanceof Map);
{
Map<String, Object> algorithmParams = new HashMap<>();
Map<String, Object> innerMap = (Map<String, Object>) innerParams;
switch (methodName) {
case METHOD_HNSW -> {
algorithmParams.put(
METHOD_PARAMETER_EF_CONSTRUCTION,
innerMap.getOrDefault(METHOD_PARAMETER_EF_CONSTRUCTION, INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION)
);
algorithmParams.put(
METHOD_PARAMETER_EF_SEARCH,
innerMap.getOrDefault(METHOD_PARAMETER_EF_SEARCH, INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH)
);
Object indexDescription = indexInfo.getParameters().get(INDEX_DESCRIPTION_PARAMETER);
assert indexDescription instanceof String;
algorithmParams.put(METHOD_PARAMETER_M, getMFromIndexDescription((String) indexDescription));
}
case METHOD_IVF -> {
algorithmParams.put(
METHOD_PARAMETER_NLIST,
innerMap.getOrDefault(METHOD_PARAMETER_NLIST, METHOD_PARAMETER_NLIST_DEFAULT)
);
algorithmParams.put(
METHOD_PARAMETER_NPROBES,
innerMap.getOrDefault(METHOD_PARAMETER_NPROBES, METHOD_PARAMETER_NPROBES_DEFAULT)
);
}
}
indexParameters.put(ALGORITHM_PARAMETERS, algorithmParams);
}
return indexParameters;
}

public static int getMFromIndexDescription(String indexDescription) {
int commaIndex = indexDescription.indexOf(",");
if (commaIndex == -1) {
throw new IllegalArgumentException("Invalid index description: " + indexDescription);
}
String hnswPart = indexDescription.substring(0, commaIndex);
int m = Integer.parseInt(hnswPart.substring(4));
assert (m > 1 && m < 100);
return m;
}

@Override
public ResolvedMethodContext resolveMethod(
KNNMethodContext knnMethodContext,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.util.Version;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
import org.opensearch.knn.index.engine.JVMLibrary;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.engine.KNNMethod;
import org.opensearch.knn.index.engine.KNNMethodConfigContext;
import org.opensearch.knn.index.engine.KNNMethodContext;
Expand Down Expand Up @@ -89,6 +91,11 @@ public Float scoreToRadialThreshold(Float score, SpaceType spaceType) {
return score;
}

@Override
public Map<String, Object> getRemoteIndexingParameters(BuildIndexParams params) {
throw new UnsupportedOperationException(KNNEngine.LUCENE.getName() + " engine not supported for remote index build");
}

@Override
public List<String> mmapFileExtensions() {
return List.of("vec", "vex");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import com.google.common.collect.ImmutableMap;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.engine.KNNMethod;
import org.opensearch.knn.index.engine.KNNMethodConfigContext;
import org.opensearch.knn.index.engine.KNNMethodContext;
Expand Down Expand Up @@ -60,6 +62,11 @@ public Float scoreToRadialThreshold(Float score, SpaceType spaceType) {
return score;
}

@Override
public Map<String, Object> getRemoteIndexingParameters(BuildIndexParams params) {
throw new UnsupportedOperationException(KNNEngine.NMSLIB.getName() + " not supported for remote index build");
}

@Override
public ResolvedMethodContext resolveMethod(
KNNMethodContext knnMethodContext,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.remote;

import lombok.Getter;
import org.opensearch.cluster.metadata.RepositoryMetadata;
import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.IndexSettings;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;

import java.io.IOException;
import java.net.URI;

import static org.opensearch.knn.common.KNNConstants.CONTAINER_NAME;
import static org.opensearch.knn.common.KNNConstants.DIMENSION;
import static org.opensearch.knn.common.KNNConstants.DOC_COUNT;
import static org.opensearch.knn.common.KNNConstants.VECTOR_PATH;
import static org.opensearch.knn.common.KNNConstants.DOC_ID_PATH;
import static org.opensearch.knn.common.KNNConstants.INDEX_PARAMETERS;
import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE;
import static org.opensearch.knn.common.KNNConstants.REPOSITORY_TYPE;
import static org.opensearch.knn.common.KNNConstants.TENANT_ID;
import static org.opensearch.knn.common.KNNConstants.VECTOR_DATA_TYPE_FIELD;
import static org.opensearch.knn.index.KNNSettings.KNN_REMOTE_BUILD_SERVICE_ENDPOINT_SETTING;

/**
* RemoteBuildRequest implementation for HTTP clients that sets the endpoint and offers a JSON conversion.
*/
@Getter
public class HTTPRemoteBuildRequest extends RemoteBuildRequest {
private final URI endpoint;

public HTTPRemoteBuildRequest(
IndexSettings indexSettings,
BuildIndexParams indexInfo,
RepositoryMetadata repositoryMetadata,
String blobName
) throws IOException {
super(indexSettings, indexInfo, repositoryMetadata, blobName);
this.endpoint = URI.create(KNNSettings.state().getSettingValue(KNN_REMOTE_BUILD_SERVICE_ENDPOINT_SETTING.getKey()));
}

public String toJson() throws IOException {
try (XContentBuilder builder = JsonXContent.contentBuilder()) {
builder.startObject();
builder.field(REPOSITORY_TYPE, repositoryType);
builder.field(CONTAINER_NAME, containerName);
builder.field(VECTOR_PATH, vectorPath);
builder.field(DOC_ID_PATH, docIdPath);
builder.field(TENANT_ID, tenantId);
builder.field(DIMENSION, dimension);
builder.field(DOC_COUNT, docCount);
builder.field(VECTOR_DATA_TYPE_FIELD, dataType);
builder.field(KNN_ENGINE, engine);
builder.field(INDEX_PARAMETERS, indexParameters);
builder.endObject();
return builder.toString();
}
}
}
Loading

0 comments on commit dbddb29

Please sign in to comment.