Skip to content

Commit

Permalink
3.0.0 Breaking Changes For KNN
Browse files Browse the repository at this point in the history
Signed-off-by: Vikasht34 <viktari@amazon.com>
  • Loading branch information
Vikasht34 committed Feb 27, 2025
1 parent c7ac05c commit d064b1f
Show file tree
Hide file tree
Showing 30 changed files with 217 additions and 538 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Refactoring
* Small Refactor Post Lucene 10.0.1 upgrade [#2541](https://github.com/opensearch-project/k-NN/pull/2541)
* Refactor codec to leverage backwards_codecs [#2546](https://github.com/opensearch-project/k-NN/pull/2546)
* 3.0.0 Breaking Changes For KNN [#2564](https://github.com/opensearch-project/k-NN/pull/2564)

## [Unreleased 2.x](https://github.com/opensearch-project/k-NN/compare/2.19...2.x)
### Features
Expand Down
50 changes: 0 additions & 50 deletions src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,7 @@ public class KNNSettings {
/**
* Settings name
*/
public static final String KNN_SPACE_TYPE = "index.knn.space_type";
public static final String INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD = "index.knn.advanced.approximate_threshold";
public static final String KNN_ALGO_PARAM_M = "index.knn.algo_param.m";
public static final String KNN_ALGO_PARAM_EF_CONSTRUCTION = "index.knn.algo_param.ef_construction";
public static final String KNN_ALGO_PARAM_EF_SEARCH = "index.knn.algo_param.ef_search";
public static final String KNN_ALGO_PARAM_INDEX_THREAD_QTY = "knn.algo_param.index_thread_qty";
public static final String KNN_MEMORY_CIRCUIT_BREAKER_ENABLED = "knn.memory.circuit_breaker.enabled";
Expand All @@ -80,7 +77,6 @@ public class KNNSettings {
public static final String KNN_CIRCUIT_BREAKER_TRIGGERED = "knn.circuit_breaker.triggered";
public static final String KNN_CACHE_ITEM_EXPIRY_ENABLED = "knn.cache.item.expiry.enabled";
public static final String KNN_CACHE_ITEM_EXPIRY_TIME_MINUTES = "knn.cache.item.expiry.minutes";
public static final String KNN_PLUGIN_ENABLED = "knn.plugin.enabled";
public static final String KNN_CIRCUIT_BREAKER_UNSET_PERCENTAGE = "knn.circuit_breaker.unset.percentage";
public static final String KNN_INDEX = "index.knn";
public static final String MODEL_INDEX_NUMBER_OF_SHARDS = "knn.model.index.number_of_shards";
Expand Down Expand Up @@ -159,14 +155,6 @@ public class KNNSettings {
Setting.Property.NodeScope
);

/**
* String setting registered under the {@link #KNN_SPACE_TYPE} key, with INDEX_KNN_DEFAULT_SPACE_TYPE
* passed as its default and a {@code SpaceTypeValidator} instance passed as its validator.
* The setting is index-scoped and flagged as deprecated.
*/
public static final Setting<String> INDEX_KNN_SPACE_TYPE = Setting.simpleString(
KNN_SPACE_TYPE,
INDEX_KNN_DEFAULT_SPACE_TYPE,
new SpaceTypeValidator(),
IndexScope,
Setting.Property.Deprecated
);

/**
* build_vector_data_structure_threshold - This parameter determines when to build vector data structure for knn fields during indexing
* and merging. Setting -1 (min) will skip building graph, whereas on any other values, the graph will be built if
Expand All @@ -182,20 +170,6 @@ public class KNNSettings {
Dynamic
);

/**
* M - the number of bi-directional links created for every new element during construction.
* Reasonable range for M is 2-100. Higher M work better on datasets with high intrinsic
* dimensionality and/or high recall, while low M work better for datasets with low intrinsic dimensionality and/or low recalls.
* The parameter also determines the algorithm's memory consumption, which is roughly M * 8-10 bytes per stored element.
* <p>
* The setting is registered under {@link #KNN_ALGO_PARAM_M}, is index-scoped and is flagged as deprecated.
*/
public static final Setting<Integer> INDEX_KNN_ALGO_PARAM_M_SETTING = Setting.intSetting(
KNN_ALGO_PARAM_M,
INDEX_KNN_DEFAULT_ALGO_PARAM_M,
// NOTE(review): presumably the minimum accepted value - confirm against Setting.intSetting
2,
IndexScope,
Setting.Property.Deprecated
);

/**
* ef or efSearch - the size of the dynamic list for the nearest neighbors (used during the search).
* Higher ef leads to more accurate but slower search. ef cannot be set lower than the number of queried nearest neighbors k.
Expand All @@ -209,18 +183,6 @@ public class KNNSettings {
Dynamic
);

/**
* ef_construction - the parameter has the same meaning as ef, but controls the index_time/index_accuracy.
* Bigger ef_construction leads to longer construction (more indexing time), but better index quality.
* <p>
* The setting is registered under {@link #KNN_ALGO_PARAM_EF_CONSTRUCTION}, is index-scoped and is flagged as deprecated.
*/
public static final Setting<Integer> INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING = Setting.intSetting(
KNN_ALGO_PARAM_EF_CONSTRUCTION,
INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION,
// NOTE(review): presumably the minimum accepted value - confirm against Setting.intSetting
2,
IndexScope,
Setting.Property.Deprecated
);

public static final Setting<Integer> MODEL_INDEX_NUMBER_OF_SHARDS_SETTING = Setting.intSetting(
MODEL_INDEX_NUMBER_OF_SHARDS,
1,
Expand Down Expand Up @@ -393,11 +355,6 @@ public class KNNSettings {
*/
public static Map<String, Setting<?>> dynamicCacheSettings = new HashMap<String, Setting<?>>() {
{
/**
* KNN plugin enable/disable setting
*/
put(KNN_PLUGIN_ENABLED, Setting.boolSetting(KNN_PLUGIN_ENABLED, true, NodeScope, Dynamic));

/**
* Weight circuit breaker settings
*/
Expand Down Expand Up @@ -555,10 +512,7 @@ private Setting<?> getSetting(String key) {

public List<Setting<?>> getSettings() {
List<Setting<?>> settings = Arrays.asList(
INDEX_KNN_SPACE_TYPE,
INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD_SETTING,
INDEX_KNN_ALGO_PARAM_M_SETTING,
INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING,
INDEX_KNN_ALGO_PARAM_EF_SEARCH_SETTING,
KNN_ALGO_PARAM_INDEX_THREAD_QTY_SETTING,
KNN_CIRCUIT_BREAKER_TRIGGERED_SETTING,
Expand All @@ -583,10 +537,6 @@ public List<Setting<?>> getSettings() {
.collect(Collectors.toList());
}

/**
 * Looks up the current value stored under {@link KNNSettings#KNN_PLUGIN_ENABLED}.
 *
 * @return the value of the {@code knn.plugin.enabled} setting
 */
public static boolean isKNNPluginEnabled() {
    final boolean pluginEnabled = KNNSettings.state().getSettingValue(KNNSettings.KNN_PLUGIN_ENABLED);
    return pluginEnabled;
}

/**
 * Looks up the current value stored under {@link KNNSettings#KNN_CIRCUIT_BREAKER_TRIGGERED}.
 *
 * @return the value of the {@code knn.circuit_breaker.triggered} setting
 */
public static boolean isCircuitBreakerTriggered() {
    final boolean breakerTriggered = KNNSettings.state().getSettingValue(KNNSettings.KNN_CIRCUIT_BREAKER_TRIGGERED);
    return breakerTriggered;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import org.apache.logging.log4j.util.Strings;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.mapper.MapperParsingException;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.VectorDataType;

Expand Down Expand Up @@ -49,12 +48,6 @@ public SpaceType resolveSpaceType(
// 1. We try to get it from index setting, which is a relic of legacy.
// 2. Otherwise, we return a default one.
if (isSpaceTypeConfigured(methodSpaceType) == false && isSpaceTypeConfigured(topLevelSpaceType) == false) {
if (indexSettings != null) {
final String spaceType = indexSettings.get(KNNSettings.INDEX_KNN_SPACE_TYPE.getKey());
if (spaceType != null) {
return SpaceType.getSpace(spaceType);
}
}
return getDefaultSpaceType(vectorDataType);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@
import static org.opensearch.knn.index.mapper.KNNVectorFieldMapperUtil.createStoredFieldForFloatVector;
import static org.opensearch.knn.index.mapper.KNNVectorFieldMapperUtil.useFullFieldNameValidation;
import static org.opensearch.knn.index.mapper.KNNVectorFieldMapperUtil.validateIfCircuitBreakerIsNotTriggered;
import static org.opensearch.knn.index.mapper.KNNVectorFieldMapperUtil.validateIfKNNPluginEnabled;
import static org.opensearch.knn.index.mapper.ModelFieldMapper.UNSET_MODEL_DIMENSION_IDENTIFIER;

/**
Expand Down Expand Up @@ -425,6 +424,8 @@ public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserCont
// Resolve method component. For the legacy case where space type can be configured at index level,
// it first tries to use the given one then tries to get it from index setting when the space type is UNDEFINED.
resolveKNNMethodComponents(builder, parserContext, resolvedSpaceType);
// Validate if the KNN engine is allowed for index creation
validateBlockedKNNEngine(builder.knnMethodContext.get(), parserContext.indexVersionCreated());
validateFromKNNMethod(builder);
}

Expand Down Expand Up @@ -457,6 +458,25 @@ private void validateModeAndCompressionForDataType(KNNVectorFieldMapper.Builder
}
}

/**
 * Rejects the NMSLIB engine for indices whose creation version is 3.0.0 or later.
 * A {@code null} method context, any other engine, or an earlier creation version passes
 * without any action.
 *
 * @param knnMethodContext    the KNN method configuration carrying the engine; may be {@code null}
 * @param indexVersionCreated the version the index is being created with
 * @throws IllegalArgumentException if the engine is NMSLIB and the version is 3.0.0 or later
 */
private void validateBlockedKNNEngine(KNNMethodContext knnMethodContext, Version indexVersionCreated) {
    // Nothing to validate when no method was configured.
    if (knnMethodContext == null) {
        return;
    }
    // Only NMSLIB is blocked.
    if (knnMethodContext.getKnnEngine() != KNNEngine.NMSLIB) {
        return;
    }
    // Indices created before 3.0.0 may keep using NMSLIB.
    if (indexVersionCreated.onOrAfter(Version.V_3_0_0) == false) {
        return;
    }
    throw new IllegalArgumentException(
        "NMSLIB engine is deprecated and cannot be used for new index creation in OpenSearch 3.0.0 and later."
    );
}

private void validateFromFlat(KNNVectorFieldMapper.Builder builder) {
if (builder.modelId.get() != null || builder.knnMethodContext.get() != null) {
throw new IllegalArgumentException("Cannot set modelId or method parameters when index.knn setting is false");
Expand Down Expand Up @@ -686,7 +706,6 @@ protected List<Field> getFieldsForByteVector(final byte[] array) {
* Validation checks before parsing of doc begins
*/
protected void validatePreparse() {
// Throws IllegalStateException when the knn.plugin.enabled setting is false.
validateIfKNNPluginEnabled();
// NOTE(review): presumably rejects the document while the circuit breaker is triggered - confirm in KNNVectorFieldMapperUtil.
validateIfCircuitBreakerIsNotTriggered();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,10 @@
import java.util.Arrays;
import java.util.Map;

import static org.opensearch.knn.common.KNNConstants.HNSW_ALGO_EF_CONSTRUCTION;
import static org.opensearch.knn.common.KNNConstants.HNSW_ALGO_M;
import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE;
import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_M;
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_SPACE_TYPE;

/**
* Utility class for KNNVectorFieldMapper
Expand Down Expand Up @@ -121,15 +118,6 @@ static void validateIfCircuitBreakerIsNotTriggered() {
}
}

/**
 * Fails fast when the KNN plugin has been switched off.
 *
 * @throws IllegalStateException if {@link KNNSettings#isKNNPluginEnabled()} returns {@code false}
 */
static void validateIfKNNPluginEnabled() {
    if (KNNSettings.isKNNPluginEnabled()) {
        return;
    }
    throw new IllegalStateException("KNN plugin is disabled. To enable update knn.plugin.enabled setting to true");
}

/**
* Prerequisite: Index should be a knn index, which is validated via the index.knn index setting. This function
* assumes that caller has already validated that index is a KNN index.
Expand All @@ -156,51 +144,12 @@ static boolean useFullFieldNameValidation(final Version indexCreatedVersion) {
return indexCreatedVersion != null && indexCreatedVersion.onOrAfter(Version.V_2_17_0);
}

/**
 * Resolves the space type configured on an index.
 * When the index carries no value for the space type setting, the default space type is used
 * and an informational message is logged.
 *
 * @param indexSettings settings of the index to inspect
 * @return the space type named by the setting, or the one named by the default when the setting is absent
 */
public static SpaceType getSpaceType(final Settings indexSettings) {
    final String configured = indexSettings.get(KNNSettings.INDEX_KNN_SPACE_TYPE.getKey());
    if (configured != null) {
        return SpaceType.getSpace(configured);
    }

    // Setting is absent: log it and fall back to the default.
    final String fallback = KNNSettings.INDEX_KNN_DEFAULT_SPACE_TYPE;
    final String message = String.format(
        "[KNN] The setting \"%s\" was not set for the index. Likely caused by recent version upgrade. Setting the setting to the default value=%s",
        METHOD_PARAMETER_SPACE_TYPE,
        fallback
    );
    log.info(message);
    return SpaceType.getSpace(fallback);
}

/**
* Reads the value stored under the key of KNNSettings.INDEX_KNN_ALGO_PARAM_M_SETTING from the index settings.
* When no value is present, logs an informational message and returns
* KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_M; otherwise the stored string is parsed as an integer.
*
* @param indexSettings settings of the index to inspect
* @return the parsed setting value, or the default when the setting is absent
* @throws NumberFormatException if the stored value is not a parsable integer
*/
private static int getM(Settings indexSettings) {
String m = indexSettings.get(KNNSettings.INDEX_KNN_ALGO_PARAM_M_SETTING.getKey());
if (m == null) {
log.info(
String.format(
"[KNN] The setting \"%s\" was not set for the index. Likely caused by recent version upgrade. Setting the setting to the default value=%s",
HNSW_ALGO_M,
KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_M
)
);
return KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_M;
}
return Integer.parseInt(m);
// NOTE(review): this return directly follows an unconditional return and can never execute. It looks like
// two revisions of the method body were interleaved - confirm which one is intended and drop the other.
return KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_M;
}

/**
* Reads the value stored under the key of KNNSettings.INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING from the
* index settings. When no value is present, the value returned by
* IndexHyperParametersUtil.getHNSWEFConstructionValue for the given index version is logged and returned;
* otherwise the stored string is parsed as an integer.
*
* @param indexSettings settings of the index to inspect
* @param indexVersion version passed to IndexHyperParametersUtil to pick the fallback value
* @return the parsed setting value, or the version-dependent fallback when the setting is absent
* @throws NumberFormatException if the stored value is not a parsable integer
*/
private static int getEfConstruction(Settings indexSettings, Version indexVersion) {
final String efConstruction = indexSettings.get(KNNSettings.INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING.getKey());
if (efConstruction == null) {
final int defaultEFConstructionValue = IndexHyperParametersUtil.getHNSWEFConstructionValue(indexVersion);
log.info(
String.format(
"[KNN] The setting \"%s\" was not set for the index. Likely caused by recent version upgrade. "
+ "Picking up default value for the index =%s",
HNSW_ALGO_EF_CONSTRUCTION,
defaultEFConstructionValue
)
);
return defaultEFConstructionValue;
}
return Integer.parseInt(efConstruction);
// NOTE(review): this return directly follows an unconditional return and can never execute. It looks like
// two revisions of the method body were interleaved - confirm which one is intended and drop the other.
return IndexHyperParametersUtil.getHNSWEFConstructionValue(indexVersion);
}

static KNNMethodContext createKNNMethodContextFromLegacy(
Expand Down
4 changes: 0 additions & 4 deletions src/main/java/org/opensearch/knn/index/query/KNNQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.join.BitSetProducer;
import org.opensearch.common.StopWatch;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.query.rescore.RescoreContext;

Expand Down Expand Up @@ -171,9 +170,6 @@ public KNNQuery filterQuery(Query filterQuery) {
*/
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
if (!KNNSettings.isKNNPluginEnabled()) {
throw new IllegalStateException("KNN plugin is disabled. To enable update knn.plugin.enabled to true");
}
StopWatch stopWatch = null;
if (log.isDebugEnabled()) {
stopWatch = new StopWatch().start();
Expand Down
13 changes: 0 additions & 13 deletions src/main/java/org/opensearch/knn/jni/FaissService.java
Original file line number Diff line number Diff line change
Expand Up @@ -407,19 +407,6 @@ public static native KNNQueryResult[] queryBinaryIndexWithFilter(
*/
public static native byte[] trainByteIndex(Map<String, Object> indexParameters, int dimension, long trainVectorsPointer);

/**
* <p>
* The function is deprecated. Use {@link JNICommons#storeVectorData(long, float[][], long)}
* </p>
* Transfer vectors from Java to native
*
* @param vectorsPointer pointer to vectors in native memory. Should be 0 to create vector as well
* @param trainingData data to be transferred
* @return pointer to native memory location of training data
* @deprecated since 2.14.0 and marked for removal; use {@link JNICommons#storeVectorData(long, float[][], long)} instead
*/
@Deprecated(since = "2.14.0", forRemoval = true)
public static native long transferVectors(long vectorsPointer, float[][] trainingData);

/**
* Range search index with filter
*
Expand Down
15 changes: 0 additions & 15 deletions src/main/java/org/opensearch/knn/jni/JNIService.java
Original file line number Diff line number Diff line change
Expand Up @@ -419,21 +419,6 @@ public static byte[] trainIndex(Map<String, Object> indexParameters, int dimensi
);
}

/**
 * Transfers vectors from Java to native memory by delegating to
 * {@link FaissService#transferVectors(long, float[][])}.
 *
 * @param vectorsPointer pointer to vectors in native memory. Should be 0 to create vector as well
 * @param trainingData data to be transferred
 * @return pointer to native memory location of training data
 * @deprecated since 2.14.0 and marked for removal; use
 *             {@link JNICommons#storeVectorData(long, float[][], long, boolean)} instead
 */
@Deprecated(since = "2.14.0", forRemoval = true)
public static long transferVectors(long vectorsPointer, float[][] trainingData) {
    final long nativeAddress = FaissService.transferVectors(vectorsPointer, trainingData);
    return nativeAddress;
}

/**
* Range search index for a given query vector
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import org.opensearch.action.search.SearchResponse;
import org.opensearch.knn.index.engine.qframe.QuantizationConfig;
import org.opensearch.knn.jni.JNICommons;
import org.opensearch.knn.jni.JNIService;
import org.opensearch.knn.index.memory.NativeMemoryAllocation;
import org.opensearch.knn.quantization.factory.QuantizerFactory;
import org.opensearch.knn.quantization.models.quantizationOutput.BinaryQuantizationOutput;
Expand Down Expand Up @@ -59,9 +58,10 @@ public void accept(List<?> floats) {
}
} else {
trainingDataAllocation.setMemoryAddress(
JNIService.transferVectors(
JNICommons.storeVectorData(
trainingDataAllocation.getMemoryAddress(),
floats.stream().map(v -> ArrayUtils.toPrimitive((Float[]) v)).toArray(float[][]::new)
floats.stream().map(v -> ArrayUtils.toPrimitive((Float[]) v)).toArray(float[][]::new),
floats.size()
)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.opensearch.knn.index.engine.MethodComponentContext;
import org.opensearch.knn.index.mapper.CompressionLevel;
import org.opensearch.knn.index.mapper.Mode;
import org.opensearch.knn.jni.JNICommons;
import org.opensearch.knn.jni.JNIService;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.indices.Model;
Expand Down Expand Up @@ -49,7 +50,7 @@ public void testCreateIndexFromModel() throws IOException, InterruptedException
int dimension = 3;

// "Train" a faiss flat index - this really just creates an empty index that does brute force k-NN
long vectorsPointer = JNIService.transferVectors(0, new float[0][0]);
long vectorsPointer = JNICommons.storeVectorData(0, new float[0][0], 0);
byte[] modelBlob = JNIService.trainIndex(
ImmutableMap.of(INDEX_DESCRIPTION_PARAMETER, "Flat", SPACE_TYPE, spaceType.getValue()),
dimension,
Expand Down
Loading

0 comments on commit d064b1f

Please sign in to comment.