Skip to content

Commit

Permalink
Fixed the ITs and some minor bugs in the code
Browse files Browse the repository at this point in the history
Signed-off-by: Navneet Verma <navneev@amazon.com>
  • Loading branch information
navneet1v committed May 31, 2024
1 parent d241221 commit a325223
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 9 deletions.
2 changes: 2 additions & 0 deletions src/main/java/org/opensearch/knn/index/SpaceType.java
Original file line number Diff line number Diff line change
Expand Up @@ -188,4 +188,6 @@ public static SpaceType getSpace(String spaceTypeName) {
public float scoreToDistanceTranslation(float score) {
throw new UnsupportedOperationException(String.format("Space [%s] does not have a score to distance translation", getValue()));
}

public static final Set<SpaceType> VECTOR_FIELD_SUPPORTED_SPACE_TYPES = Set.of(L2, COSINESIMIL, INNER_PRODUCT);
}
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
final FloatVectorValues floatVectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
// merging the graphs here
final KNNCodecUtil.Pair pair = getFloatsFromFloatVectorValues(floatVectorValues);
if (pair.getVectorAddress() == 0 || pair.docs.length == 0) {
log.info("Skipping engine index creation as there are no vectors or docs to be merged");
return;
}
KNN80DocValuesConsumer.createNativeIndex(segmentWriteState, fieldInfo, pair);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.search.DocIdSetIterator;
Expand Down Expand Up @@ -37,6 +38,7 @@ public class KNNCodecUtil {
public static final int JAVA_ROUNDING_NUMBER = 8;

@AllArgsConstructor
@ToString
public static final class Pair {
public int[] docs;
@Getter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.KnnByteVectorField;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
Expand Down Expand Up @@ -546,7 +547,8 @@ private MethodComponentContext getMethodComponentContext(KNNMethodContext knnMet
protected Field createVectorField(float[] vectorValue, int dimension, SpaceType spaceType) {
// Because we will come to this function only in case when Native engines are getting used. So I am avoiding the
// check of use Native engines here.
if (this.indexCreatedVersion.onOrAfter(Version.V_2_15_0)) {
// Also dimension field is only accessible here hence we have to use this function to create fieldType too
if (this.indexCreatedVersion.onOrAfter(Version.V_2_15_0) && SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(spaceType)) {
FieldType tempFieldType = new FieldType(fieldType);
tempFieldType.setVectorAttributes(dimension, VectorEncoding.FLOAT32, spaceType.getVectorSimilarityFunction());
tempFieldType.freeze();
Expand All @@ -555,6 +557,19 @@ protected Field createVectorField(float[] vectorValue, int dimension, SpaceType
return new VectorField(name(), vectorValue, fieldType);
}

protected Field createVectorField(byte[] vectorValue, int dimension, SpaceType spaceType) {
// Because we will come to this function only in case when Native engines are getting used. So I am avoiding the
// check of use Native engines here.
// Also dimension field is only accessible here hence we have to use this function to create fieldType too
if (this.indexCreatedVersion.onOrAfter(Version.V_2_15_0) && SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(spaceType)) {
FieldType tempFieldType = new FieldType(fieldType);
tempFieldType.setVectorAttributes(dimension, VectorEncoding.BYTE, spaceType.getVectorSimilarityFunction());
tempFieldType.freeze();
return new KnnByteVectorField(name(), vectorValue, tempFieldType);
}
return new VectorField(name(), vectorValue, fieldType);
}

protected void parseCreateField(ParseContext context, int dimension, SpaceType spaceType, MethodComponentContext methodComponentContext)
throws IOException {

Expand All @@ -569,7 +584,7 @@ protected void parseCreateField(ParseContext context, int dimension, SpaceType s
}
final byte[] array = bytesArrayOptional.get();
spaceType.validateVector(array);
VectorField point = new VectorField(name(), array, fieldType);
Field point = createVectorField(array, dimension, spaceType);

context.doc().add(point);
if (this.stored) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.opensearch.common.settings.Settings;
import org.opensearch.index.mapper.ParametrizedFieldMapper;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.util.IndexHyperParametersUtil;
import org.opensearch.knn.index.util.KNNEngine;

Expand Down Expand Up @@ -65,9 +66,8 @@ public class LegacyFieldMapper extends KNNVectorFieldMapper {
this.fieldType.setIndexOptions(IndexOptions.NONE);
fieldType.putAttribute(KNN_FIELD, "true"); // This attribute helps to determine knn field type
// TODO: This code is duplicated here and also in MethodFieldMapper class, I will fix this in prod code
if (indexCreatedVersion.before(Version.V_2_15_0)) {
// fieldType.setVectorAttributes(dimension, VectorEncoding.FLOAT32, mappedFieldType.spaceType.getVectorSimilarityFunction());
// } else {
if (indexCreatedVersion.before(Version.V_2_15_0)
|| !SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(SpaceType.getSpace(spaceType))) {
fieldType.setDocValuesType(DocValuesType.BINARY);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.opensearch.common.Explicit;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.knn.index.KNNMethodContext;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.util.KNNEngine;

import java.io.IOException;
Expand Down Expand Up @@ -62,10 +63,8 @@ public class MethodFieldMapper extends KNNVectorFieldMapper {
this.fieldType.putAttribute(KNN_ENGINE, knnEngine.getName());

// This for new VectorValuesFormat only enabling it for Faiss right now. We will change this to a version check later on .
if (knnMethodContext.getMethodComponentContext().getIndexVersion().before(Version.V_2_15_0)) {
// fieldType.setVectorAttributes(dimension, VectorEncoding.FLOAT32,
// knnMethodContext.getSpaceType().getVectorSimilarityFunction());
// } else {
if (knnMethodContext.getMethodComponentContext().getIndexVersion().before(Version.V_2_15_0)
|| !SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(knnMethodContext.getSpaceType())) {
fieldType.setDocValuesType(DocValuesType.BINARY);
}

Expand Down

0 comments on commit a325223

Please sign in to comment.