diff --git a/src/main/java/org/opensearch/knn/index/SpaceType.java b/src/main/java/org/opensearch/knn/index/SpaceType.java index 240bfbe913..8b64999873 100644 --- a/src/main/java/org/opensearch/knn/index/SpaceType.java +++ b/src/main/java/org/opensearch/knn/index/SpaceType.java @@ -188,4 +188,6 @@ public static SpaceType getSpace(String spaceTypeName) { public float scoreToDistanceTranslation(float score) { throw new UnsupportedOperationException(String.format("Space [%s] does not have a score to distance translation", getValue())); } + + public static final Set VECTOR_FIELD_SUPPORTED_SPACE_TYPES = Set.of(L2, COSINESIMIL, INNER_PRODUCT); } diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEnginesKNNVectorsWriter.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEnginesKNNVectorsWriter.java index 645e1e3b6c..b92e81018e 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEnginesKNNVectorsWriter.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEnginesKNNVectorsWriter.java @@ -95,6 +95,10 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE final FloatVectorValues floatVectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); // merging the graphs here final KNNCodecUtil.Pair pair = getFloatsFromFloatVectorValues(floatVectorValues); + if (pair.getVectorAddress() == 0 || pair.docs.length == 0) { + log.info("Skipping engine index creation as there are no vectors or docs to be merged"); + return; + } KNN80DocValuesConsumer.createNativeIndex(segmentWriteState, fieldInfo, pair); } diff --git a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java index 1ef1d579d8..a8591efcc7 100644 --- a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java +++ b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java @@ -8,6 +8,7 @@ import lombok.AllArgsConstructor; import lombok.Getter; import lombok.Setter; +import lombok.ToString; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.search.DocIdSetIterator; @@ -37,6 +38,7 @@ public class KNNCodecUtil { public static final int JAVA_ROUNDING_NUMBER = 8; @AllArgsConstructor + @ToString public static final class Pair { public int[] docs; @Getter diff --git a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java index 221e0afa10..c945c60b31 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java @@ -18,6 +18,7 @@ import lombok.extern.log4j.Log4j2; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.KnnByteVectorField; import org.apache.lucene.document.KnnFloatVectorField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; @@ -546,7 +547,8 @@ private MethodComponentContext getMethodComponentContext(KNNMethodContext knnMet protected Field createVectorField(float[] vectorValue, int dimension, SpaceType spaceType) { // Because we will come to this function only in case when Native engines are getting used. So I am avoiding the // check of use Native engines here. - if (this.indexCreatedVersion.onOrAfter(Version.V_2_15_0)) { + // Also dimension field is only accessible here hence we have to use this function to create fieldType too + if (this.indexCreatedVersion.onOrAfter(Version.V_2_15_0) && SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(spaceType)) { FieldType tempFieldType = new FieldType(fieldType); tempFieldType.setVectorAttributes(dimension, VectorEncoding.FLOAT32, spaceType.getVectorSimilarityFunction()); tempFieldType.freeze(); @@ -555,6 +557,19 @@ protected Field createVectorField(float[] vectorValue, int dimension, SpaceType return new VectorField(name(), vectorValue, fieldType); } + protected Field createVectorField(byte[] vectorValue, int dimension, SpaceType spaceType) { + // Because we will come to this function only in case when Native engines are getting used. So I am avoiding the + // check of use Native engines here. + // Also dimension field is only accessible here hence we have to use this function to create fieldType too + if (this.indexCreatedVersion.onOrAfter(Version.V_2_15_0) && SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(spaceType)) { + FieldType tempFieldType = new FieldType(fieldType); + tempFieldType.setVectorAttributes(dimension, VectorEncoding.BYTE, spaceType.getVectorSimilarityFunction()); + tempFieldType.freeze(); + return new KnnByteVectorField(name(), vectorValue, tempFieldType); + } + return new VectorField(name(), vectorValue, fieldType); + } + protected void parseCreateField(ParseContext context, int dimension, SpaceType spaceType, MethodComponentContext methodComponentContext) throws IOException { @@ -569,7 +584,7 @@ protected void parseCreateField(ParseContext context, int dimension, SpaceType s } final byte[] array = bytesArrayOptional.get(); spaceType.validateVector(array); - VectorField point = new VectorField(name(), array, fieldType); + Field point = createVectorField(array, dimension, spaceType); context.doc().add(point); if (this.stored) { diff --git a/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java index 6992496592..dbecbd7181 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java @@ -14,6 +14,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.index.mapper.ParametrizedFieldMapper; import org.opensearch.knn.index.KNNSettings; +import org.opensearch.knn.index.SpaceType; import org.opensearch.knn.index.util.IndexHyperParametersUtil; import org.opensearch.knn.index.util.KNNEngine; @@ -65,9 +66,8 @@ public class LegacyFieldMapper extends KNNVectorFieldMapper { this.fieldType.setIndexOptions(IndexOptions.NONE); fieldType.putAttribute(KNN_FIELD, "true"); // This attribute helps to determine knn field type // TODO: This code is duplicated here and also in MethodFieldMapper class, I will fix this in prod code - if (indexCreatedVersion.before(Version.V_2_15_0)) { - // fieldType.setVectorAttributes(dimension, VectorEncoding.FLOAT32, mappedFieldType.spaceType.getVectorSimilarityFunction()); - // } else { + if (indexCreatedVersion.before(Version.V_2_15_0) + || !SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(SpaceType.getSpace(spaceType))) { fieldType.setDocValuesType(DocValuesType.BINARY); } diff --git a/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java index 90a1059b84..21acab235d 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java @@ -12,6 +12,7 @@ import org.opensearch.common.Explicit; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.knn.index.KNNMethodContext; +import org.opensearch.knn.index.SpaceType; import org.opensearch.knn.index.util.KNNEngine; import java.io.IOException; @@ -62,10 +63,8 @@ public class MethodFieldMapper extends KNNVectorFieldMapper { this.fieldType.putAttribute(KNN_ENGINE, knnEngine.getName()); // This for new VectorValuesFormat only enabling it for Faiss right now. We will change this to a version check later on . - if (knnMethodContext.getMethodComponentContext().getIndexVersion().before(Version.V_2_15_0)) { - // fieldType.setVectorAttributes(dimension, VectorEncoding.FLOAT32, - // knnMethodContext.getSpaceType().getVectorSimilarityFunction()); - // } else { + if (knnMethodContext.getMethodComponentContext().getIndexVersion().before(Version.V_2_15_0) + || !SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(knnMethodContext.getSpaceType())) { fieldType.setDocValuesType(DocValuesType.BINARY); }