diff --git a/src/main/java/org/opensearch/knn/index/SpaceType.java b/src/main/java/org/opensearch/knn/index/SpaceType.java index 240bfbe91..8b6499987 100644 --- a/src/main/java/org/opensearch/knn/index/SpaceType.java +++ b/src/main/java/org/opensearch/knn/index/SpaceType.java @@ -188,4 +188,6 @@ public static SpaceType getSpace(String spaceTypeName) { public float scoreToDistanceTranslation(float score) { throw new UnsupportedOperationException(String.format("Space [%s] does not have a score to distance translation", getValue())); } + + public static final Set VECTOR_FIELD_SUPPORTED_SPACE_TYPES = Set.of(L2, COSINESIMIL, INNER_PRODUCT); } diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEnginesKNNVectorsWriter.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEnginesKNNVectorsWriter.java index 645e1e3b6..b92e81018 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEnginesKNNVectorsWriter.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEnginesKNNVectorsWriter.java @@ -95,6 +95,10 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE final FloatVectorValues floatVectorValues = KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState); // merging the graphs here final KNNCodecUtil.Pair pair = getFloatsFromFloatVectorValues(floatVectorValues); + if (pair.getVectorAddress() == 0 || pair.docs.length == 0) { + log.info("Skipping engine index creation as there are no vectors or docs to be merged"); + return; + } KNN80DocValuesConsumer.createNativeIndex(segmentWriteState, fieldInfo, pair); } diff --git a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java index 1ef1d579d..a8591efcc 100644 --- a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java +++ b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java @@ -8,6 
+8,7 @@ import lombok.AllArgsConstructor; import lombok.Getter; import lombok.Setter; +import lombok.ToString; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.search.DocIdSetIterator; @@ -37,6 +38,7 @@ public class KNNCodecUtil { public static final int JAVA_ROUNDING_NUMBER = 8; @AllArgsConstructor + @ToString public static final class Pair { public int[] docs; @Getter diff --git a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java index 421fa5b8e..6c139bce5 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java @@ -18,6 +18,7 @@ import lombok.extern.log4j.Log4j2; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.KnnByteVectorField; import org.apache.lucene.document.KnnFloatVectorField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; @@ -578,7 +579,8 @@ protected List getFieldsForByteVector(final byte[] array, final FieldType protected Field createVectorField(float[] vectorValue, int dimension, SpaceType spaceType) { // Because we will come to this function only in case when Native engines are getting used. So I am avoiding the // check of use Native engines here. 
- if (this.indexCreatedVersion.onOrAfter(Version.V_2_15_0)) { + // The dimension is only accessible here, hence the fieldType is also created in this function + if (this.indexCreatedVersion.onOrAfter(Version.V_2_15_0) && SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(spaceType)) { FieldType tempFieldType = new FieldType(fieldType); tempFieldType.setVectorAttributes(dimension, VectorEncoding.FLOAT32, spaceType.getVectorSimilarityFunction()); tempFieldType.freeze(); @@ -587,6 +589,19 @@ protected Field createVectorField(float[] vectorValue, int dimension, SpaceType return new VectorField(name(), vectorValue, fieldType); } + protected Field createVectorField(byte[] vectorValue, int dimension, SpaceType spaceType) { + // This function is only reached when native engines are in use, so the check for native engines is + // intentionally skipped here. + // The dimension is only accessible here, hence the fieldType is also created in this function + if (this.indexCreatedVersion.onOrAfter(Version.V_2_15_0) && SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(spaceType)) { + FieldType tempFieldType = new FieldType(fieldType); + tempFieldType.setVectorAttributes(dimension, VectorEncoding.BYTE, spaceType.getVectorSimilarityFunction()); + tempFieldType.freeze(); + return new KnnByteVectorField(name(), vectorValue, tempFieldType); + } + return new VectorField(name(), vectorValue, fieldType); + } + protected void parseCreateField(ParseContext context, int dimension, SpaceType spaceType, MethodComponentContext methodComponentContext) throws IOException { diff --git a/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java index 699249659..dbecbd718 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java @@ -14,6 +14,7 @@ import 
org.opensearch.common.settings.Settings; import org.opensearch.index.mapper.ParametrizedFieldMapper; import org.opensearch.knn.index.KNNSettings; +import org.opensearch.knn.index.SpaceType; import org.opensearch.knn.index.util.IndexHyperParametersUtil; import org.opensearch.knn.index.util.KNNEngine; @@ -65,9 +66,8 @@ public class LegacyFieldMapper extends KNNVectorFieldMapper { this.fieldType.setIndexOptions(IndexOptions.NONE); fieldType.putAttribute(KNN_FIELD, "true"); // This attribute helps to determine knn field type // TODO: This code is duplicated here and also in MethodFieldMapper class, I will fix this in prod code - if (indexCreatedVersion.before(Version.V_2_15_0)) { - // fieldType.setVectorAttributes(dimension, VectorEncoding.FLOAT32, mappedFieldType.spaceType.getVectorSimilarityFunction()); - // } else { + if (indexCreatedVersion.before(Version.V_2_15_0) + || !SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(SpaceType.getSpace(spaceType))) { fieldType.setDocValuesType(DocValuesType.BINARY); } diff --git a/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java index 90a1059b8..21acab235 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java @@ -12,6 +12,7 @@ import org.opensearch.common.Explicit; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.knn.index.KNNMethodContext; +import org.opensearch.knn.index.SpaceType; import org.opensearch.knn.index.util.KNNEngine; import java.io.IOException; @@ -62,10 +63,8 @@ public class MethodFieldMapper extends KNNVectorFieldMapper { this.fieldType.putAttribute(KNN_ENGINE, knnEngine.getName()); // This for new VectorValuesFormat only enabling it for Faiss right now. We will change this to a version check later on . 
- if (knnMethodContext.getMethodComponentContext().getIndexVersion().before(Version.V_2_15_0)) { - // fieldType.setVectorAttributes(dimension, VectorEncoding.FLOAT32, - // knnMethodContext.getSpaceType().getVectorSimilarityFunction()); - // } else { + if (knnMethodContext.getMethodComponentContext().getIndexVersion().before(Version.V_2_15_0) + || !SpaceType.VECTOR_FIELD_SUPPORTED_SPACE_TYPES.contains(knnMethodContext.getSpaceType())) { fieldType.setDocValuesType(DocValuesType.BINARY); }