From f0a57c49a7b5b834deb2caeb615fc6fe20667d1c Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Thu, 20 Feb 2025 09:45:56 -0800 Subject: [PATCH] Add uT in WIP state Signed-off-by: John Mazanec --- .../codec/KNN10010Codec/KNN10010Codec.java | 9 ++- .../KNN9120Codec/KNN9120Codec.java | 2 +- .../DerivedSourceStoredFieldsFormat.java | 5 +- .../DerivedSourceStoredFieldsReader.java | 5 +- .../DerivedSourceStoredFieldsWriter.java | 2 +- .../DerivedSourceStoredFieldsFormatTests.java | 55 +++++++++++++++++++ .../DerivedSourceStoredFieldsWriterTests.java | 2 +- 7 files changed, 68 insertions(+), 12 deletions(-) rename src/main/java/org/opensearch/knn/index/codec/{KNN9120Codec => derivedsource}/DerivedSourceStoredFieldsFormat.java (97%) rename src/main/java/org/opensearch/knn/index/codec/{KNN9120Codec => derivedsource}/DerivedSourceStoredFieldsReader.java (94%) rename src/main/java/org/opensearch/knn/index/codec/{KNN9120Codec => derivedsource}/DerivedSourceStoredFieldsWriter.java (98%) create mode 100644 src/test/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsFormatTests.java rename src/test/java/org/opensearch/knn/index/codec/{KNN9120Codec => derivedsource}/DerivedSourceStoredFieldsWriterTests.java (96%) diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN10010Codec/KNN10010Codec.java b/src/main/java/org/opensearch/knn/index/codec/KNN10010Codec/KNN10010Codec.java index acec0fa7f..a0da733b1 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN10010Codec/KNN10010Codec.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN10010Codec/KNN10010Codec.java @@ -6,6 +6,7 @@ package org.opensearch.knn.index.codec.KNN10010Codec; import lombok.Builder; +import lombok.extern.log4j.Log4j2; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.CompoundFormat; import org.apache.lucene.codecs.DocValuesFormat; @@ -17,7 +18,7 @@ import org.opensearch.index.mapper.MapperService; import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat; import org.opensearch.knn.index.codec.KNN80Codec.KNN80DocValuesFormat; -import org.opensearch.knn.index.codec.KNN9120Codec.DerivedSourceStoredFieldsFormat; +import org.opensearch.knn.index.codec.derivedsource.DerivedSourceStoredFieldsFormat; import org.opensearch.knn.index.codec.KNN9120Codec.KNN9120PerFieldKnnVectorsFormat; import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier; @@ -26,12 +27,12 @@ /** * KNN Codec that wraps the Lucene Codec which is part of Lucene 10.0.1 */ - +@Log4j2 public class KNN10010Codec extends FilterCodec { private static final String NAME = "KNN10010Codec"; public static final Codec DEFAULT_DELEGATE = new Lucene101Codec(); - private static final PerFieldKnnVectorsFormat DEFAULT_KNN_VECTOR_FORMAT = new KNN9120PerFieldKnnVectorsFormat(Optional.empty()); + public static final PerFieldKnnVectorsFormat DEFAULT_KNN_VECTOR_FORMAT = new KNN9120PerFieldKnnVectorsFormat(Optional.empty()); private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat; private final StoredFieldsFormat storedFieldsFormat; @@ -43,6 +44,7 @@ public class KNN10010Codec extends FilterCodec { */ public KNN10010Codec() { this(DEFAULT_DELEGATE, DEFAULT_KNN_VECTOR_FORMAT, null); + log.info("SPI"); } /** @@ -58,6 +60,7 @@ public KNN10010Codec(Codec delegate, PerFieldKnnVectorsFormat knnVectorsFormat, perFieldKnnVectorsFormat = knnVectorsFormat; this.mapperService = mapperService; this.storedFieldsFormat = getStoredFieldsFormat(); + log.info("Regular constructor"); } @Override diff --git a/src/main/java/org/opensearch/knn/index/codec/backward_codecs/KNN9120Codec/KNN9120Codec.java b/src/main/java/org/opensearch/knn/index/codec/backward_codecs/KNN9120Codec/KNN9120Codec.java index ecd02705b..6e7d90de8 100644 --- a/src/main/java/org/opensearch/knn/index/codec/backward_codecs/KNN9120Codec/KNN9120Codec.java +++ b/src/main/java/org/opensearch/knn/index/codec/backward_codecs/KNN9120Codec/KNN9120Codec.java @@ -16,7 +16,7 @@ import org.opensearch.index.mapper.MapperService; import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat; import org.opensearch.knn.index.codec.KNN80Codec.KNN80DocValuesFormat; -import org.opensearch.knn.index.codec.KNN9120Codec.DerivedSourceStoredFieldsFormat; +import org.opensearch.knn.index.codec.derivedsource.DerivedSourceStoredFieldsFormat; import org.opensearch.knn.index.codec.KNN9120Codec.KNN9120PerFieldKnnVectorsFormat; import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier; diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsFormat.java b/src/main/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsFormat.java similarity index 97% rename from src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsFormat.java rename to src/main/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsFormat.java index 109431d08..de2c539a9 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsFormat.java @@ -3,9 +3,10 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.knn.index.codec.KNN9120Codec; +package org.opensearch.knn.index.codec.derivedsource; import lombok.AllArgsConstructor; +import lombok.extern.log4j.Log4j2; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.StoredFieldsReader; @@ -20,7 +21,6 @@ import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MapperService; import org.opensearch.knn.index.KNNSettings; -import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier; import org.opensearch.knn.index.mapper.KNNVectorFieldType; import java.io.IOException; @@ -31,6 +31,7 @@ import static org.opensearch.knn.common.KNNConstants.DERIVED_VECTOR_FIELD_ATTRIBUTE_TRUE_VALUE; @AllArgsConstructor +@Log4j2 public class DerivedSourceStoredFieldsFormat extends StoredFieldsFormat { private static final String DELEGATE_CODEC_KEY = "knn_delegate_codec"; diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsReader.java b/src/main/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsReader.java similarity index 94% rename from src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsReader.java rename to src/main/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsReader.java index 24900eb19..af3fbf68b 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsReader.java +++ b/src/main/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsReader.java @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.knn.index.codec.KNN9120Codec; +package org.opensearch.knn.index.codec.derivedsource; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.index.FieldInfo; @@ -11,9 +11,6 @@ import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.util.IOUtils; import org.opensearch.index.fieldvisitor.FieldsVisitor; -import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier; -import org.opensearch.knn.index.codec.derivedsource.DerivedSourceStoredFieldVisitor; -import org.opensearch.knn.index.codec.derivedsource.DerivedSourceVectorInjector; import java.io.IOException; import java.util.List; diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsWriter.java b/src/main/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsWriter.java similarity index 98% rename from src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsWriter.java rename to src/main/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsWriter.java index 0c43f6a49..32098a546 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsWriter.java +++ b/src/main/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsWriter.java @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.knn.index.codec.KNN9120Codec; +package org.opensearch.knn.index.codec.derivedsource; import lombok.RequiredArgsConstructor; import org.apache.lucene.codecs.StoredFieldsWriter; diff --git a/src/test/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsFormatTests.java b/src/test/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsFormatTests.java new file mode 100644 index 000000000..b2d29169d --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsFormatTests.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.derivedsource; + +import lombok.SneakyThrows; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SerialMergeScheduler; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.opensearch.knn.KNNTestCase; +import org.opensearch.knn.index.codec.KNN10010Codec.KNN10010Codec; + +import static org.apache.lucene.codecs.lucene101.Lucene101Codec.Mode.BEST_COMPRESSION; + +public class DerivedSourceStoredFieldsFormatTests extends KNNTestCase { + + @SneakyThrows + public void testCustomCodecDelegate() { + // TODO: We need to replace this with a custom codec so that we can properly test. See + // https://github.com/opensearch-project/custom-codecs/blob/main/src/main/java/org/opensearch/index/codec/customcodecs/Lucene912CustomCodec.java#L37 + Codec codec = new KNN10010Codec(new Lucene101Codec(BEST_COMPRESSION), KNN10010Codec.DEFAULT_KNN_VECTOR_FORMAT, null); + + Directory dir = newFSDirectory(createTempDir()); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setMergeScheduler(new SerialMergeScheduler()); + iwc.setCodec(codec); + + String fieldName = "test"; + String randomString = randomAlphaOfLengthBetween(100, 1000); + TextField basicTextField = new TextField(fieldName, randomString, Field.Store.YES); + try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc)) { + Document doc = new Document(); + doc.add(basicTextField); + writer.addDocument(doc); + } + + try (IndexReader indexReader = DirectoryReader.open(dir)) { + IndexSearcher searcher = new IndexSearcher(indexReader); + Document doc = searcher.storedFields().document(0); + assertEquals(randomString, doc.get(fieldName)); + } + dir.close(); + } +} diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsWriterTests.java b/src/test/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsWriterTests.java similarity index 96% rename from src/test/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsWriterTests.java rename to src/test/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsWriterTests.java index 2953539ad..ff69d7cf5 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNN9120Codec/DerivedSourceStoredFieldsWriterTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/derivedsource/DerivedSourceStoredFieldsWriterTests.java @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.knn.index.codec.KNN9120Codec; +package org.opensearch.knn.index.codec.derivedsource; import lombok.SneakyThrows; import org.apache.lucene.codecs.StoredFieldsWriter;