Skip to content

Commit

Permalink
Add unit test (UT) in WIP state
Browse files Browse the repository at this point in the history
Signed-off-by: John Mazanec <jmazane@amazon.com>
  • Loading branch information
jmazanec15 committed Feb 20, 2025
1 parent 833312f commit f0a57c4
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.opensearch.knn.index.codec.KNN10010Codec;

import lombok.Builder;
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.codecs.DocValuesFormat;
Expand All @@ -17,7 +18,7 @@
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat;
import org.opensearch.knn.index.codec.KNN80Codec.KNN80DocValuesFormat;
import org.opensearch.knn.index.codec.KNN9120Codec.DerivedSourceStoredFieldsFormat;
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceStoredFieldsFormat;
import org.opensearch.knn.index.codec.KNN9120Codec.KNN9120PerFieldKnnVectorsFormat;
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier;

Expand All @@ -26,12 +27,12 @@
/**
* KNN Codec that wraps the Lucene codec (Lucene101Codec) that is part of Lucene 10.1
*/

@Log4j2
public class KNN10010Codec extends FilterCodec {

private static final String NAME = "KNN10010Codec";
public static final Codec DEFAULT_DELEGATE = new Lucene101Codec();
private static final PerFieldKnnVectorsFormat DEFAULT_KNN_VECTOR_FORMAT = new KNN9120PerFieldKnnVectorsFormat(Optional.empty());
public static final PerFieldKnnVectorsFormat DEFAULT_KNN_VECTOR_FORMAT = new KNN9120PerFieldKnnVectorsFormat(Optional.empty());

private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat;
private final StoredFieldsFormat storedFieldsFormat;
Expand All @@ -43,6 +44,7 @@ public class KNN10010Codec extends FilterCodec {
*/
public KNN10010Codec() {
    // Delegates to the full constructor with the class-level defaults and no MapperService.
    this(DEFAULT_DELEGATE, DEFAULT_KNN_VECTOR_FORMAT, null);
    // Diagnostic only: kept at debug level so that codec instantiation does not add noise to
    // production logs. NOTE(review): the original message "SPI" suggests this constructor is
    // the one used when the codec is loaded by name through Lucene's SPI - confirm.
    log.debug("{} created through the no-arg (SPI) constructor", NAME);
}

/**
Expand All @@ -58,6 +60,7 @@ public KNN10010Codec(Codec delegate, PerFieldKnnVectorsFormat knnVectorsFormat,
perFieldKnnVectorsFormat = knnVectorsFormat;
this.mapperService = mapperService;
this.storedFieldsFormat = getStoredFieldsFormat();
log.info("Regular constructor");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat;
import org.opensearch.knn.index.codec.KNN80Codec.KNN80DocValuesFormat;
import org.opensearch.knn.index.codec.KNN9120Codec.DerivedSourceStoredFieldsFormat;
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceStoredFieldsFormat;
import org.opensearch.knn.index.codec.KNN9120Codec.KNN9120PerFieldKnnVectorsFormat;
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN9120Codec;
package org.opensearch.knn.index.codec.derivedsource;

import lombok.AllArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
Expand All @@ -20,7 +21,6 @@
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier;
import org.opensearch.knn.index.mapper.KNNVectorFieldType;

import java.io.IOException;
Expand All @@ -31,6 +31,7 @@
import static org.opensearch.knn.common.KNNConstants.DERIVED_VECTOR_FIELD_ATTRIBUTE_TRUE_VALUE;

@AllArgsConstructor
@Log4j2
public class DerivedSourceStoredFieldsFormat extends StoredFieldsFormat {

private static final String DELEGATE_CODEC_KEY = "knn_delegate_codec";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,14 @@
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN9120Codec;
package org.opensearch.knn.index.codec.derivedsource;

import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.util.IOUtils;
import org.opensearch.index.fieldvisitor.FieldsVisitor;
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceReadersSupplier;
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceStoredFieldVisitor;
import org.opensearch.knn.index.codec.derivedsource.DerivedSourceVectorInjector;

import java.io.IOException;
import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN9120Codec;
package org.opensearch.knn.index.codec.derivedsource;

import lombok.RequiredArgsConstructor;
import org.apache.lucene.codecs.StoredFieldsWriter;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.derivedsource;

import lombok.SneakyThrows;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.opensearch.knn.KNNTestCase;
import org.opensearch.knn.index.codec.KNN10010Codec.KNN10010Codec;

import static org.apache.lucene.codecs.lucene101.Lucene101Codec.Mode.BEST_COMPRESSION;

/**
 * Tests that stored fields written through a {@link KNN10010Codec} can be read back unchanged.
 */
public class DerivedSourceStoredFieldsFormatTests extends KNNTestCase {

    /**
     * Builds a {@link KNN10010Codec} whose delegate is a {@link Lucene101Codec} in
     * {@code BEST_COMPRESSION} mode, indexes a single document holding one stored text field, and
     * asserts that the stored value read back from the index equals the value that was written.
     *
     * The directory, writer and reader are all managed by try-with-resources so that they are
     * released even when indexing, reading or the assertion fails.
     */
    @SneakyThrows
    public void testCustomCodecDelegate() {
        // TODO: We need to replace this with a custom codec so that we can properly test. See
        // https://github.com/opensearch-project/custom-codecs/blob/main/src/main/java/org/opensearch/index/codec/customcodecs/Lucene912CustomCodec.java#L37
        Codec codec = new KNN10010Codec(new Lucene101Codec(BEST_COMPRESSION), KNN10010Codec.DEFAULT_KNN_VECTOR_FORMAT, null);

        try (Directory dir = newFSDirectory(createTempDir())) {
            IndexWriterConfig iwc = newIndexWriterConfig();
            iwc.setMergeScheduler(new SerialMergeScheduler());
            iwc.setCodec(codec);

            String fieldName = "test";
            String randomString = randomAlphaOfLengthBetween(100, 1000);
            TextField basicTextField = new TextField(fieldName, randomString, Field.Store.YES);

            // Closing the writer at the end of this block is what makes the document
            // visible to the reader opened below.
            try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc)) {
                Document doc = new Document();
                doc.add(basicTextField);
                writer.addDocument(doc);
            }

            try (IndexReader indexReader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(indexReader);
                Document doc = searcher.storedFields().document(0);
                assertEquals(randomString, doc.get(fieldName));
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN9120Codec;
package org.opensearch.knn.index.codec.derivedsource;

import lombok.SneakyThrows;
import org.apache.lucene.codecs.StoredFieldsWriter;
Expand Down

0 comments on commit f0a57c4

Please sign in to comment.