Fix missing bucket in terms aggregation with missing value
Signed-off-by: kkewwei <kewei.11@bytedance.com>
Signed-off-by: kkewwei <kkewwei@163.com>
kkewwei committed Feb 21, 2025
1 parent 664f254 commit 317a1d2
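
In the scenario this commit addresses, a terms aggregation is configured with a `missing` value so that documents lacking the target field are counted under a substitute term; before this fix, the bucket for that substitute value could be absent from the result. A minimal, hedged illustration of such an aggregation (the field and value names here are made up for the example; the added test further below builds the real equivalents):

```java
import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.opensearch.search.aggregations.support.ValueType;

// Illustrative only: documents without a "nickname" value should be counted
// under the substituted "no_nickname" bucket rather than disappearing.
final class MissingBucketExample {
    static TermsAggregationBuilder nicknameTerms() {
        return new TermsAggregationBuilder("nick")
            .userValueTypeHint(ValueType.STRING)
            .field("nickname")          // hypothetical field name
            .missing("no_nickname");    // substitute value for documents missing the field
    }
}
```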
Showing 3 changed files with 96 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -37,6 +37,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add highlighting for wildcard search on `match_only_text` field ([#17101](https://github.com/opensearch-project/OpenSearch/pull/17101))
- Fix illegal argument exception when creating a PIT ([#16781](https://github.com/opensearch-project/OpenSearch/pull/16781))
- Fix HTTP API calls that hang with 'Accept-Encoding: zstd' ([#17408](https://github.com/opensearch-project/OpenSearch/pull/17408))
- Fix missing bucket in terms aggregation with missing value ([#17418](https://github.com/opensearch-project/OpenSearch/pull/17418))

### Security

@@ -359,7 +359,7 @@ public long getValueCount() {

@Override
public int docValueCount() {
-        return values.docValueCount();
+        return Math.max(1, values.docValueCount());
}

@Override
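A hedged reading of the one-line change above (the enclosing class is not named in this view): when a `missing` value is configured, a document with no real values for the field still has to expose exactly one value, the substituted missing term, to the collector; if `docValueCount()` reports 0 for such a document, nothing is iterated and the missing bucket never receives a count. Clamping the count to at least 1 keeps a slot for the substituted value. A simplified, self-contained sketch of that idea (class and method names are hypothetical, not the actual OpenSearch wrapper):

```java
// Hypothetical illustration of why Math.max(1, values.docValueCount()) matters
// when a "missing" value is configured; this is not the real OpenSearch class.
final class MissingValueView {
    /** Stand-in for the wrapped per-document doc values. */
    interface InnerValues {
        int docValueCount();        // number of real values for the current document
        String valueAt(int index);  // i-th real value for the current document
    }

    private final InnerValues values;
    private final String missingTerm;

    MissingValueView(InnerValues values, String missingTerm) {
        this.values = values;
        this.missingTerm = missingTerm;
    }

    /** Values the collector iterates for the current document: never fewer than one. */
    int docValueCount() {
        // A document with no real values still contributes the missing term.
        return Math.max(1, values.docValueCount());
    }

    /** Falls back to the missing term when the document has no real values. */
    String valueAt(int index) {
        return index < values.docValueCount() ? values.valueAt(index) : missingTerm;
    }
}
```

The test added below exercises this through both the text-fielddata and keyword doc-values paths and asserts that the substituted bucket keeps the document count for documents missing the field.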
@@ -34,6 +34,7 @@
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
@@ -42,6 +43,8 @@
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.NoMergePolicy;
@@ -75,6 +78,8 @@
import org.opensearch.index.mapper.RangeFieldMapper;
import org.opensearch.index.mapper.RangeType;
import org.opensearch.index.mapper.SeqNoFieldMapper;
import org.opensearch.index.mapper.TextFieldMapper;
import org.opensearch.index.mapper.TextParams;
import org.opensearch.index.mapper.Uid;
import org.opensearch.index.query.MatchAllQueryBuilder;
import org.opensearch.index.query.QueryBuilders;
@@ -1578,6 +1583,95 @@ public void testOrderByPipelineAggregation() throws Exception {
}
}

public void testBucketInTermsAggregationWithMissingValue() throws IOException {
try (Directory directory = newDirectory()) {
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
// test text
{
FieldType type = TextParams.buildFieldType(() -> true, () -> false, () -> "positions", () -> false, () -> "no");
Document document = new Document();
document.add(new Field("mv_field", "name1", type));
document.add(new Field("mv_field", "name2", type));
indexWriter.addDocument(document);
document = new Document();
document.add(new Field("mv_field1", "value1", type));
indexWriter.addDocument(document);
document = new Document();
document.add(new Field("mv_field1", "value2", type));
indexWriter.addDocument(document);
indexWriter.flush();
try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
IndexSearcher indexSearcher = newIndexSearcher(indexReader);
TextFieldMapper.TextFieldType fieldType = new TextFieldMapper.TextFieldType("mv_field");
fieldType.setFielddata(true);

TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("nick").userValueTypeHint(ValueType.STRING)
.field("mv_field")
.missing("no_nickname");
TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));

aggregator.preCollection();
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
Terms result = reduce(aggregator);
assertEquals(3, result.getBuckets().size());
assertEquals("no_nickname", result.getBuckets().get(0).getKeyAsString());
assertEquals(2L, result.getBuckets().get(0).getDocCount());
assertEquals("name1", result.getBuckets().get(1).getKeyAsString());
assertEquals(1L, result.getBuckets().get(1).getDocCount());
assertEquals("name2", result.getBuckets().get(2).getKeyAsString());
assertEquals(1L, result.getBuckets().get(2).getDocCount());

}
indexWriter.deleteAll();
}

// test keyword
{
FieldType fieldtype = new FieldType(KeywordFieldMapper.Defaults.FIELD_TYPE);
fieldtype.setDocValuesType(DocValuesType.SORTED_SET);
fieldtype.setIndexOptions(IndexOptions.NONE);
fieldtype.setStored(true);

Document document = new Document();
document.add(new SortedSetDocValuesField("mv_field1", new BytesRef("name1")));
document.add(new SortedSetDocValuesField("mv_field1", new BytesRef("name2")));
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("mv_field2", new BytesRef("value1")));
indexWriter.addDocument(document);
document = new Document();
document.add(new SortedSetDocValuesField("mv_field2", new BytesRef("value2")));
indexWriter.addDocument(document);
indexWriter.flush();
try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
IndexSearcher indexSearcher = newIndexSearcher(indexReader);
KeywordFieldMapper.KeywordFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("mv_field1");

TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").userValueTypeHint(
ValueType.STRING
).field("mv_field1").missing("no_nickname1");
TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));

aggregator.preCollection();
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
aggregator.postCollection();
Terms result = reduce(aggregator);
assertEquals(3, result.getBuckets().size());
assertEquals("no_nickname1", result.getBuckets().get(0).getKeyAsString());
assertEquals(2L, result.getBuckets().get(0).getDocCount());
assertEquals("name1", result.getBuckets().get(1).getKeyAsString());
assertEquals(1L, result.getBuckets().get(1).getDocCount());
assertEquals("name2", result.getBuckets().get(2).getKeyAsString());
assertEquals(1L, result.getBuckets().get(2).getDocCount());
}
}
}
}
}

private final SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID();

private List<Document> generateDocsWithNested(String id, int value, int[] nestedValues) {