From c152c77442de5bc6d34f64bbabf338a726de6a0f Mon Sep 17 00:00:00 2001
From: Maciej Aszyk
Date: Mon, 20 Jan 2025 15:19:29 +0100
Subject: [PATCH] RavenDB-23631 Remove duplicates when performing `AndWith` with `TermReader`

---
Review notes (placed after the three-dash separator, so they are not part of
the commit message):

* In the visible part of AndWithFill, each pass calls MergeHelper.And on
  localMatches[..read] and the incoming matches, then advances `results` by
  the number of common ids. When more than one pass ran (fillCounter > 1) the
  merged ids used to be only sorted (Sort.Run); they are now passed to
  Sorting.SortAndRemoveDuplicates, and its return value becomes the match
  count that the method returns.
* The new test indexes 1000 documents whose "name" field holds two terms each,
  ANDs an InQuery on two ids with an ExistsQuery on "name", and expects exactly
  two distinct ids followed by an empty Fill.
* This copy was restored from a whitespace-collapsed export. Generic type
  arguments (`Span<long>`, `MemoryMarshal.Cast<byte, long>`) were re-added;
  TODO confirm `byte` as the source element type. Indentation and blank
  context lines are reconstructed from the hunk line counts; in the second
  hunk the fifth added line is assumed to be the blank line in front of
  `results = results.Slice(common);`. Verify against the upstream commit, or
  apply with `git apply --ignore-whitespace`. The author e-mail address is
  missing from the From: header.

 src/Corax/Querying/Matches/MultiTermMatch.cs | 10 +++-
 test/SlowTests/Corax/RavenDB_23631.cs        | 57 ++++++++++++++++++++
 2 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 test/SlowTests/Corax/RavenDB_23631.cs

diff --git a/src/Corax/Querying/Matches/MultiTermMatch.cs b/src/Corax/Querying/Matches/MultiTermMatch.cs
index a650263fdb7c..67e9aa9c207e 100644
--- a/src/Corax/Querying/Matches/MultiTermMatch.cs
+++ b/src/Corax/Querying/Matches/MultiTermMatch.cs
@@ -367,12 +367,14 @@ private int AndWithFill(Span<long> buffer, int matches)
             using var _ = _context.Allocate(3 * sizeof(long) * buffer.Length, out var bufferHolder);
             var longBuffer = MemoryMarshal.Cast<byte, long>(bufferHolder.ToSpan());
             _termReader.Reset(ref this);
+
             Span<long> results = longBuffer.Slice(0, buffer.Length);
             Span<long> incomingMatches = longBuffer.Slice(buffer.Length, buffer.Length);
             Span<long> localMatches = longBuffer.Slice(2 * buffer.Length, buffer.Length);
 
             var actualMatches = buffer.Slice(0, matches);
             actualMatches.CopyTo(incomingMatches);
+
             var currentMatchCount = 0;
             _totalResults = 0;
 
@@ -385,7 +387,11 @@ private int AndWithFill(Span<long> buffer, int matches)
                 fillCounter++;
                 _token.ThrowIfCancellationRequested();
                 _totalResults += read;
-                var common = MergeHelper.And(results, localMatches.Slice(0, read), incomingMatches.Slice(0, matches));
+                var common = MergeHelper.And(
+                    dst: results,
+                    left: localMatches.Slice(0, read),
+                    right: incomingMatches.Slice(0, matches));
+
                 results = results.Slice(common);
                 currentMatchCount += common;
             }
@@ -393,7 +399,7 @@ private int AndWithFill(Span<long> buffer, int matches)
             longBuffer.Slice(0, currentMatchCount).CopyTo(buffer);
 
             if (fillCounter > 1)
-                Sort.Run(buffer.Slice(0, currentMatchCount));
+                currentMatchCount = Sorting.SortAndRemoveDuplicates(buffer[..currentMatchCount]);
 
             return currentMatchCount;
         }
diff --git a/test/SlowTests/Corax/RavenDB_23631.cs b/test/SlowTests/Corax/RavenDB_23631.cs
new file mode 100644
index 000000000000..57c25e13bfab
--- /dev/null
+++ b/test/SlowTests/Corax/RavenDB_23631.cs
@@ -0,0 +1,57 @@
+using System;
+using System.Collections.Generic;
+using Corax;
+using Corax.Indexing;
+using Corax.Mappings;
+using Corax.Querying;
+using FastTests.Voron;
+using Sparrow;
+using Tests.Infrastructure;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace SlowTests.Corax;
+
+public class RavenDB_23631(ITestOutputHelper output) : StorageTest(output)
+{
+    [RavenFact(RavenTestCategory.Querying | RavenTestCategory.Corax)]
+    public void MultiTermMatchDoesNotReturnDuplicatesWhenPerformingAndWith()
+    {
+        using var mapping = IndexFieldsMappingBuilder.CreateForWriter(false)
+            .AddBinding(0, "id()")
+            .AddBinding(1, "name")
+            .Build();
+
+        using (var writer = new IndexWriter(Env, mapping, SupportedFeatures.All))
+        {
+            for (int i = 0; i < 1000; i++)
+            {
+                using (var builder = writer.Index($"id/{i}"))
+                {
+                    builder.Write(0, Encodings.Utf8.GetBytes($"id/{i}"));
+                    builder.IncrementList();
+                    builder.Write(1, Encodings.Utf8.GetBytes("name/0"));
+                    builder.Write(1, Encodings.Utf8.GetBytes("name/1"));
+                    builder.DecrementList();
+                    builder.EndWriting();
+                }
+            }
+
+            writer.Commit();
+        }
+
+        using (var searcher = new IndexSearcher(Env, mapping))
+        {
+            var @in = searcher.InQuery("id()", ["id/0", "id/10"]);
+            var mtm = searcher.ExistsQuery(mapping.GetByFieldId(1).Metadata);
+
+            var resultMatch = searcher.And(@in, mtm);
+            Span<long> ids = stackalloc long[16];
+            var read = resultMatch.Fill(ids);
+            Assert.Distinct(ids[..read].ToArray());
+            Assert.Equal(2, read);
+            var nothingLeft = resultMatch.Fill(ids) == 0;
+            Assert.True(nothingLeft);
+        }
+    }
+}