Skip to content

Commit

Permalink
Fix bugs in aggregate operator and null column (#615)
Browse files Browse the repository at this point in the history
Fix bug where insert comparer gave wrong result and also where updating null column increased its counter
  • Loading branch information
Ulimo authored Nov 25, 2024
1 parent f357db0 commit caad396
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 6 deletions.
7 changes: 2 additions & 5 deletions src/FlowtideDotNet.Core/ColumnStore/Column.cs
Original file line number Diff line number Diff line change
Expand Up @@ -348,11 +348,8 @@ public void UpdateAt<T>(in int index, in T value)
}
else if (value.Type == ArrowTypeId.Null)
{
if (_type == ArrowTypeId.Null)
{
_nullCounter++;
}
else
// If it is null and being updated to null nothing needs to be updated
if (_type != ArrowTypeId.Null)
{
CheckNullInitialization();
if (_validityList.Get(index))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public int FindIndex(in ColumnRowReference key, in AggregateKeyStorageContainer
key.referenceBatch.Columns[i].GetValueAt(key.RowIndex, dataValueContainer, default);
var (low, high) = keyContainer._data.Columns[i].SearchBoundries(dataValueContainer, start, end, default);

if (low != 0)
if (low < 0)
{
return low;
}
Expand Down
28 changes: 28 additions & 0 deletions tests/FlowtideDotNet.AcceptanceTests/AggregateTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,34 @@ GROUP BY userkey
.Select(x => new { Userkey = x.Key, Sum = (double)x.Sum(y => y.OrderKey), Count = x.Count() }));
}

/// <summary>
/// Test case to solve bug when using multiple aggregates and group by's
/// </summary>
/// <returns></returns>
[Fact]
public async Task MultipleMaxAggregates()
{
GenerateData();
await StartStream(@"
INSERT INTO output
SELECT
userkey, max(orderkey), max(orderkey)
FROM orders
GROUP BY userkey, orderkey
");
await WaitForUpdate();

for (int i = 0; i < 10; i++)
{
GenerateData();
await WaitForUpdate();
}

AssertCurrentDataEqual(Orders
.GroupBy(x => $"{x.UserKey}:{x.OrderKey}")
.Select(x => new { Userkey = int.Parse(x.Key.Substring(0, x.Key.IndexOf(':'))), Max1 = (double)x.Max(y => y.OrderKey), Max2 = x.Max(y => y.OrderKey) }));
}

[Fact]
public async Task HavingSameAggregate()
{
Expand Down
12 changes: 12 additions & 0 deletions tests/FlowtideDotNet.Core.Tests/ColumnStore/NullColumnTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,5 +72,17 @@ public void TestCopy()

Assert.Equal(1000, copy.Count);
}

[Fact]
public void TestUpdateNullColumn()
{
Column column = new Column(GlobalMemoryManager.Instance);

column.Add(NullValue.Instance);

column.UpdateAt(0, NullValue.Instance);

Assert.Single(column);
}
}
}

1 comment on commit caad396

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: caad396 Previous: 7db1083 Ratio
FlowtideDotNet.Benchmarks.Stream.StreamBenchmark.InnerJoin 557812300 ns (± 9567145.941449832) 588638700 ns (± 9231369.002121696) 0.95
FlowtideDotNet.Benchmarks.Stream.StreamBenchmark.LeftJoin 657167810 ns (± 30452020.12041785) 632393477.7777778 ns (± 23156158.76418074) 1.04
FlowtideDotNet.Benchmarks.Stream.StreamBenchmark.ProjectionAndNormalization 204528780 ns (± 9736344.556214787) 171268600 ns (± 6644227.006958748) 1.19
FlowtideDotNet.Benchmarks.Stream.StreamBenchmark.SumAggregation 221412430 ns (± 18915487.721268106) 190822830 ns (± 6415026.852121856) 1.16
FlowtideDotNet.Benchmarks.Stream.StreamBenchmark.ListAggWithMapAggregation 2190222950 ns (± 194714968.52783585) 2656394200 ns (± 136074715.58524996) 0.82

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.