Skip to content

Commit

Permalink
feat: (GH-56) Add arrayApproxEqualString to handle null characters in…
Browse files Browse the repository at this point in the history
… strings

Signed-off-by: Saurabh Kumar Singh <singh1203.ss@gmail.com>
  • Loading branch information
singh1203 committed Feb 19, 2025
1 parent 460f500 commit 44d8ad3
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 3 deletions.
33 changes: 31 additions & 2 deletions arrow/array/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package array
import (
"fmt"
"math"
"strings"

"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/float16"
Expand Down Expand Up @@ -487,13 +488,13 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool {
return arrayEqualBinary(l, r)
case *String:
r := right.(*String)
return arrayEqualString(l, r)
return arrayApproxEqualString(l, r)
case *LargeBinary:
r := right.(*LargeBinary)
return arrayEqualLargeBinary(l, r)
case *LargeString:
r := right.(*LargeString)
return arrayEqualLargeString(l, r)
return arrayApproxEqualLargeString(l, r)
case *BinaryView:
r := right.(*BinaryView)
return arrayEqualBinaryView(l, r)
Expand Down Expand Up @@ -644,6 +645,34 @@ func validityBitmapEqual(left, right arrow.Array) bool {
return true
}

// arrayApproxEqualString reports whether left and right hold the same string
// in every slot where left is non-null, treating values that differ only by
// trailing NUL ("\x00") bytes as equal.
//
// Only left's length and validity are consulted here.
// NOTE(review): presumably the caller has already verified that both arrays
// have matching lengths and validity bitmaps — confirm at the call site.
func arrayApproxEqualString(left, right *String) bool {
	for idx := 0; idx < left.Len(); idx++ {
		// Null slots carry no value to compare; every other slot must match
		// once trailing NUL padding is removed from both sides.
		if !left.IsNull(idx) && stripNulls(left.Value(idx)) != stripNulls(right.Value(idx)) {
			return false
		}
	}
	return true
}

// arrayApproxEqualLargeString reports whether left and right hold the same
// string in every slot where left is non-null, treating values that differ
// only by trailing NUL ("\x00") bytes as equal.
//
// Only left's length and validity are consulted here.
// NOTE(review): presumably the caller has already verified that both arrays
// have matching lengths and validity bitmaps — confirm at the call site.
func arrayApproxEqualLargeString(left, right *LargeString) bool {
	for idx := 0; idx < left.Len(); idx++ {
		// Null slots carry no value to compare; every other slot must match
		// once trailing NUL padding is removed from both sides.
		if !left.IsNull(idx) && stripNulls(left.Value(idx)) != stripNulls(right.Value(idx)) {
			return false
		}
	}
	return true
}

// stripNulls returns s with every trailing NUL ("\x00") byte removed. NUL
// bytes that are followed by any other byte are left untouched.
func stripNulls(s string) string {
	const nul = "\x00"
	// Peel one trailing NUL per iteration until the string no longer ends in one.
	for strings.HasSuffix(s, nul) {
		s = s[:len(s)-len(nul)]
	}
	return s
}

func arrayApproxEqualFloat16(left, right *Float16, opt equalOption) bool {
for i := 0; i < left.Len(); i++ {
if left.IsNull(i) {
Expand Down
51 changes: 50 additions & 1 deletion arrow/array/compare_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,48 @@ func TestArrayApproxEqual(t *testing.T) {
}
}

// TestArrayApproxEqualStrings verifies how array.ApproxEqual compares string
// arrays: values that differ only by trailing NUL ("\x00") bytes compare
// equal, while genuinely different values (including ones that differ by an
// interior NUL byte) do not.
//
// Each case has a distinct name so that a failure identifies the exact
// scenario instead of relying on the "#01"-style suffixes that t.Run appends
// to duplicate subtest names.
func TestArrayApproxEqualStrings(t *testing.T) {
	for _, tc := range []struct {
		name string
		a1   interface{}
		a2   interface{}
		want bool
	}{
		{
			name: "identical",
			a1:   []string{"a", "b", "c", "d", "e", "f"},
			a2:   []string{"a", "b", "c", "d", "e", "f"},
			want: true,
		},
		{
			name: "trailing_nul_on_left",
			a1:   []string{"a", "b\x00"},
			a2:   []string{"a", "b"},
			want: true,
		},
		{
			name: "trailing_nul_on_both_sides",
			a1:   []string{"a", "b\x00"},
			a2:   []string{"a\x00", "b"},
			want: true,
		},
		{
			// Guards against a comparison that accepts everything.
			name: "different_values",
			a1:   []string{"a", "b"},
			a2:   []string{"a", "c"},
			want: false,
		},
		{
			// Only trailing NUL bytes are ignored; an interior one is significant.
			name: "interior_nul",
			a1:   []string{"a\x00b"},
			a2:   []string{"ab"},
			want: false,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			// The checked allocator fails the test if any buffer is still
			// allocated after the deferred Release calls have run.
			mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
			defer mem.AssertSize(t, 0)

			a1 := arrayOf(mem, tc.a1, nil)
			defer a1.Release()
			a2 := arrayOf(mem, tc.a2, nil)
			defer a2.Release()

			if got, want := array.ApproxEqual(a1, a2), tc.want; got != want {
				t.Fatalf("invalid comparison: got=%v, want=%v\na1: %v\na2: %v\n", got, want, a1, a2)
			}
		})
	}
}

func TestArrayApproxEqualFloats(t *testing.T) {
f16sFrom := func(vs []float64) []float16.Num {
o := make([]float16.Num, len(vs))
Expand Down Expand Up @@ -445,6 +487,13 @@ func arrayOf(mem memory.Allocator, a interface{}, valids []bool) arrow.Array {
bldr.AppendValues(a, valids)
return bldr.NewFloat64Array()

case []string:
bldr := array.NewStringBuilder(mem)
defer bldr.Release()

bldr.AppendValues(a, valids)
return bldr.NewStringArray()

default:
panic(fmt.Errorf("arrdata: invalid data slice type %T", a))
}
Expand Down Expand Up @@ -725,4 +774,4 @@ func TestTableEqual(t *testing.T) {
}
})
}
}
}

0 comments on commit 44d8ad3

Please sign in to comment.